From 6b943ff3a3f8617113ecbf611cf0f8957e4e19d2 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Mon, 2 May 2011 19:34:44 +0000 Subject: Vendor import of llvm trunk r130700: http://llvm.org/svn/llvm-project/llvm/trunk@130700 --- test/Analysis/BasicAA/intrinsics.ll | 39 + test/Analysis/BasicAA/store-promote.ll | 2 +- .../2006-09-26-PostDominanceFrontier.ll | 97 - .../2007-04-17-PostDominanceFrontier.ll | 692 --- .../PostDominators/2007-04-20-PostDom-Reset.ll | 28 - test/Analysis/RegionInfo/next.ll | 4 +- .../2011-03-09-ExactNoMaxBECount.ll | 34 + .../ScalarEvolution/2011-04-26-FoldAddRec.ll | 33 + test/Analysis/ScalarEvolution/nsw-offset.ll | 12 +- test/Analysis/ScalarEvolution/nsw.ll | 12 +- test/Analysis/ScalarEvolution/sext-iv-0.ll | 11 +- test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll | 27 + test/Assembler/AutoUpgradeIntrinsics.ll | 12 + test/Assembler/aggregate-return-single-value.ll | 6 - test/Assembler/comment.ll | 5 +- test/Bitcode/neon-intrinsics.ll | 13 +- test/CMakeLists.txt | 22 +- test/CodeGen/ARM/2009-10-27-double-align.ll | 3 +- test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll | 2 +- test/CodeGen/ARM/2010-05-18-PostIndexBug.ll | 6 +- test/CodeGen/ARM/2010-08-04-StackVariable.ll | 4 +- test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll | 6 +- test/CodeGen/ARM/2010-11-29-PrologueBug.ll | 2 +- test/CodeGen/ARM/2010-12-13-reloc-pic.ll | 100 - test/CodeGen/ARM/2010-12-15-elf-lcomm.ll | 6 +- test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll | 47 + test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll | 55 + test/CodeGen/ARM/2011-03-23-PeepholeBug.ll | 41 + test/CodeGen/ARM/2011-04-07-schediv.ll | 31 + test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll | 34 + test/CodeGen/ARM/2011-04-12-AlignBug.ll | 11 + test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll | 15 + test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll | 22 + test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll | 41 + test/CodeGen/ARM/2011-04-26-SchedTweak.ll | 70 + test/CodeGen/ARM/2011-04-27-IfCvtBug.ll | 59 + 
test/CodeGen/ARM/align.ll | 2 +- test/CodeGen/ARM/arguments.ll | 2 +- test/CodeGen/ARM/arm-and-tst-peephole.ll | 10 +- test/CodeGen/ARM/arm-returnaddr.ll | 4 +- test/CodeGen/ARM/avoid-cpsr-rmw.ll | 16 + test/CodeGen/ARM/bx_fold.ll | 2 +- test/CodeGen/ARM/call-tc.ll | 2 +- test/CodeGen/ARM/carry.ll | 17 + test/CodeGen/ARM/code-placement.ll | 4 +- test/CodeGen/ARM/constants.ll | 12 +- test/CodeGen/ARM/crash-greedy.ll | 84 + test/CodeGen/ARM/debug-info-d16-reg.ll | 105 + test/CodeGen/ARM/debug-info-qreg.ll | 94 + test/CodeGen/ARM/debug-info-s16-reg.ll | 116 + test/CodeGen/ARM/divmod.ll | 58 + test/CodeGen/ARM/fabss.ll | 2 +- test/CodeGen/ARM/fadds.ll | 2 +- test/CodeGen/ARM/fast-isel-pred.ll | 58 + test/CodeGen/ARM/fast-isel-redefinition.ll | 11 + test/CodeGen/ARM/fast-isel-static.ll | 2 +- test/CodeGen/ARM/fast-isel.ll | 55 +- test/CodeGen/ARM/fcopysign.ll | 32 + test/CodeGen/ARM/fdivs.ll | 2 +- test/CodeGen/ARM/fmacs.ll | 53 + test/CodeGen/ARM/fmuls.ll | 2 +- test/CodeGen/ARM/fnmscs.ll | 17 +- test/CodeGen/ARM/fp-arg-shuffle.ll | 11 + test/CodeGen/ARM/fp.ll | 2 +- test/CodeGen/ARM/fp_convert.ll | 8 +- test/CodeGen/ARM/fpcmp-opt.ll | 2 +- test/CodeGen/ARM/ifcvt10.ll | 4 +- test/CodeGen/ARM/ifcvt5.ll | 2 +- test/CodeGen/ARM/ifcvt6.ll | 2 +- test/CodeGen/ARM/ifcvt7.ll | 2 +- test/CodeGen/ARM/ifcvt8.ll | 2 +- test/CodeGen/ARM/indirectbr.ll | 27 +- test/CodeGen/ARM/inlineasm3.ll | 2 +- test/CodeGen/ARM/int-to-fp.ll | 19 + test/CodeGen/ARM/ldm.ll | 10 +- test/CodeGen/ARM/ldrd.ll | 18 +- test/CodeGen/ARM/long.ll | 6 +- test/CodeGen/ARM/long_shift.ll | 10 +- test/CodeGen/ARM/lsr-code-insertion.ll | 8 +- test/CodeGen/ARM/lsr-on-unrolled-loops.ll | 5 - test/CodeGen/ARM/memcpy-inline.ll | 12 +- test/CodeGen/ARM/neon_div.ll | 2 +- test/CodeGen/ARM/neon_shift.ll | 11 + test/CodeGen/ARM/peephole-bitcast.ll | 26 + test/CodeGen/ARM/prefetch.ll | 21 +- test/CodeGen/ARM/reg_sequence.ll | 22 +- test/CodeGen/ARM/rev.ll | 30 +- test/CodeGen/ARM/select-imm.ll | 4 +- 
test/CodeGen/ARM/select.ll | 23 + test/CodeGen/ARM/select_xform.ll | 2 +- test/CodeGen/ARM/shifter_operand.ll | 10 +- test/CodeGen/ARM/shuffle.ll | 18 + test/CodeGen/ARM/smul.ll | 16 +- test/CodeGen/ARM/str_pre-2.ll | 5 +- test/CodeGen/ARM/sub.ll | 11 +- test/CodeGen/ARM/thumb1-varalloc.ll | 6 +- test/CodeGen/ARM/trap.ll | 11 +- test/CodeGen/ARM/umulo-32.ll | 27 + test/CodeGen/ARM/unaligned_load_store.ll | 16 +- test/CodeGen/ARM/undef-sext.ll | 14 + test/CodeGen/ARM/va_arg.ll | 10 +- test/CodeGen/ARM/vbsl-constant.ll | 115 + test/CodeGen/ARM/vcgt.ll | 7 +- test/CodeGen/ARM/vector-DAGCombine.ll | 18 + test/CodeGen/ARM/vext.ll | 12 +- test/CodeGen/ARM/vfp.ll | 5 +- test/CodeGen/ARM/vld1.ll | 9 +- test/CodeGen/ARM/vld3.ll | 7 +- test/CodeGen/ARM/vldlane.ll | 13 +- test/CodeGen/ARM/vmul.ll | 155 +- test/CodeGen/ARM/vst3.ll | 2 +- .../Blackfin/2009-08-04-LowerExtract-Live.ll | 1 + test/CodeGen/CellSPU/jumptable.ll | 2 +- test/CodeGen/CellSPU/loads.ll | 7 + test/CodeGen/CellSPU/rotate_ops.ll | 6 +- test/CodeGen/CellSPU/shift_ops.ll | 61 +- test/CodeGen/CellSPU/stores.ll | 8 + test/CodeGen/CellSPU/v2f32.ll | 12 +- test/CodeGen/Generic/crash.ll | 28 + test/CodeGen/MBlaze/fsl.ll | 18 +- test/CodeGen/MBlaze/loop.ll | 6 +- test/CodeGen/Mips/2008-07-22-Cstpool.ll | 2 +- test/CodeGen/Mips/2008-07-23-fpcmp.ll | 4 + test/CodeGen/Mips/2008-07-29-icmp.ll | 4 + test/CodeGen/Mips/2008-08-06-Alloca.ll | 1 + test/CodeGen/Mips/2010-07-20-Select.ll | 7 +- test/CodeGen/Mips/addc.ll | 13 + test/CodeGen/Mips/analyzebranch.ll | 46 + test/CodeGen/Mips/blockaddr.ll | 31 + test/CodeGen/Mips/buildpairextractelementf64.ll | 23 + test/CodeGen/Mips/cmov.ll | 5 +- test/CodeGen/Mips/divrem.ll | 51 + test/CodeGen/Mips/fpbr.ll | 119 + test/CodeGen/Mips/fpcmp.ll | 23 + test/CodeGen/Mips/internalfunc.ll | 52 + test/CodeGen/Mips/largeimm1.ll | 13 + test/CodeGen/Mips/o32_cc.ll | 52 +- test/CodeGen/Mips/o32_cc_vararg.ll | 278 + test/CodeGen/Mips/select.ll | 196 + test/CodeGen/PTX/add.ll | 70 +- 
test/CodeGen/PTX/bitwise.ll | 24 + test/CodeGen/PTX/bra.ll | 24 + test/CodeGen/PTX/exit.ll | 2 +- test/CodeGen/PTX/fdiv-sm10.ll | 15 + test/CodeGen/PTX/fdiv-sm13.ll | 15 + test/CodeGen/PTX/intrinsic.ll | 281 + test/CodeGen/PTX/ld.ll | 431 +- test/CodeGen/PTX/llvm-intrinsic.ll | 56 + test/CodeGen/PTX/mad.ll | 17 + test/CodeGen/PTX/mov.ll | 59 +- test/CodeGen/PTX/mul.ll | 39 + test/CodeGen/PTX/options.ll | 8 +- test/CodeGen/PTX/parameter-order.ll | 8 + test/CodeGen/PTX/ret.ll | 2 +- test/CodeGen/PTX/setp.ll | 134 + test/CodeGen/PTX/shl.ll | 2 +- test/CodeGen/PTX/shr.ll | 2 +- test/CodeGen/PTX/st.ll | 389 +- test/CodeGen/PTX/sub.ll | 70 +- test/CodeGen/PowerPC/2008-12-12-EH.ll | 2 +- test/CodeGen/PowerPC/2010-05-03-retaddr1.ll | 1 + test/CodeGen/PowerPC/Atomics-64.ll | 10 +- test/CodeGen/PowerPC/Frames-small.ll | 2 +- test/CodeGen/PowerPC/indirectbr.ll | 14 +- test/CodeGen/PowerPC/mulhs.ll | 2 +- test/CodeGen/PowerPC/ppc-prologue.ll | 4 +- test/CodeGen/SPARC/2011-01-11-FrameAddr.ll | 2 + .../SystemZ/2009-07-10-BadIncomingArgOffset.ll | 3 +- test/CodeGen/Thumb/2009-08-20-ISelBug.ll | 2 +- .../Thumb/2010-01-15-local-alloc-spill-physical.ll | 20 - test/CodeGen/Thumb/2010-07-15-debugOrdering.ll | 2 +- test/CodeGen/Thumb/dyn-stackalloc.ll | 27 +- test/CodeGen/Thumb/rev.ll | 56 + test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll | 2 +- test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll | 2 +- test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll | 5 +- test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll | 16 +- test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll | 2 +- test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll | 23 + test/CodeGen/Thumb2/bfi.ll | 11 + test/CodeGen/Thumb2/cross-rc-coalescing-2.ll | 2 - test/CodeGen/Thumb2/ldr-str-imm12.ll | 4 +- test/CodeGen/Thumb2/machine-licm.ll | 26 +- test/CodeGen/Thumb2/thumb2-ldrd.ll | 2 +- test/CodeGen/Thumb2/thumb2-lsr3.ll | 6 +- test/CodeGen/Thumb2/thumb2-ror.ll | 17 +- test/CodeGen/Thumb2/thumb2-ror2.ll | 11 - test/CodeGen/Thumb2/thumb2-sbc.ll | 19 +- 
test/CodeGen/Thumb2/thumb2-sub3.ll | 10 +- test/CodeGen/Thumb2/thumb2-sub5.ll | 7 +- test/CodeGen/Thumb2/thumb2-uxtb.ll | 4 +- test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll | 7 +- test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll | 4 +- test/CodeGen/X86/2007-05-05-Personality.ll | 5 +- test/CodeGen/X86/2007-07-03-GR64ToVR64.ll | 8 +- test/CodeGen/X86/2007-07-18-Vector-Extract.ll | 6 +- test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll | 4 +- test/CodeGen/X86/2007-09-27-LDIntrinsics.ll | 4 +- test/CodeGen/X86/2008-02-22-ReMatBug.ll | 2 +- test/CodeGen/X86/2008-03-18-CoalescerBug.ll | 4 +- test/CodeGen/X86/2008-04-02-unnamedEH.ll | 2 +- test/CodeGen/X86/2008-04-16-ReMatBug.ll | 2 +- test/CodeGen/X86/2008-07-11-SpillerBug.ll | 1 - test/CodeGen/X86/2008-07-19-movups-spills.ll | 6 +- test/CodeGen/X86/2008-08-05-SpillerBug.ll | 44 - test/CodeGen/X86/2008-09-18-inline-asm-2.ll | 6 +- test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll | 6 +- test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll | 2 +- test/CodeGen/X86/2009-03-11-CoalescerBug.ll | 85 - test/CodeGen/X86/2009-03-16-SpillerBug.ll | 2 +- test/CodeGen/X86/2009-04-20-LinearScanOpt.ll | 2 +- test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll | 2 +- test/CodeGen/X86/2009-04-24.ll | 2 +- test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll | 2 +- test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll | 5 +- .../X86/2010-05-03-CoalescerSubRegClobber.ll | 4 +- test/CodeGen/X86/2010-05-25-DotDebugLoc.ll | 1 + test/CodeGen/X86/2010-05-26-DotDebugLoc.ll | 18 +- test/CodeGen/X86/2010-05-28-Crash.ll | 1 + test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll | 3 +- test/CodeGen/X86/2010-08-04-StackVariable.ll | 4 +- test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll | 10 +- test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll | 2 +- test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll | 1 + test/CodeGen/X86/2011-02-27-Fpextend.ll | 7 + test/CodeGen/X86/2011-03-02-DAGCombiner.ll | 51 + test/CodeGen/X86/2011-03-08-Sched-crash.ll | 56 + 
test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll | 22 + test/CodeGen/X86/2011-03-30-CreateFixedObjCrash.ll | 10 + test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll | 65 + test/CodeGen/X86/3dnow-intrinsics.ll | 297 ++ test/CodeGen/X86/MachineSink-PHIUse.ll | 2 +- test/CodeGen/X86/SIMD/dg.exp | 5 + test/CodeGen/X86/SIMD/notvunpcklpd.ll | 20 + test/CodeGen/X86/SIMD/notvunpcklps.ll | 20 + test/CodeGen/X86/SIMD/vunpcklpd.ll | 20 + test/CodeGen/X86/SIMD/vunpcklps.ll | 20 + test/CodeGen/X86/abi-isel.ll | 5575 ++++++++++---------- test/CodeGen/X86/add.ll | 15 + test/CodeGen/X86/adde-carry.ll | 26 + test/CodeGen/X86/aliases.ll | 2 - test/CodeGen/X86/alignment.ll | 6 +- test/CodeGen/X86/apm.ll | 11 +- test/CodeGen/X86/avoid-lea-scale2.ll | 4 +- test/CodeGen/X86/avx-intrinsics-x86.ll | 4 +- test/CodeGen/X86/bool-zext.ll | 35 + test/CodeGen/X86/break-anti-dependencies.ll | 3 +- test/CodeGen/X86/byval.ll | 3 +- test/CodeGen/X86/byval2.ll | 27 +- test/CodeGen/X86/byval3.ll | 27 +- test/CodeGen/X86/byval4.ll | 27 +- test/CodeGen/X86/byval5.ll | 27 +- test/CodeGen/X86/call-push.ll | 16 + test/CodeGen/X86/coalesce-esp.ll | 2 +- test/CodeGen/X86/coalescer-commute2.ll | 9 +- test/CodeGen/X86/coalescer-cross.ll | 6 +- test/CodeGen/X86/commute-two-addr.ll | 3 +- test/CodeGen/X86/constant-pool-remat-0.ll | 17 +- test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll | 10 +- test/CodeGen/X86/crash.ll | 18 +- test/CodeGen/X86/dbg-declare-arg.ll | 123 + test/CodeGen/X86/dbg-file-name.ll | 19 + test/CodeGen/X86/dbg-merge-loc-entry.ll | 3 +- test/CodeGen/X86/dbg-value-inlined-parameter.ll | 1 + test/CodeGen/X86/dbg-value-location.ll | 1 + test/CodeGen/X86/dbg-value-range.ll | 17 +- test/CodeGen/X86/divide-by-constant.ll | 11 + test/CodeGen/X86/dyn-stackalloc.ll | 9 +- test/CodeGen/X86/fast-isel-gep.ll | 19 + test/CodeGen/X86/fast-isel-i1.ll | 39 +- test/CodeGen/X86/fast-isel-shift-imm.ll | 8 - test/CodeGen/X86/fast-isel-x86-64.ll | 262 + test/CodeGen/X86/fast-isel-x86.ll | 17 +- 
test/CodeGen/X86/fast-isel.ll | 11 + test/CodeGen/X86/fold-mul-lohi.ll | 5 +- test/CodeGen/X86/fold-pcmpeqd-0.ll | 16 +- test/CodeGen/X86/fold-pcmpeqd-2.ll | 17 +- test/CodeGen/X86/fold-zext-trunc.ll | 23 + test/CodeGen/X86/fp-stack-compare.ll | 14 +- test/CodeGen/X86/fp-trunc.ll | 35 + test/CodeGen/X86/global-sections-tls.ll | 2 +- test/CodeGen/X86/global-sections.ll | 6 +- test/CodeGen/X86/h-register-store.ll | 32 +- test/CodeGen/X86/h-registers-0.ll | 26 +- test/CodeGen/X86/h-registers-1.ll | 2 +- test/CodeGen/X86/hidden-vis-pic.ll | 2 +- test/CodeGen/X86/i64-mem-copy.ll | 8 +- test/CodeGen/X86/iabs.ll | 11 +- test/CodeGen/X86/isel-sink3.ll | 6 +- test/CodeGen/X86/lea-3.ll | 17 +- test/CodeGen/X86/lock-inst-encoding.ll | 5 +- test/CodeGen/X86/loop-strength-reduce4.ll | 6 +- test/CodeGen/X86/lsr-interesting-step.ll | 4 +- test/CodeGen/X86/lsr-quadratic-expand.ll | 22 + test/CodeGen/X86/lsr-redundant-addressing.ll | 45 + test/CodeGen/X86/lsr-reuse-trunc.ll | 5 +- test/CodeGen/X86/lsr-reuse.ll | 1 + test/CodeGen/X86/machine-cse.ll | 4 +- test/CodeGen/X86/mcinst-lowering-cmp0.ll | 68 - test/CodeGen/X86/mmx-copy-gprs.ll | 10 +- test/CodeGen/X86/narrow-shl-cst.ll | 101 + test/CodeGen/X86/no-cfi.ll | 38 + test/CodeGen/X86/optimize-max-3.ll | 4 +- test/CodeGen/X86/or-address.ll | 8 +- test/CodeGen/X86/peep-vector-extract-concat.ll | 7 +- test/CodeGen/X86/personality.ll | 22 +- test/CodeGen/X86/phi-bit-propagation.ll | 55 + test/CodeGen/X86/phi-constants.ll | 35 - test/CodeGen/X86/pic.ll | 2 +- test/CodeGen/X86/pmulld.ll | 12 +- test/CodeGen/X86/postra-licm.ll | 9 +- test/CodeGen/X86/pr2659.ll | 3 +- test/CodeGen/X86/pr3366.ll | 2 +- test/CodeGen/X86/pr3495-2.ll | 2 +- test/CodeGen/X86/pr3495.ll | 6 +- test/CodeGen/X86/pr9743.ll | 17 + test/CodeGen/X86/pre-split1.ll | 2 +- test/CodeGen/X86/pre-split10.ll | 2 +- test/CodeGen/X86/pre-split11.ll | 2 +- test/CodeGen/X86/pre-split2.ll | 2 +- test/CodeGen/X86/pre-split3.ll | 2 +- test/CodeGen/X86/pre-split4.ll | 2 +- 
test/CodeGen/X86/pre-split5.ll | 2 +- test/CodeGen/X86/pre-split6.ll | 2 +- test/CodeGen/X86/pre-split7.ll | 2 +- test/CodeGen/X86/pre-split8.ll | 2 +- test/CodeGen/X86/pre-split9.ll | 2 +- test/CodeGen/X86/remat-scalar-zero.ll | 1 + test/CodeGen/X86/scalar-min-max-fill-operand.ll | 13 +- test/CodeGen/X86/sext-i1.ll | 4 +- test/CodeGen/X86/shrink-compare.ll | 36 + test/CodeGen/X86/sse-align-0.ll | 3 +- test/CodeGen/X86/sse-align-3.ll | 7 +- test/CodeGen/X86/sse-align-7.ll | 4 +- test/CodeGen/X86/sse-commute.ll | 2 +- test/CodeGen/X86/sse2.ll | 6 +- test/CodeGen/X86/sse3.ll | 12 +- test/CodeGen/X86/sse_reload_fold.ll | 5 +- test/CodeGen/X86/stdarg.ll | 3 +- test/CodeGen/X86/stride-nine-with-base-reg.ll | 5 +- test/CodeGen/X86/stride-reuse.ll | 5 +- test/CodeGen/X86/sub-with-overflow.ll | 22 +- test/CodeGen/X86/tail-opts.ll | 30 +- test/CodeGen/X86/tailcall-returndup-void.ll | 37 + test/CodeGen/X86/tailcallbyval64.ll | 25 +- test/CodeGen/X86/tailcallstack64.ll | 12 +- test/CodeGen/X86/test-nofold.ll | 8 +- test/CodeGen/X86/twoaddr-lea.ll | 11 + test/CodeGen/X86/umulo-64.ll | 28 - test/CodeGen/X86/unaligned-load.ll | 4 +- test/CodeGen/X86/unknown-location.ll | 13 +- test/CodeGen/X86/unreachable-stack-protector.ll | 19 + test/CodeGen/X86/v2f32.ll | 67 +- test/CodeGen/X86/vec_cast.ll | 4 +- test/CodeGen/X86/vec_set-8.ll | 7 +- test/CodeGen/X86/vec_shuffle-16.ll | 23 +- test/CodeGen/X86/vec_shuffle-17.ll | 7 +- test/CodeGen/X86/vec_uint_to_fp.ll | 11 + test/CodeGen/X86/visibility.ll | 7 +- test/CodeGen/X86/widen_load-0.ll | 8 +- test/CodeGen/X86/win64_alloca_dynalloca.ll | 74 + test/CodeGen/X86/win64_vararg.ll | 33 + test/CodeGen/X86/win_chkstk.ll | 3 +- test/CodeGen/X86/x86-64-malloc.ll | 4 +- test/CodeGen/X86/zext-extract_subreg.ll | 1 + test/CodeGen/X86/zext-sext.ll | 1 + test/CodeGen/XCore/events.ll | 20 + test/CodeGen/XCore/mul64.ll | 33 +- test/CodeGen/XCore/ps-intrinsics.ll | 18 + test/CodeGen/XCore/resources.ll | 24 + test/CodeGen/XCore/scavenging.ll | 52 + 
test/CodeGen/XCore/sr-intrinsics.ll | 18 + test/CodeGen/XCore/threads.ll | 67 + test/CodeGen/XCore/trampoline.ll | 4 +- test/DebugInfo/2010-04-13-PubType.ll | 2 +- test/DebugInfo/array.ll | 34 + test/FrontendAda/real_cst.adb | 2 +- test/FrontendC++/2009-07-15-LineNumbers.cpp | 27 - test/FrontendC/2006-05-01-AppleAlignmentPragma.c | 2 +- test/FrontendC/2010-07-27-MinNoFoldConst.c | 2 +- test/FrontendC/2011-03-02-UnionInitializer.c | 2 + .../2011-03-08-ZeroFieldUnionInitializer.c | 7 + test/FrontendC/2011-03-31-ArrayRefFolding.c | 15 + test/FrontendC/cstring-align.c | 11 - test/FrontendC/mmx-inline-asm.c | 24 + test/FrontendC/vla-3.c | 11 + .../2011-03-02-ConstCFStringLiteralAlign.m | 11 + test/FrontendObjC/2011-03-08-IVarLookup.m | 32 + test/MC/ARM/arm_addrmode2.s | 34 + test/MC/ARM/arm_addrmode3.s | 18 + test/MC/ARM/arm_instructions.s | 27 + test/MC/ARM/elf-reloc-01.ll | 9 +- test/MC/ARM/elf-reloc-02.ll | 7 +- test/MC/ARM/elf-reloc-03.ll | 7 +- test/MC/ARM/neon-shift-encoding.s | 267 +- test/MC/ARM/simple-encoding.ll | 8 +- test/MC/ARM/thumb2.s | 16 + test/MC/AsmParser/directive_space.s | 5 + test/MC/AsmParser/dot-symbol.s | 12 + test/MC/AsmParser/exprs-invalid.s | 19 +- test/MC/AsmParser/floating-literals.s | 9 + test/MC/AsmParser/rename.s | 6 +- test/MC/AsmParser/section.s | 10 +- test/MC/COFF/basic-coff.s | 266 +- test/MC/COFF/bss.s | 30 +- test/MC/COFF/diff.s | 46 + test/MC/COFF/simple-fixups.s | 100 +- test/MC/COFF/symbol-alias.s | 124 +- test/MC/COFF/symbol-fragment-offset.s | 374 +- test/MC/COFF/weak.s | 102 +- test/MC/Disassembler/ARM/arm-tests.txt | 164 +- test/MC/Disassembler/ARM/invalid-BFI-arm.txt | 10 + test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt | 10 + test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt | 4 + test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt | 4 + test/MC/Disassembler/ARM/invalid-DMB-thumb.txt | 16 + test/MC/Disassembler/ARM/invalid-DSB-arm.txt | 16 + test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt | 11 + 
test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt | 10 + .../MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt | 13 + test/MC/Disassembler/ARM/invalid-LDRT-arm.txt | 12 + test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt | 4 + test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt | 10 + test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt | 4 + test/MC/Disassembler/ARM/invalid-LSL-regform.txt | 11 + test/MC/Disassembler/ARM/invalid-MCR-arm.txt | 10 + test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt | 10 + test/MC/Disassembler/ARM/invalid-MOVr-arm.txt | 13 + test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt | 9 + test/MC/Disassembler/ARM/invalid-MOVs-arm.txt | 17 + test/MC/Disassembler/ARM/invalid-MSRi-arm.txt | 12 + .../MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt | 11 + test/MC/Disassembler/ARM/invalid-RSC-arm.txt | 9 + test/MC/Disassembler/ARM/invalid-SBFX-arm.txt | 10 + test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt | 11 + test/MC/Disassembler/ARM/invalid-SRS-arm.txt | 13 + test/MC/Disassembler/ARM/invalid-SSAT-arm.txt | 11 + .../Disassembler/ARM/invalid-STMIA_UPD-thumb.txt | 10 + test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt | 10 + test/MC/Disassembler/ARM/invalid-SXTB-arm.txt | 11 + test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt | 11 + test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt | 12 + .../Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt | 10 + .../ARM/invalid-VLD3DUPd32_UPD-thumb.txt | 11 + .../Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt | 4 + test/MC/Disassembler/ARM/invalid-VQADD-arm.txt | 10 + .../Disassembler/ARM/invalid-VST2b32_UPD-arm.txt | 11 + test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt | 11 + test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt | 10 + .../MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt | 10 + .../Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt | 10 + .../Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt | 10 + .../Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt | 10 + .../MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt | 10 + 
.../MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt | 10 + .../Disassembler/ARM/invalid-t2STR_POST-thumb.txt | 10 + test/MC/Disassembler/ARM/neon-tests.txt | 30 + test/MC/Disassembler/ARM/thumb-printf.txt | 77 + test/MC/Disassembler/ARM/thumb-tests.txt | 154 +- test/MC/Disassembler/X86/simple-tests.txt | 6 + test/MC/ELF/alias-reloc.s | 28 +- test/MC/ELF/align-bss.s | 2 +- test/MC/ELF/align-nops.s | 2 +- test/MC/ELF/align.s | 4 +- test/MC/ELF/basic-elf-32.s | 34 +- test/MC/ELF/basic-elf-64.s | 34 +- test/MC/ELF/cfi-adjust-cfa-offset.s | 46 + test/MC/ELF/cfi-advance-loc2.s | 8 +- test/MC/ELF/cfi-def-cfa-offset.s | 10 +- test/MC/ELF/cfi-def-cfa-register.s | 8 +- test/MC/ELF/cfi-def-cfa.s | 8 +- test/MC/ELF/cfi-offset.s | 8 +- test/MC/ELF/cfi-rel-offset.s | 49 + test/MC/ELF/cfi-rel-offset2.s | 41 + test/MC/ELF/cfi-remember.s | 10 +- test/MC/ELF/cfi-same-value.s | 42 + test/MC/ELF/cfi-zero-addr-delta.s | 8 +- test/MC/ELF/cfi.s | 10 +- test/MC/ELF/comdat.s | 12 +- test/MC/ELF/common.s | 2 +- test/MC/ELF/common2.s | 2 +- test/MC/ELF/debug-line.s | 2 +- test/MC/ELF/debug-loc.s | 2 +- test/MC/ELF/empty-dwarf-lines.s | 2 +- test/MC/ELF/empty.s | 10 +- test/MC/ELF/entsize.ll | 4 +- test/MC/ELF/entsize.s | 6 +- test/MC/ELF/global-offset.s | 2 +- test/MC/ELF/got.s | 6 +- test/MC/ELF/ident.s | 2 +- test/MC/ELF/local-reloc.s | 20 +- test/MC/ELF/merge.s | 48 +- test/MC/ELF/noexec.s | 2 +- test/MC/ELF/pic-diff.s | 18 +- test/MC/ELF/relocation-386.s | 55 +- test/MC/ELF/relocation-pc.s | 8 +- test/MC/ELF/relocation.s | 16 +- test/MC/ELF/rename.s | 16 +- test/MC/ELF/section-quoting.s | 10 + test/MC/ELF/section.s | 24 +- test/MC/ELF/symref.s | 68 +- test/MC/ELF/tls-i386.s | 10 + test/MC/ELF/tls.s | 2 +- test/MC/ELF/undef2.s | 2 +- test/MC/ELF/weak-relocation.s | 15 + test/MC/ELF/weakref-reloc.s | 26 +- test/MC/ELF/weakref.s | 4 +- test/MC/MachO/darwin-x86_64-diff-relocs.s | 2 +- test/MC/MachO/section-attributes.s | 7 + test/MC/MachO/temp-labels.s | 33 + test/MC/MachO/variable-errors.s | 
8 + test/MC/MachO/variable-exprs.s | 446 ++ test/MC/X86/padlock.s | 53 + test/MC/X86/x86-32-coverage.s | 8 + test/MC/X86/x86-32.s | 130 +- test/MC/X86/x86-64.s | 219 +- test/MC/X86/x86_64-encoding.s | 16 + test/Makefile | 34 +- test/TableGen/TargetInstrInfo.td | 2 +- test/Transforms/CodeGenPrepare/basic.ll | 3 +- .../Transforms/ConstProp/2002-05-03-NotOperator.ll | 2 +- test/Transforms/ConstProp/basictest.ll | 2 +- test/Transforms/ConstProp/logicaltest.ll | 2 +- test/Transforms/ConstProp/overflow-ops.ll | 41 +- test/Transforms/ConstProp/phi.ll | 2 +- test/Transforms/DeadArgElim/deadexternal.ll | 13 + .../2011-03-25-DSEMiscompile.ll | 23 + test/Transforms/GVN/invariant-simple.ll | 36 - test/Transforms/GVN/rle.ll | 100 +- .../GlobalOpt/2011-04-09-EmptyGlobalCtors.ll | 5 + test/Transforms/GlobalOpt/cxx-dtor.ll | 31 + .../IndVarSimplify/2009-04-14-shorten_iv_vars.ll | 2 +- .../IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll | 2 +- .../IndVarSimplify/2009-04-27-Floating.ll | 11 +- test/Transforms/IndVarSimplify/ada-loops.ll | 2 +- test/Transforms/IndVarSimplify/addrec-gep.ll | 2 +- test/Transforms/IndVarSimplify/ashr-tripcount.ll | 2 +- test/Transforms/IndVarSimplify/iv-sext.ll | 2 +- test/Transforms/IndVarSimplify/iv-zext.ll | 2 +- test/Transforms/IndVarSimplify/max-pointer.ll | 2 +- test/Transforms/IndVarSimplify/pointer.ll | 2 +- .../IndVarSimplify/preserve-gep-loop-variant.ll | 2 +- .../IndVarSimplify/preserve-gep-nested.ll | 2 +- .../IndVarSimplify/preserve-gep-remainder.ll | 2 +- test/Transforms/IndVarSimplify/preserve-gep.ll | 2 +- .../InstCombine/2011-03-08-SRemMinusOneBadOpt.ll | 12 + test/Transforms/InstCombine/ExtractCast.ll | 27 + test/Transforms/InstCombine/and-or-not.ll | 2 +- test/Transforms/InstCombine/and2.ll | 9 + test/Transforms/InstCombine/debuginfo.ll | 57 + test/Transforms/InstCombine/div.ll | 38 +- test/Transforms/InstCombine/fcmp.ll | 60 + test/Transforms/InstCombine/fdiv.ll | 25 + test/Transforms/InstCombine/fold-bin-operand.ll | 15 +- 
test/Transforms/InstCombine/gep-addrspace.ll | 19 + test/Transforms/InstCombine/icmp.ll | 133 + test/Transforms/InstCombine/intrinsics.ll | 27 + test/Transforms/InstCombine/merge-icmp.ll | 29 + test/Transforms/InstCombine/or.ll | 19 + test/Transforms/InstCombine/phi.ll | 86 +- test/Transforms/InstCombine/select.ll | 35 + test/Transforms/InstCombine/sext.ll | 58 + test/Transforms/InstCombine/shift.ll | 21 + test/Transforms/InstCombine/sign-test-and-or.ll | 79 + test/Transforms/InstCombine/strcpy_chk-64.ll | 18 + .../Transforms/InstCombine/udivrem-change-width.ll | 45 +- test/Transforms/InstSimplify/compare.ll | 155 +- test/Transforms/InstSimplify/rem.ll | 17 + .../Transforms/Internalize/available_externally.ll | 16 + .../JumpThreading/2011-04-02-SimplifyDeadBlock.ll | 32 + .../Transforms/JumpThreading/2011-04-14-InfLoop.ll | 31 + test/Transforms/JumpThreading/pr9331.ll | 50 + .../LCSSA/2006-06-03-IncorrectIDFPhis.ll | 2 - test/Transforms/LCSSA/unused-phis.ll | 38 + .../Transforms/LICM/2007-10-01-PromoteSafeValue.ll | 4 +- .../LICM/2011-04-06-HoistMissedASTUpdate.ll | 32 + .../LICM/2011-04-06-PromoteResultOfPromotion.ll | 37 + test/Transforms/LICM/2011-04-09-RAUW-AST.ll | 49 + test/Transforms/LICM/debug-value.ll | 62 + test/Transforms/LoopIdiom/debug-line.ll | 49 + test/Transforms/LoopRotate/crash.ll | 16 + test/Transforms/LoopSimplify/merge-exits.ll | 2 +- .../LoopStrengthReduce/invariant_value_first.ll | 3 +- .../invariant_value_first_arg.ll | 3 +- .../LoopStrengthReduce/ops_after_indvar.ll | 4 +- .../var_stride_used_by_compare.ll | 2 +- test/Transforms/PhaseOrdering/basic.ll | 117 + test/Transforms/Reassociate/crash.ll | 25 + test/Transforms/Reassociate/secondary.ll | 24 + test/Transforms/SCCP/apint-basictest.ll | 2 +- test/Transforms/SCCP/apint-basictest2.ll | 2 +- test/Transforms/SCCP/apint-basictest3.ll | 2 +- test/Transforms/SCCP/apint-basictest4.ll | 2 +- .../SRETPromotion/2008-03-11-attributes.ll | 7 - .../2008-06-04-function-pointer-passing.ll | 24 - 
.../SRETPromotion/2008-06-05-non-call-use.ll | 20 - test/Transforms/SRETPromotion/basictest.ll | 33 - test/Transforms/SRETPromotion/dg.exp | 3 - .../Transforms/ScalarRepl/2008-01-29-PromoteBug.ll | 2 +- .../ScalarRepl/2008-06-05-loadstore-agg.ll | 4 +- test/Transforms/ScalarRepl/dg.exp | 2 +- test/Transforms/ScalarRepl/inline-vector.ll | 53 + test/Transforms/ScalarRepl/only-memcpy-uses.ll | 27 + test/Transforms/ScalarRepl/union-pointer.ll | 2 +- test/Transforms/ScalarRepl/vector_promote.ll | 167 +- test/Transforms/SimplifyCFG/2006-08-03-Crash.ll | 4 +- .../SimplifyCFG/2008-05-16-PHIBlockMerge.ll | 2 +- .../SimplifyCFG/2011-03-08-UnreachableUse.ll | 31 + test/Transforms/SimplifyCFG/PhiBlockMerge.ll | 1 + test/Transforms/SimplifyCFG/PhiEliminate2.ll | 15 +- test/Transforms/SimplifyCFG/PhiEliminate3.ll | 34 + .../Transforms/SimplifyCFG/UnreachableEliminate.ll | 56 +- test/Transforms/SimplifyCFG/branch-fold-dbg.ll | 58 + test/Transforms/SimplifyCFG/hoist-dbgvalue.ll | 53 + .../SimplifyCFG/switch-on-const-select.ll | 138 + test/Transforms/SimplifyCFG/trap-debugloc.ll | 19 + test/Transforms/SimplifyLibCalls/debug-line.ll | 24 + test/Transforms/SimplifyLibCalls/half-powr.ll | 46 - test/Transforms/SimplifyLibCalls/iprintf.ll | 71 + 619 files changed, 16019 insertions(+), 6027 deletions(-) create mode 100644 test/Analysis/BasicAA/intrinsics.ll delete mode 100644 test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll delete mode 100644 test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll delete mode 100644 test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll create mode 100644 test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll create mode 100644 test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll create mode 100644 test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll delete mode 100644 test/CodeGen/ARM/2010-12-13-reloc-pic.ll create mode 100644 test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll create mode 100644 
test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll create mode 100644 test/CodeGen/ARM/2011-03-23-PeepholeBug.ll create mode 100644 test/CodeGen/ARM/2011-04-07-schediv.ll create mode 100644 test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll create mode 100644 test/CodeGen/ARM/2011-04-12-AlignBug.ll create mode 100644 test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll create mode 100644 test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll create mode 100644 test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll create mode 100644 test/CodeGen/ARM/2011-04-26-SchedTweak.ll create mode 100644 test/CodeGen/ARM/2011-04-27-IfCvtBug.ll create mode 100644 test/CodeGen/ARM/avoid-cpsr-rmw.ll create mode 100644 test/CodeGen/ARM/crash-greedy.ll create mode 100644 test/CodeGen/ARM/debug-info-d16-reg.ll create mode 100644 test/CodeGen/ARM/debug-info-qreg.ll create mode 100644 test/CodeGen/ARM/debug-info-s16-reg.ll create mode 100644 test/CodeGen/ARM/divmod.ll create mode 100644 test/CodeGen/ARM/fast-isel-pred.ll create mode 100644 test/CodeGen/ARM/fast-isel-redefinition.ll create mode 100644 test/CodeGen/ARM/fp-arg-shuffle.ll create mode 100644 test/CodeGen/ARM/int-to-fp.ll create mode 100644 test/CodeGen/ARM/neon_shift.ll create mode 100644 test/CodeGen/ARM/peephole-bitcast.ll create mode 100644 test/CodeGen/ARM/shuffle.ll create mode 100644 test/CodeGen/ARM/undef-sext.ll create mode 100644 test/CodeGen/ARM/vbsl-constant.ll create mode 100644 test/CodeGen/Mips/addc.ll create mode 100644 test/CodeGen/Mips/analyzebranch.ll create mode 100644 test/CodeGen/Mips/blockaddr.ll create mode 100644 test/CodeGen/Mips/buildpairextractelementf64.ll create mode 100644 test/CodeGen/Mips/divrem.ll create mode 100644 test/CodeGen/Mips/fpbr.ll create mode 100644 test/CodeGen/Mips/fpcmp.ll create mode 100644 test/CodeGen/Mips/internalfunc.ll create mode 100644 test/CodeGen/Mips/largeimm1.ll create mode 100644 test/CodeGen/Mips/o32_cc_vararg.ll create mode 100644 test/CodeGen/Mips/select.ll create mode 100644 
test/CodeGen/PTX/bitwise.ll create mode 100644 test/CodeGen/PTX/bra.ll create mode 100644 test/CodeGen/PTX/fdiv-sm10.ll create mode 100644 test/CodeGen/PTX/fdiv-sm13.ll create mode 100644 test/CodeGen/PTX/intrinsic.ll create mode 100644 test/CodeGen/PTX/llvm-intrinsic.ll create mode 100644 test/CodeGen/PTX/mad.ll create mode 100644 test/CodeGen/PTX/mul.ll create mode 100644 test/CodeGen/PTX/parameter-order.ll create mode 100644 test/CodeGen/PTX/setp.ll delete mode 100644 test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll create mode 100644 test/CodeGen/Thumb/rev.ll create mode 100644 test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll delete mode 100644 test/CodeGen/Thumb2/thumb2-ror2.ll delete mode 100644 test/CodeGen/X86/2008-08-05-SpillerBug.ll delete mode 100644 test/CodeGen/X86/2009-03-11-CoalescerBug.ll create mode 100644 test/CodeGen/X86/2011-02-27-Fpextend.ll create mode 100644 test/CodeGen/X86/2011-03-02-DAGCombiner.ll create mode 100644 test/CodeGen/X86/2011-03-08-Sched-crash.ll create mode 100644 test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll create mode 100644 test/CodeGen/X86/2011-03-30-CreateFixedObjCrash.ll create mode 100644 test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll create mode 100644 test/CodeGen/X86/3dnow-intrinsics.ll create mode 100644 test/CodeGen/X86/SIMD/dg.exp create mode 100644 test/CodeGen/X86/SIMD/notvunpcklpd.ll create mode 100644 test/CodeGen/X86/SIMD/notvunpcklps.ll create mode 100644 test/CodeGen/X86/SIMD/vunpcklpd.ll create mode 100644 test/CodeGen/X86/SIMD/vunpcklps.ll create mode 100644 test/CodeGen/X86/adde-carry.ll create mode 100644 test/CodeGen/X86/bool-zext.ll create mode 100644 test/CodeGen/X86/dbg-declare-arg.ll create mode 100644 test/CodeGen/X86/dbg-file-name.ll delete mode 100644 test/CodeGen/X86/fast-isel-shift-imm.ll create mode 100644 test/CodeGen/X86/fast-isel-x86-64.ll create mode 100644 test/CodeGen/X86/fold-zext-trunc.ll create mode 100644 test/CodeGen/X86/fp-trunc.ll create mode 100644 
test/CodeGen/X86/lsr-quadratic-expand.ll create mode 100644 test/CodeGen/X86/lsr-redundant-addressing.ll delete mode 100644 test/CodeGen/X86/mcinst-lowering-cmp0.ll create mode 100644 test/CodeGen/X86/narrow-shl-cst.ll create mode 100644 test/CodeGen/X86/no-cfi.ll create mode 100644 test/CodeGen/X86/phi-bit-propagation.ll delete mode 100644 test/CodeGen/X86/phi-constants.ll create mode 100644 test/CodeGen/X86/pr9743.ll create mode 100644 test/CodeGen/X86/shrink-compare.ll create mode 100644 test/CodeGen/X86/tailcall-returndup-void.ll delete mode 100644 test/CodeGen/X86/umulo-64.ll create mode 100644 test/CodeGen/X86/unreachable-stack-protector.ll create mode 100644 test/CodeGen/X86/vec_uint_to_fp.ll create mode 100644 test/CodeGen/X86/win64_alloca_dynalloca.ll create mode 100644 test/CodeGen/XCore/ps-intrinsics.ll create mode 100644 test/CodeGen/XCore/scavenging.ll create mode 100644 test/CodeGen/XCore/sr-intrinsics.ll create mode 100644 test/CodeGen/XCore/threads.ll create mode 100644 test/DebugInfo/array.ll delete mode 100644 test/FrontendC++/2009-07-15-LineNumbers.cpp create mode 100644 test/FrontendC/2011-03-02-UnionInitializer.c create mode 100644 test/FrontendC/2011-03-08-ZeroFieldUnionInitializer.c create mode 100644 test/FrontendC/2011-03-31-ArrayRefFolding.c delete mode 100644 test/FrontendC/cstring-align.c create mode 100644 test/FrontendC/mmx-inline-asm.c create mode 100644 test/FrontendC/vla-3.c create mode 100644 test/FrontendObjC/2011-03-02-ConstCFStringLiteralAlign.m create mode 100644 test/FrontendObjC/2011-03-08-IVarLookup.m create mode 100644 test/MC/ARM/arm_addrmode2.s create mode 100644 test/MC/ARM/arm_addrmode3.s create mode 100644 test/MC/AsmParser/dot-symbol.s create mode 100644 test/MC/COFF/diff.s create mode 100644 test/MC/Disassembler/ARM/invalid-BFI-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt create mode 100644 
test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-DMB-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-DSB-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDRT-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LSL-regform.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MCR-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MOVr-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MOVs-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MSRi-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-RSC-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-SBFX-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-SRS-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-SSAT-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-SXTB-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt create mode 100644 
test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-VQADD-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt create mode 100644 test/MC/Disassembler/ARM/thumb-printf.txt create mode 100644 test/MC/ELF/cfi-adjust-cfa-offset.s create mode 100644 test/MC/ELF/cfi-rel-offset.s create mode 100644 test/MC/ELF/cfi-rel-offset2.s create mode 100644 test/MC/ELF/cfi-same-value.s create mode 100644 test/MC/ELF/section-quoting.s create mode 100644 test/MC/ELF/weak-relocation.s create mode 100644 test/MC/MachO/section-attributes.s create mode 100644 test/MC/MachO/temp-labels.s create mode 100644 test/MC/MachO/variable-errors.s create mode 100644 test/MC/MachO/variable-exprs.s create mode 100644 test/MC/X86/padlock.s create mode 100644 test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll delete mode 100644 test/Transforms/GVN/invariant-simple.ll create mode 100644 test/Transforms/GlobalOpt/2011-04-09-EmptyGlobalCtors.ll create mode 100644 test/Transforms/GlobalOpt/cxx-dtor.ll create mode 100644 test/Transforms/InstCombine/2011-03-08-SRemMinusOneBadOpt.ll create mode 100644 test/Transforms/InstCombine/ExtractCast.ll create mode 100644 test/Transforms/InstCombine/debuginfo.ll create 
mode 100644 test/Transforms/InstCombine/fcmp.ll create mode 100644 test/Transforms/InstCombine/fdiv.ll create mode 100644 test/Transforms/InstCombine/gep-addrspace.ll create mode 100644 test/Transforms/InstCombine/merge-icmp.ll create mode 100644 test/Transforms/InstCombine/sign-test-and-or.ll create mode 100644 test/Transforms/InstCombine/strcpy_chk-64.ll create mode 100644 test/Transforms/InstSimplify/rem.ll create mode 100644 test/Transforms/Internalize/available_externally.ll create mode 100644 test/Transforms/JumpThreading/2011-04-02-SimplifyDeadBlock.ll create mode 100644 test/Transforms/JumpThreading/2011-04-14-InfLoop.ll create mode 100644 test/Transforms/JumpThreading/pr9331.ll create mode 100644 test/Transforms/LCSSA/unused-phis.ll create mode 100644 test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll create mode 100644 test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll create mode 100644 test/Transforms/LICM/2011-04-09-RAUW-AST.ll create mode 100644 test/Transforms/LICM/debug-value.ll create mode 100644 test/Transforms/LoopIdiom/debug-line.ll create mode 100644 test/Transforms/PhaseOrdering/basic.ll create mode 100644 test/Transforms/Reassociate/secondary.ll delete mode 100644 test/Transforms/SRETPromotion/2008-03-11-attributes.ll delete mode 100644 test/Transforms/SRETPromotion/2008-06-04-function-pointer-passing.ll delete mode 100644 test/Transforms/SRETPromotion/2008-06-05-non-call-use.ll delete mode 100644 test/Transforms/SRETPromotion/basictest.ll delete mode 100644 test/Transforms/SRETPromotion/dg.exp create mode 100644 test/Transforms/ScalarRepl/inline-vector.ll create mode 100644 test/Transforms/ScalarRepl/only-memcpy-uses.ll create mode 100644 test/Transforms/SimplifyCFG/2011-03-08-UnreachableUse.ll create mode 100644 test/Transforms/SimplifyCFG/PhiEliminate3.ll create mode 100644 test/Transforms/SimplifyCFG/branch-fold-dbg.ll create mode 100644 test/Transforms/SimplifyCFG/hoist-dbgvalue.ll create mode 100644 
test/Transforms/SimplifyCFG/switch-on-const-select.ll create mode 100644 test/Transforms/SimplifyCFG/trap-debugloc.ll create mode 100644 test/Transforms/SimplifyLibCalls/debug-line.ll delete mode 100644 test/Transforms/SimplifyLibCalls/half-powr.ll create mode 100644 test/Transforms/SimplifyLibCalls/iprintf.ll (limited to 'test') diff --git a/test/Analysis/BasicAA/intrinsics.ll b/test/Analysis/BasicAA/intrinsics.ll new file mode 100644 index 000000000000..59725cfded05 --- /dev/null +++ b/test/Analysis/BasicAA/intrinsics.ll @@ -0,0 +1,39 @@ +; RUN: opt -basicaa -gvn -S < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" + +; BasicAA should prove that these calls don't interfere, since they are +; IntrArgReadMem and have noalias pointers. + +; CHECK: define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind +; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %c = add <8 x i16> %a, %a +define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) { +entry: + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + %c = add <8 x i16> %a, %b + ret <8 x i16> %c +} + +; CHECK: define <8 x i16> @test1(i8* %p, <8 x i16> %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %q = getelementptr i8* %p, i64 16 +; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind +; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %c = add <8 x i16> %a, %a +define <8 x i16> @test1(i8* %p, <8 x i16> %y) { +entry: + %q = getelementptr i8* %p, i64 16 + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, 
i32 16) nounwind + call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + %c = add <8 x i16> %a, %b + ret <8 x i16> %c +} + +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind diff --git a/test/Analysis/BasicAA/store-promote.ll b/test/Analysis/BasicAA/store-promote.ll index 33d0f3a5449b..0db805c3e21e 100644 --- a/test/Analysis/BasicAA/store-promote.ll +++ b/test/Analysis/BasicAA/store-promote.ll @@ -24,7 +24,7 @@ Out: ; preds = %Loop ; The Loop block should be empty after the load/store are promoted. ; CHECK: @test1 -; CHECK: load i32* @B +; CHECK: load i32* @A ; CHECK: Loop: ; CHECK-NEXT: br i1 %c, label %Out, label %Loop ; CHECK: Out: diff --git a/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll b/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll deleted file mode 100644 index b73b7f03f7e7..000000000000 --- a/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll +++ /dev/null @@ -1,97 +0,0 @@ -; RUN: opt < %s -analyze -postdomfrontier \ -; RUN: -disable-verify -; ModuleID = '2006-09-26-PostDominanceFrontier.bc' -target datalayout = "e-p:64:64" -target triple = "alphaev67-unknown-linux-gnu" - %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [44 x i8] } - %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 } -@TOP = external global i64* ; [#uses=1] -@BOT = external global i64* ; [#uses=1] -@str = external global [2 x i8] ; <[2 x i8]*> [#uses=0] - -declare void @fopen() - -define void @main(i8** %argv) { -entry: - %netSelect.i507 = alloca i64, align 8 ; [#uses=0] - %topStart.i = alloca i64, align 8 ; [#uses=0] - %topEnd.i = alloca i64, align 8 ; [#uses=0] - %botStart.i = alloca i64, align 8 ; [#uses=0] - 
%botEnd.i = alloca i64, align 8 ; [#uses=0] - %c1.i154 = alloca i32, align 4 ; [#uses=0] - %b1.i155 = alloca i32, align 4 ; [#uses=0] - %t1.i156 = alloca i32, align 4 ; [#uses=0] - %c1.i = alloca i32, align 4 ; [#uses=0] - %b1.i = alloca i32, align 4 ; [#uses=0] - %t1.i = alloca i32, align 4 ; [#uses=0] - %netSelect.i5 = alloca i64, align 8 ; [#uses=0] - %netSelect.i = alloca i64, align 8 ; [#uses=0] - %tmp2.i = getelementptr i8** %argv, i32 1 ; [#uses=1] - %tmp3.i4 = load i8** %tmp2.i ; [#uses=0] - call void @fopen( ) - br i1 false, label %DimensionChannel.exit, label %bb.backedge.i - -bb.backedge.i: ; preds = %entry - ret void - -DimensionChannel.exit: ; preds = %entry - %tmp13.i137 = malloc i64, i32 0 ; [#uses=1] - %tmp610.i = malloc i64, i32 0 ; [#uses=1] - br label %cond_true.i143 - -cond_true.i143: ; preds = %cond_true.i143, %DimensionChannel.exit - %tmp9.i140 = getelementptr i64* %tmp13.i137, i64 0 ; [#uses=0] - %tmp12.i = getelementptr i64* %tmp610.i, i64 0 ; [#uses=0] - br i1 false, label %bb18.i144, label %cond_true.i143 - -bb18.i144: ; preds = %cond_true.i143 - call void @fopen( ) - %tmp76.i105 = malloc i64, i32 0 ; [#uses=3] - %tmp674.i = malloc i64, i32 0 ; [#uses=2] - %tmp1072.i = malloc i64, i32 0 ; [#uses=2] - %tmp1470.i = malloc i64, i32 0 ; [#uses=1] - br label %cond_true.i114 - -cond_true.i114: ; preds = %cond_true.i114, %bb18.i144 - %tmp17.i108 = getelementptr i64* %tmp76.i105, i64 0 ; [#uses=0] - %tmp20.i = getelementptr i64* %tmp674.i, i64 0 ; [#uses=0] - %tmp23.i111 = getelementptr i64* %tmp1470.i, i64 0 ; [#uses=0] - br i1 false, label %cond_true40.i, label %cond_true.i114 - -cond_true40.i: ; preds = %cond_true40.i, %cond_true.i114 - %tmp33.i115 = getelementptr i64* %tmp1072.i, i64 0 ; [#uses=0] - br i1 false, label %bb142.i, label %cond_true40.i - -cond_next54.i: ; preds = %cond_true76.i - %tmp57.i = getelementptr i64* %tmp55.i, i64 0 ; [#uses=0] - br i1 false, label %bb64.i, label %bb69.i - -bb64.i: ; preds = %cond_true76.i, %cond_next54.i 
- %tmp67.i117 = getelementptr i64* %tmp76.i105, i64 0 ; [#uses=0] - br i1 false, label %bb114.i, label %cond_true111.i - -bb69.i: ; preds = %cond_next54.i - br i1 false, label %bb79.i, label %cond_true76.i - -cond_true76.i: ; preds = %bb142.i, %bb69.i - %tmp48.i = getelementptr i64* %tmp46.i, i64 0 ; [#uses=0] - br i1 false, label %bb64.i, label %cond_next54.i - -bb79.i: ; preds = %bb69.i - br i1 false, label %bb114.i, label %cond_true111.i - -cond_true111.i: ; preds = %bb79.i, %bb64.i - %tmp84.i127 = getelementptr i64* %tmp46.i, i64 0 ; [#uses=0] - ret void - -bb114.i: ; preds = %bb142.i, %bb79.i, %bb64.i - %tmp117.i = getelementptr i64* %tmp76.i105, i64 0 ; [#uses=0] - %tmp132.i131 = getelementptr i64* %tmp674.i, i64 0 ; [#uses=0] - %tmp122.i = getelementptr i64* %tmp1072.i, i64 0 ; [#uses=0] - ret void - -bb142.i: ; preds = %cond_true40.i - %tmp46.i = load i64** @BOT ; [#uses=2] - %tmp55.i = load i64** @TOP ; [#uses=1] - br i1 false, label %bb114.i, label %cond_true76.i -} diff --git a/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll b/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll deleted file mode 100644 index 1ec056bc34e0..000000000000 --- a/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll +++ /dev/null @@ -1,692 +0,0 @@ -; RUN: opt < %s -postdomfrontier -disable-output - -define void @SManager() { -entry: - br label %bb.outer - -bb.outer: ; preds = %bb193, %entry - br label %bb.outer156 - -bb.loopexit: ; preds = %bb442 - br label %bb.outer156 - -bb.outer156: ; preds = %bb.loopexit, %bb.outer - br label %bb - -bb: ; preds = %bb.backedge, %bb.outer156 - br i1 false, label %cond_true, label %bb.cond_next_crit_edge - -bb.cond_next_crit_edge: ; preds = %bb - br label %cond_next - -cond_true: ; preds = %bb - br label %cond_next - -cond_next: ; preds = %cond_true, %bb.cond_next_crit_edge - br i1 false, label %cond_next.bb.backedge_crit_edge, label %cond_next107 - -cond_next.bb.backedge_crit_edge: ; preds = 
%cond_next - br label %bb.backedge - -bb.backedge: ; preds = %cond_true112.bb.backedge_crit_edge, %cond_next.bb.backedge_crit_edge - br label %bb - -cond_next107: ; preds = %cond_next - br i1 false, label %cond_true112, label %cond_next197 - -cond_true112: ; preds = %cond_next107 - br i1 false, label %cond_true118, label %cond_true112.bb.backedge_crit_edge - -cond_true112.bb.backedge_crit_edge: ; preds = %cond_true112 - br label %bb.backedge - -cond_true118: ; preds = %cond_true112 - br i1 false, label %bb123.preheader, label %cond_true118.bb148_crit_edge - -cond_true118.bb148_crit_edge: ; preds = %cond_true118 - br label %bb148 - -bb123.preheader: ; preds = %cond_true118 - br label %bb123 - -bb123: ; preds = %bb142.bb123_crit_edge, %bb123.preheader - br i1 false, label %bb123.bb142_crit_edge, label %cond_next.i57 - -bb123.bb142_crit_edge: ; preds = %bb123 - br label %bb142 - -cond_next.i57: ; preds = %bb123 - br i1 false, label %cond_true135, label %cond_next.i57.bb142_crit_edge - -cond_next.i57.bb142_crit_edge: ; preds = %cond_next.i57 - br label %bb142 - -cond_true135: ; preds = %cond_next.i57 - br label %bb142 - -bb142: ; preds = %cond_true135, %cond_next.i57.bb142_crit_edge, %bb123.bb142_crit_edge - br i1 false, label %bb148.loopexit, label %bb142.bb123_crit_edge - -bb142.bb123_crit_edge: ; preds = %bb142 - br label %bb123 - -bb148.loopexit: ; preds = %bb142 - br label %bb148 - -bb148: ; preds = %bb148.loopexit, %cond_true118.bb148_crit_edge - br i1 false, label %bb151.preheader, label %bb148.bb177_crit_edge - -bb148.bb177_crit_edge: ; preds = %bb148 - br label %bb177 - -bb151.preheader: ; preds = %bb148 - br label %bb151 - -bb151: ; preds = %bb171.bb151_crit_edge, %bb151.preheader - br i1 false, label %bb151.bb171_crit_edge, label %cond_next.i49 - -bb151.bb171_crit_edge: ; preds = %bb151 - br label %bb171 - -cond_next.i49: ; preds = %bb151 - br i1 false, label %cond_true164, label %cond_next.i49.bb171_crit_edge - -cond_next.i49.bb171_crit_edge: ; preds = 
%cond_next.i49 - br label %bb171 - -cond_true164: ; preds = %cond_next.i49 - br label %bb171 - -bb171: ; preds = %cond_true164, %cond_next.i49.bb171_crit_edge, %bb151.bb171_crit_edge - br i1 false, label %bb177.loopexit, label %bb171.bb151_crit_edge - -bb171.bb151_crit_edge: ; preds = %bb171 - br label %bb151 - -bb177.loopexit: ; preds = %bb171 - br label %bb177 - -bb177: ; preds = %bb177.loopexit, %bb148.bb177_crit_edge - br i1 false, label %bb180.preheader, label %bb177.bb193_crit_edge - -bb177.bb193_crit_edge: ; preds = %bb177 - br label %bb193 - -bb180.preheader: ; preds = %bb177 - br label %bb180 - -bb180: ; preds = %bb180.bb180_crit_edge, %bb180.preheader - br i1 false, label %bb193.loopexit, label %bb180.bb180_crit_edge - -bb180.bb180_crit_edge: ; preds = %bb180 - br label %bb180 - -bb193.loopexit: ; preds = %bb180 - br label %bb193 - -bb193: ; preds = %bb193.loopexit, %bb177.bb193_crit_edge - br label %bb.outer - -cond_next197: ; preds = %cond_next107 - br i1 false, label %cond_next210, label %cond_true205 - -cond_true205: ; preds = %cond_next197 - br i1 false, label %cond_true205.bb213_crit_edge, label %cond_true205.bb299_crit_edge - -cond_true205.bb299_crit_edge: ; preds = %cond_true205 - br label %bb299 - -cond_true205.bb213_crit_edge: ; preds = %cond_true205 - br label %bb213 - -cond_next210: ; preds = %cond_next197 - br label %bb293 - -bb213: ; preds = %bb293.bb213_crit_edge, %cond_true205.bb213_crit_edge - br i1 false, label %bb213.cond_next290_crit_edge, label %cond_true248 - -bb213.cond_next290_crit_edge: ; preds = %bb213 - br label %cond_next290 - -cond_true248: ; preds = %bb213 - br i1 false, label %cond_true248.cond_next290_crit_edge, label %cond_true255 - -cond_true248.cond_next290_crit_edge: ; preds = %cond_true248 - br label %cond_next290 - -cond_true255: ; preds = %cond_true248 - br i1 false, label %cond_true266, label %cond_true255.cond_next271_crit_edge - -cond_true255.cond_next271_crit_edge: ; preds = %cond_true255 - br label %cond_next271 
- -cond_true266: ; preds = %cond_true255 - br label %cond_next271 - -cond_next271: ; preds = %cond_true266, %cond_true255.cond_next271_crit_edge - br label %cond_next290 - -cond_next290: ; preds = %cond_next271, %cond_true248.cond_next290_crit_edge, %bb213.cond_next290_crit_edge - br label %bb293 - -bb293: ; preds = %cond_next290, %cond_next210 - br i1 false, label %bb293.bb213_crit_edge, label %bb293.bb299_crit_edge - -bb293.bb299_crit_edge: ; preds = %bb293 - br label %bb299 - -bb293.bb213_crit_edge: ; preds = %bb293 - br label %bb213 - -bb299: ; preds = %bb293.bb299_crit_edge, %cond_true205.bb299_crit_edge - br i1 false, label %bb302.preheader, label %bb299.bb390_crit_edge - -bb299.bb390_crit_edge: ; preds = %bb299 - br label %bb390 - -bb302.preheader: ; preds = %bb299 - br label %bb302 - -bb302: ; preds = %bb384.bb302_crit_edge, %bb302.preheader - br i1 false, label %bb302.bb384_crit_edge, label %cond_true339 - -bb302.bb384_crit_edge: ; preds = %bb302 - br label %bb384 - -cond_true339: ; preds = %bb302 - br i1 false, label %cond_true339.bb384_crit_edge, label %cond_true346 - -cond_true339.bb384_crit_edge: ; preds = %cond_true339 - br label %bb384 - -cond_true346: ; preds = %cond_true339 - br i1 false, label %cond_true357, label %cond_true346.cond_next361_crit_edge - -cond_true346.cond_next361_crit_edge: ; preds = %cond_true346 - br label %cond_next361 - -cond_true357: ; preds = %cond_true346 - br label %cond_next361 - -cond_next361: ; preds = %cond_true357, %cond_true346.cond_next361_crit_edge - br label %bb384 - -bb384: ; preds = %cond_next361, %cond_true339.bb384_crit_edge, %bb302.bb384_crit_edge - br i1 false, label %bb390.loopexit, label %bb384.bb302_crit_edge - -bb384.bb302_crit_edge: ; preds = %bb384 - br label %bb302 - -bb390.loopexit: ; preds = %bb384 - br label %bb390 - -bb390: ; preds = %bb390.loopexit, %bb299.bb390_crit_edge - br i1 false, label %bb391.preheader, label %bb390.bb442.preheader_crit_edge - -bb390.bb442.preheader_crit_edge: ; preds = 
%bb390 - br label %bb442.preheader - -bb391.preheader: ; preds = %bb390 - br label %bb391 - -bb391: ; preds = %bb413.bb391_crit_edge, %bb391.preheader - br i1 false, label %bb391.bb413_crit_edge, label %cond_next404 - -bb391.bb413_crit_edge: ; preds = %bb391 - br label %bb413 - -cond_next404: ; preds = %bb391 - br i1 false, label %cond_next404.HWrite.exit_crit_edge, label %cond_next.i13 - -cond_next404.HWrite.exit_crit_edge: ; preds = %cond_next404 - br label %HWrite.exit - -cond_next.i13: ; preds = %cond_next404 - br i1 false, label %cond_next.i13.cond_next13.i_crit_edge, label %cond_true12.i - -cond_next.i13.cond_next13.i_crit_edge: ; preds = %cond_next.i13 - br label %cond_next13.i - -cond_true12.i: ; preds = %cond_next.i13 - br label %cond_next13.i - -cond_next13.i: ; preds = %cond_true12.i, %cond_next.i13.cond_next13.i_crit_edge - br i1 false, label %cond_next13.i.bb.i22_crit_edge, label %cond_next43.i - -cond_next13.i.bb.i22_crit_edge: ; preds = %cond_next13.i - br label %bb.i22 - -cond_next43.i: ; preds = %cond_next13.i - br i1 false, label %cond_next43.i.bb.i22_crit_edge, label %bb60.i - -cond_next43.i.bb.i22_crit_edge: ; preds = %cond_next43.i - br label %bb.i22 - -bb.i22: ; preds = %cond_next43.i.bb.i22_crit_edge, %cond_next13.i.bb.i22_crit_edge - br label %bb413 - -bb60.i: ; preds = %cond_next43.i - br i1 false, label %bb60.i.HWrite.exit_crit_edge, label %cond_true81.i - -bb60.i.HWrite.exit_crit_edge: ; preds = %bb60.i - br label %HWrite.exit - -cond_true81.i: ; preds = %bb60.i - br label %bb413 - -HWrite.exit: ; preds = %bb60.i.HWrite.exit_crit_edge, %cond_next404.HWrite.exit_crit_edge - br label %bb413 - -bb413: ; preds = %HWrite.exit, %cond_true81.i, %bb.i22, %bb391.bb413_crit_edge - br i1 false, label %bb442.preheader.loopexit, label %bb413.bb391_crit_edge - -bb413.bb391_crit_edge: ; preds = %bb413 - br label %bb391 - -bb442.preheader.loopexit: ; preds = %bb413 - br label %bb442.preheader - -bb442.preheader: ; preds = %bb442.preheader.loopexit, 
%bb390.bb442.preheader_crit_edge - br label %bb442.outer - -bb420: ; preds = %bb442 - br i1 false, label %bb439.loopexit, label %cond_next433 - -cond_next433: ; preds = %bb420 - br i1 false, label %cond_next433.HRead.exit.loopexit_crit_edge, label %cond_next.i - -cond_next433.HRead.exit.loopexit_crit_edge: ; preds = %cond_next433 - br label %HRead.exit.loopexit - -cond_next.i: ; preds = %cond_next433 - br i1 false, label %cond_true9.i, label %cond_false223.i - -cond_true9.i: ; preds = %cond_next.i - switch i32 0, label %cond_false.i [ - i32 1, label %cond_true9.i.cond_true15.i_crit_edge - i32 5, label %cond_true9.i.cond_true15.i_crit_edge9 - ] - -cond_true9.i.cond_true15.i_crit_edge9: ; preds = %cond_true9.i - br label %cond_true15.i - -cond_true9.i.cond_true15.i_crit_edge: ; preds = %cond_true9.i - br label %cond_true15.i - -cond_true15.i: ; preds = %cond_true9.i.cond_true15.i_crit_edge, %cond_true9.i.cond_true15.i_crit_edge9 - br i1 false, label %cond_true15.i.cond_true44.i_crit_edge, label %cond_true15.i.cond_false49.i_crit_edge - -cond_true15.i.cond_false49.i_crit_edge: ; preds = %cond_true15.i - br label %cond_false49.i - -cond_true15.i.cond_true44.i_crit_edge: ; preds = %cond_true15.i - br label %cond_true44.i - -cond_false.i: ; preds = %cond_true9.i - br i1 false, label %cond_false.i.cond_next39.i_crit_edge, label %cond_true30.i - -cond_false.i.cond_next39.i_crit_edge: ; preds = %cond_false.i - br label %cond_next39.i - -cond_true30.i: ; preds = %cond_false.i - br label %cond_next39.i - -cond_next39.i: ; preds = %cond_true30.i, %cond_false.i.cond_next39.i_crit_edge - br i1 false, label %cond_next39.i.cond_true44.i_crit_edge, label %cond_next39.i.cond_false49.i_crit_edge - -cond_next39.i.cond_false49.i_crit_edge: ; preds = %cond_next39.i - br label %cond_false49.i - -cond_next39.i.cond_true44.i_crit_edge: ; preds = %cond_next39.i - br label %cond_true44.i - -cond_true44.i: ; preds = %cond_next39.i.cond_true44.i_crit_edge, 
%cond_true15.i.cond_true44.i_crit_edge - br i1 false, label %cond_true44.i.cond_next70.i_crit_edge, label %cond_true44.i.cond_true61.i_crit_edge - -cond_true44.i.cond_true61.i_crit_edge: ; preds = %cond_true44.i - br label %cond_true61.i - -cond_true44.i.cond_next70.i_crit_edge: ; preds = %cond_true44.i - br label %cond_next70.i - -cond_false49.i: ; preds = %cond_next39.i.cond_false49.i_crit_edge, %cond_true15.i.cond_false49.i_crit_edge - br i1 false, label %cond_false49.i.cond_next70.i_crit_edge, label %cond_false49.i.cond_true61.i_crit_edge - -cond_false49.i.cond_true61.i_crit_edge: ; preds = %cond_false49.i - br label %cond_true61.i - -cond_false49.i.cond_next70.i_crit_edge: ; preds = %cond_false49.i - br label %cond_next70.i - -cond_true61.i: ; preds = %cond_false49.i.cond_true61.i_crit_edge, %cond_true44.i.cond_true61.i_crit_edge - br i1 false, label %cond_true61.i.cond_next70.i_crit_edge, label %cond_true67.i - -cond_true61.i.cond_next70.i_crit_edge: ; preds = %cond_true61.i - br label %cond_next70.i - -cond_true67.i: ; preds = %cond_true61.i - br label %cond_next70.i - -cond_next70.i: ; preds = %cond_true67.i, %cond_true61.i.cond_next70.i_crit_edge, %cond_false49.i.cond_next70.i_crit_edge, %cond_true44.i.cond_next70.i_crit_edge - br i1 false, label %cond_true77.i, label %cond_next81.i - -cond_true77.i: ; preds = %cond_next70.i - br label %bb442.outer.backedge - -cond_next81.i: ; preds = %cond_next70.i - br i1 false, label %cond_true87.i, label %cond_false94.i - -cond_true87.i: ; preds = %cond_next81.i - br i1 false, label %cond_true87.i.cond_true130.i_crit_edge, label %cond_true87.i.cond_next135.i_crit_edge - -cond_true87.i.cond_next135.i_crit_edge: ; preds = %cond_true87.i - br label %cond_next135.i - -cond_true87.i.cond_true130.i_crit_edge: ; preds = %cond_true87.i - br label %cond_true130.i - -cond_false94.i: ; preds = %cond_next81.i - switch i32 0, label %cond_false94.i.cond_next125.i_crit_edge [ - i32 1, label %cond_false94.i.cond_true100.i_crit_edge - 
i32 5, label %cond_false94.i.cond_true100.i_crit_edge10 - ] - -cond_false94.i.cond_true100.i_crit_edge10: ; preds = %cond_false94.i - br label %cond_true100.i - -cond_false94.i.cond_true100.i_crit_edge: ; preds = %cond_false94.i - br label %cond_true100.i - -cond_false94.i.cond_next125.i_crit_edge: ; preds = %cond_false94.i - br label %cond_next125.i - -cond_true100.i: ; preds = %cond_false94.i.cond_true100.i_crit_edge, %cond_false94.i.cond_true100.i_crit_edge10 - br i1 false, label %cond_true107.i, label %cond_true100.i.cond_next109.i_crit_edge - -cond_true100.i.cond_next109.i_crit_edge: ; preds = %cond_true100.i - br label %cond_next109.i - -cond_true107.i: ; preds = %cond_true100.i - br label %cond_next109.i - -cond_next109.i: ; preds = %cond_true107.i, %cond_true100.i.cond_next109.i_crit_edge - br i1 false, label %cond_next109.i.cond_next125.i_crit_edge, label %cond_true116.i - -cond_next109.i.cond_next125.i_crit_edge: ; preds = %cond_next109.i - br label %cond_next125.i - -cond_true116.i: ; preds = %cond_next109.i - br label %cond_next125.i - -cond_next125.i: ; preds = %cond_true116.i, %cond_next109.i.cond_next125.i_crit_edge, %cond_false94.i.cond_next125.i_crit_edge - br i1 false, label %cond_next125.i.cond_true130.i_crit_edge, label %cond_next125.i.cond_next135.i_crit_edge - -cond_next125.i.cond_next135.i_crit_edge: ; preds = %cond_next125.i - br label %cond_next135.i - -cond_next125.i.cond_true130.i_crit_edge: ; preds = %cond_next125.i - br label %cond_true130.i - -cond_true130.i: ; preds = %cond_next125.i.cond_true130.i_crit_edge, %cond_true87.i.cond_true130.i_crit_edge - br label %cond_next135.i - -cond_next135.i: ; preds = %cond_true130.i, %cond_next125.i.cond_next135.i_crit_edge, %cond_true87.i.cond_next135.i_crit_edge - br i1 false, label %cond_true142.i, label %cond_next135.i.cond_next149.i_crit_edge - -cond_next135.i.cond_next149.i_crit_edge: ; preds = %cond_next135.i - br label %cond_next149.i - -cond_true142.i: ; preds = %cond_next135.i - br label 
%cond_next149.i - -cond_next149.i: ; preds = %cond_true142.i, %cond_next135.i.cond_next149.i_crit_edge - br i1 false, label %cond_true156.i, label %cond_next149.i.cond_next163.i_crit_edge - -cond_next149.i.cond_next163.i_crit_edge: ; preds = %cond_next149.i - br label %cond_next163.i - -cond_true156.i: ; preds = %cond_next149.i - br label %cond_next163.i - -cond_next163.i: ; preds = %cond_true156.i, %cond_next149.i.cond_next163.i_crit_edge - br i1 false, label %cond_true182.i, label %cond_next163.i.cond_next380.i_crit_edge - -cond_next163.i.cond_next380.i_crit_edge: ; preds = %cond_next163.i - br label %cond_next380.i - -cond_true182.i: ; preds = %cond_next163.i - br i1 false, label %cond_true182.i.cond_next380.i_crit_edge, label %cond_true196.i - -cond_true182.i.cond_next380.i_crit_edge: ; preds = %cond_true182.i - br label %cond_next380.i - -cond_true196.i: ; preds = %cond_true182.i - br i1 false, label %cond_true210.i, label %cond_true196.i.cond_next380.i_crit_edge - -cond_true196.i.cond_next380.i_crit_edge: ; preds = %cond_true196.i - br label %cond_next380.i - -cond_true210.i: ; preds = %cond_true196.i - br i1 false, label %cond_true216.i, label %cond_true210.i.cond_next380.i_crit_edge - -cond_true210.i.cond_next380.i_crit_edge: ; preds = %cond_true210.i - br label %cond_next380.i - -cond_true216.i: ; preds = %cond_true210.i - br label %cond_next380.i - -cond_false223.i: ; preds = %cond_next.i - br i1 false, label %cond_true229.i, label %cond_false355.i - -cond_true229.i: ; preds = %cond_false223.i - br i1 false, label %cond_true229.i.HRead.exit.loopexit_crit_edge, label %cond_next243.i - -cond_true229.i.HRead.exit.loopexit_crit_edge: ; preds = %cond_true229.i - br label %HRead.exit.loopexit - -cond_next243.i: ; preds = %cond_true229.i - br i1 false, label %cond_true248.i, label %cond_false255.i - -cond_true248.i: ; preds = %cond_next243.i - br label %cond_next260.i - -cond_false255.i: ; preds = %cond_next243.i - br label %cond_next260.i - -cond_next260.i: ; 
preds = %cond_false255.i, %cond_true248.i - br i1 false, label %cond_true267.i, label %cond_next273.i - -cond_true267.i: ; preds = %cond_next260.i - br label %bb442.backedge - -bb442.backedge: ; preds = %bb.i, %cond_true267.i - br label %bb442 - -cond_next273.i: ; preds = %cond_next260.i - br i1 false, label %cond_true281.i, label %cond_next273.i.cond_next288.i_crit_edge - -cond_next273.i.cond_next288.i_crit_edge: ; preds = %cond_next273.i - br label %cond_next288.i - -cond_true281.i: ; preds = %cond_next273.i - br label %cond_next288.i - -cond_next288.i: ; preds = %cond_true281.i, %cond_next273.i.cond_next288.i_crit_edge - br i1 false, label %cond_true295.i, label %cond_next288.i.cond_next302.i_crit_edge - -cond_next288.i.cond_next302.i_crit_edge: ; preds = %cond_next288.i - br label %cond_next302.i - -cond_true295.i: ; preds = %cond_next288.i - br label %cond_next302.i - -cond_next302.i: ; preds = %cond_true295.i, %cond_next288.i.cond_next302.i_crit_edge - br i1 false, label %cond_next302.i.cond_next380.i_crit_edge, label %cond_true328.i - -cond_next302.i.cond_next380.i_crit_edge: ; preds = %cond_next302.i - br label %cond_next380.i - -cond_true328.i: ; preds = %cond_next302.i - br i1 false, label %cond_true343.i, label %cond_true328.i.cond_next380.i_crit_edge - -cond_true328.i.cond_next380.i_crit_edge: ; preds = %cond_true328.i - br label %cond_next380.i - -cond_true343.i: ; preds = %cond_true328.i - br i1 false, label %cond_true349.i, label %cond_true343.i.cond_next380.i_crit_edge - -cond_true343.i.cond_next380.i_crit_edge: ; preds = %cond_true343.i - br label %cond_next380.i - -cond_true349.i: ; preds = %cond_true343.i - br label %cond_next380.i - -cond_false355.i: ; preds = %cond_false223.i - br i1 false, label %cond_false355.i.bb.i_crit_edge, label %cond_next363.i - -cond_false355.i.bb.i_crit_edge: ; preds = %cond_false355.i - br label %bb.i - -cond_next363.i: ; preds = %cond_false355.i - br i1 false, label %bb377.i, label %cond_next363.i.bb.i_crit_edge - 
-cond_next363.i.bb.i_crit_edge: ; preds = %cond_next363.i - br label %bb.i - -bb.i: ; preds = %cond_next363.i.bb.i_crit_edge, %cond_false355.i.bb.i_crit_edge - br label %bb442.backedge - -bb377.i: ; preds = %cond_next363.i - br label %cond_next380.i - -cond_next380.i: ; preds = %bb377.i, %cond_true349.i, %cond_true343.i.cond_next380.i_crit_edge, %cond_true328.i.cond_next380.i_crit_edge, %cond_next302.i.cond_next380.i_crit_edge, %cond_true216.i, %cond_true210.i.cond_next380.i_crit_edge, %cond_true196.i.cond_next380.i_crit_edge, %cond_true182.i.cond_next380.i_crit_edge, %cond_next163.i.cond_next380.i_crit_edge - br i1 false, label %cond_next380.i.HRead.exit_crit_edge, label %cond_true391.i - -cond_next380.i.HRead.exit_crit_edge: ; preds = %cond_next380.i - br label %HRead.exit - -cond_true391.i: ; preds = %cond_next380.i - br label %bb442.outer.backedge - -bb442.outer.backedge: ; preds = %bb439, %cond_true391.i, %cond_true77.i - br label %bb442.outer - -HRead.exit.loopexit: ; preds = %cond_true229.i.HRead.exit.loopexit_crit_edge, %cond_next433.HRead.exit.loopexit_crit_edge - br label %HRead.exit - -HRead.exit: ; preds = %HRead.exit.loopexit, %cond_next380.i.HRead.exit_crit_edge - br label %bb439 - -bb439.loopexit: ; preds = %bb420 - br label %bb439 - -bb439: ; preds = %bb439.loopexit, %HRead.exit - br label %bb442.outer.backedge - -bb442.outer: ; preds = %bb442.outer.backedge, %bb442.preheader - br label %bb442 - -bb442: ; preds = %bb442.outer, %bb442.backedge - br i1 false, label %bb420, label %bb.loopexit -} - -define void @Invalidate() { -entry: - br i1 false, label %cond_false, label %cond_true - -cond_true: ; preds = %entry - br i1 false, label %cond_true40, label %cond_true.cond_next_crit_edge - -cond_true.cond_next_crit_edge: ; preds = %cond_true - br label %cond_next - -cond_true40: ; preds = %cond_true - br label %cond_next - -cond_next: ; preds = %cond_true40, %cond_true.cond_next_crit_edge - br i1 false, label %cond_true68, label 
%cond_next.cond_next73_crit_edge - -cond_next.cond_next73_crit_edge: ; preds = %cond_next - br label %cond_next73 - -cond_true68: ; preds = %cond_next - br label %cond_next73 - -cond_next73: ; preds = %cond_true68, %cond_next.cond_next73_crit_edge - br i1 false, label %cond_true91, label %cond_next73.cond_next96_crit_edge - -cond_next73.cond_next96_crit_edge: ; preds = %cond_next73 - br label %cond_next96 - -cond_true91: ; preds = %cond_next73 - br label %cond_next96 - -cond_next96: ; preds = %cond_true91, %cond_next73.cond_next96_crit_edge - br i1 false, label %cond_next96.cond_next112_crit_edge, label %cond_true105 - -cond_next96.cond_next112_crit_edge: ; preds = %cond_next96 - br label %cond_next112 - -cond_true105: ; preds = %cond_next96 - br label %cond_next112 - -cond_next112: ; preds = %cond_true105, %cond_next96.cond_next112_crit_edge - br i1 false, label %cond_next112.cond_next127_crit_edge, label %cond_true119 - -cond_next112.cond_next127_crit_edge: ; preds = %cond_next112 - br label %cond_next127 - -cond_true119: ; preds = %cond_next112 - br label %cond_next127 - -cond_next127: ; preds = %cond_true119, %cond_next112.cond_next127_crit_edge - br i1 false, label %cond_next141, label %cond_true134 - -cond_true134: ; preds = %cond_next127 - br i1 false, label %cond_true134.bb161_crit_edge, label %cond_true134.bb_crit_edge - -cond_true134.bb_crit_edge: ; preds = %cond_true134 - br label %bb - -cond_true134.bb161_crit_edge: ; preds = %cond_true134 - br label %bb161 - -cond_next141: ; preds = %cond_next127 - br label %bb154 - -bb: ; preds = %bb154.bb_crit_edge, %cond_true134.bb_crit_edge - br label %bb154 - -bb154: ; preds = %bb, %cond_next141 - br i1 false, label %bb154.bb161_crit_edge, label %bb154.bb_crit_edge - -bb154.bb_crit_edge: ; preds = %bb154 - br label %bb - -bb154.bb161_crit_edge: ; preds = %bb154 - br label %bb161 - -bb161: ; preds = %bb154.bb161_crit_edge, %cond_true134.bb161_crit_edge - br i1 false, label %bb161.cond_next201_crit_edge, label 
%cond_true198 - -bb161.cond_next201_crit_edge: ; preds = %bb161 - br label %cond_next201 - -cond_true198: ; preds = %bb161 - br label %cond_next201 - -cond_next201: ; preds = %cond_true198, %bb161.cond_next201_crit_edge - br i1 false, label %cond_next212, label %cond_true206 - -cond_true206: ; preds = %cond_next201 - br label %UnifiedReturnBlock - -cond_false: ; preds = %entry - br label %UnifiedReturnBlock - -cond_next212: ; preds = %cond_next201 - br label %UnifiedReturnBlock - -UnifiedReturnBlock: ; preds = %cond_next212, %cond_false, %cond_true206 - ret void -} diff --git a/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll b/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll deleted file mode 100644 index 767e5db94ce8..000000000000 --- a/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: opt < %s -postdomfrontier -disable-output - -define void @args_out_of_range() { -entry: - br label %bb - -bb: ; preds = %bb, %entry - br label %bb -} - -define void @args_out_of_range_3() { -entry: - br label %bb - -bb: ; preds = %bb, %entry - br label %bb -} - -define void @Feq() { -entry: - br i1 false, label %cond_true, label %cond_next - -cond_true: ; preds = %entry - unreachable - -cond_next: ; preds = %entry - unreachable -} diff --git a/test/Analysis/RegionInfo/next.ll b/test/Analysis/RegionInfo/next.ll index d986387099c3..377a84d389c1 100644 --- a/test/Analysis/RegionInfo/next.ll +++ b/test/Analysis/RegionInfo/next.ll @@ -32,8 +32,8 @@ __label_000020: ; preds = %__label_002001, %bb ; CHECK-NOT: => ; CHECK: [0] entry => ; CHECK-NEXT: [1] __label_002001.outer => __label_000020 -; CHECK-NEXT; [2] bb197 => bb229 -; CHECK-NEXT; [3] bb224 => bb229 +; CHECK-NEXT: [2] bb197 => bb229 +; CHECK-NEXT: [3] bb224 => bb229 ; STAT: 4 region - The # of regions ; STAT: 1 region - The # of simple regions diff --git a/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll 
b/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll new file mode 100644 index 000000000000..9f17e27577c2 --- /dev/null +++ b/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll @@ -0,0 +1,34 @@ +; RUN: opt -indvars %s +; PR9424: Attempt to use a SCEVCouldNotCompute object! +; The inner loop computes the Step and Start of the outer loop. +; Call that Vexit. The outer End value is max(2,Vexit), because +; the condition "icmp %4 < 2" does not guard the outer loop. +; SCEV knows that Vexit has range [2,4], so End == Vexit == Start. +; Now we have ExactBECount == 0. However, MinStart == 2 and MaxEnd == 4. +; Since the stride is variable and may wrap, we cannot compute +; MaxBECount. SCEV should override MaxBECount with ExactBECount. + +define void @bar() nounwind { +entry: + %. = select i1 undef, i32 2, i32 1 + br label %"5.preheader" + +"4": ; preds = %"5.preheader", %"4" + %0 = phi i32 [ 0, %"5.preheader" ], [ %1, %"4" ] + %1 = add nsw i32 %0, 1 + %2 = icmp sgt i32 %., %1 + br i1 %2, label %"4", label %"9" + +"9": ; preds = %"4" + %3 = add i32 %6, 1 + %4 = add i32 %3, %1 + %5 = icmp slt i32 %4, 2 + br i1 %5, label %"5.preheader", label %return + +"5.preheader": ; preds = %"9", %entry + %6 = phi i32 [ 0, %entry ], [ %4, %"9" ] + br label %"4" + +return: ; preds = %"9" + ret void +} diff --git a/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll b/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll new file mode 100644 index 000000000000..1600d5f05a18 --- /dev/null +++ b/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -analyze -iv-users +; PR9633: Tests that SCEV handles the mul.i2 recurrence being folded to +; constant zero. 
+ +define signext i8 @func_14(i8 signext %p_18) nounwind readnone ssp { +entry: + br label %for.inc + +for.inc: + %p_17.addr.012 = phi i32 [ 0, %entry ], [ %add, %for.inc ] + %add = add nsw i32 %p_17.addr.012, 1 + br i1 false, label %for.inc, label %for.cond + +for.cond: + %tobool.i = icmp ult i32 %add, 8192 + %shl.i = select i1 %tobool.i, i32 13, i32 0 + %shl.left.i = shl i32 %add, %shl.i + %conv.i4 = trunc i32 %shl.left.i to i8 + br i1 undef, label %for.inc9, label %if.then + +for.inc9: + %p_18.addr.011 = phi i8 [ %add12, %for.inc9 ], [ %p_18, %for.cond ] + %add12 = add i8 %p_18.addr.011, 1 + %mul.i2 = mul i8 %add12, %conv.i4 + %mul.i2.lobit = lshr i8 %mul.i2, 7 + %lor.ext.shr.i = select i1 undef, i8 %mul.i2.lobit, i8 %mul.i2 + %tobool = icmp eq i8 %lor.ext.shr.i, 0 + br i1 %tobool, label %for.inc9, label %if.then + +if.then: + ret i8 0 + +} \ No newline at end of file diff --git a/test/Analysis/ScalarEvolution/nsw-offset.ll b/test/Analysis/ScalarEvolution/nsw-offset.ll index 4cd9a6de48c5..8969a5ad4ceb 100644 --- a/test/Analysis/ScalarEvolution/nsw-offset.ll +++ b/test/Analysis/ScalarEvolution/nsw-offset.ll @@ -19,11 +19,11 @@ bb: ; preds = %bb.nph, %bb1 %i.01 = phi i32 [ %16, %bb1 ], [ 0, %bb.nph ] ; [#uses=5] ; CHECK: %1 = sext i32 %i.01 to i64 -; CHECK: --> {0,+,2}<%bb> +; CHECK: --> {0,+,2}<%bb> %1 = sext i32 %i.01 to i64 ; [#uses=1] ; CHECK: %2 = getelementptr inbounds double* %d, i64 %1 -; CHECK: --> {%d,+,16}<%bb> +; CHECK: --> {%d,+,16}<%bb> %2 = getelementptr inbounds double* %d, i64 %1 ; [#uses=1] %3 = load double* %2, align 8 ; [#uses=1] @@ -33,11 +33,11 @@ bb: ; preds = %bb.nph, %bb1 %7 = or i32 %i.01, 1 ; [#uses=1] ; CHECK: %8 = sext i32 %7 to i64 -; CHECK: --> {1,+,2}<%bb> +; CHECK: --> {1,+,2}<%bb> %8 = sext i32 %7 to i64 ; [#uses=1] ; CHECK: %9 = getelementptr inbounds double* %q, i64 %8 -; CHECK: {(8 + %q),+,16}<%bb> +; CHECK: {(8 + %q),+,16}<%bb> %9 = getelementptr inbounds double* %q, i64 %8 ; [#uses=1] ; Artificially repeat the above three 
instructions, this time using @@ -45,11 +45,11 @@ bb: ; preds = %bb.nph, %bb1 %t7 = add nsw i32 %i.01, 1 ; [#uses=1] ; CHECK: %t8 = sext i32 %t7 to i64 -; CHECK: --> {1,+,2}<%bb> +; CHECK: --> {1,+,2}<%bb> %t8 = sext i32 %t7 to i64 ; [#uses=1] ; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8 -; CHECK: {(8 + %q),+,16}<%bb> +; CHECK: {(8 + %q),+,16}<%bb> %t9 = getelementptr inbounds double* %q, i64 %t8 ; [#uses=1] %10 = load double* %9, align 8 ; [#uses=1] diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll index 9d8e2b62a9d2..da35a6cf7ae2 100644 --- a/test/Analysis/ScalarEvolution/nsw.ll +++ b/test/Analysis/ScalarEvolution/nsw.ll @@ -35,7 +35,7 @@ bb: ; preds = %bb1, %bb.nph bb1: ; preds = %bb %phitmp = sext i32 %tmp8 to i64 ; [#uses=1] ; CHECK: %phitmp -; CHECK-NEXT: --> {1,+,1}<%bb> +; CHECK-NEXT: --> {1,+,1}<%bb> %tmp9 = getelementptr double* %p, i64 %phitmp ; [#uses=1] ; CHECK: %tmp9 ; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> @@ -62,11 +62,11 @@ for.body.lr.ph.i.i: ; preds = %entry for.body.i.i: ; preds = %for.body.i.i, %for.body.lr.ph.i.i %__first.addr.02.i.i = phi i32* [ %begin, %for.body.lr.ph.i.i ], [ %ptrincdec.i.i, %for.body.i.i ] ; CHECK: %__first.addr.02.i.i -; CHECK-NEXT: --> {%begin,+,4}<%for.body.i.i> +; CHECK-NEXT: --> {%begin,+,4}<%for.body.i.i> store i32 0, i32* %__first.addr.02.i.i, align 4 %ptrincdec.i.i = getelementptr inbounds i32* %__first.addr.02.i.i, i64 1 ; CHECK: %ptrincdec.i.i -; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> +; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end br i1 %cmp.i.i, label %for.cond.for.end_crit_edge.i.i, label %for.body.i.i @@ -88,7 +88,7 @@ for.body.i.i: ; preds = %entry, %for.body.i. 
; CHECK: %indvar.i.i ; CHECK: {0,+,1}<%for.body.i.i> %tmp = add nsw i64 %indvar.i.i, 1 -; CHECK: %tmp = +; CHECK: %tmp = ; CHECK: {1,+,1}<%for.body.i.i> %ptrincdec.i.i = getelementptr inbounds i32* %begin, i64 %tmp ; CHECK: %ptrincdec.i.i = @@ -99,8 +99,8 @@ for.body.i.i: ; preds = %entry, %for.body.i. store i32 0, i32* %__first.addr.08.i.i, align 4 %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i -; CHECK: Loop %for.body.i.i: Unpredictable backedge-taken count. -; CHECK: Loop %for.body.i.i: Unpredictable max backedge-taken count. +; CHECK: Loop %for.body.i.i: backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4) +; CHECK: Loop %for.body.i.i: max backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4) _ZSt4fillIPiiEvT_S1_RKT0_.exit: ; preds = %for.body.i.i, %entry ret void } \ No newline at end of file diff --git a/test/Analysis/ScalarEvolution/sext-iv-0.ll b/test/Analysis/ScalarEvolution/sext-iv-0.ll index 2af794fbbc3e..d5d32689e17c 100644 --- a/test/Analysis/ScalarEvolution/sext-iv-0.ll +++ b/test/Analysis/ScalarEvolution/sext-iv-0.ll @@ -1,5 +1,4 @@ -; RUN: opt < %s -scalar-evolution -analyze \ -; RUN: | grep { --> \{-128,+,1\}<%bb1> Exits: 127} | count 5 +; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s ; Convert (sext {-128,+,1}) to {sext(-128),+,sext(1)}, since the ; trip count is within range where this is safe. 
@@ -13,9 +12,17 @@ bb1.thread: bb1: ; preds = %bb1, %bb1.thread %i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] +; CHECK: %i.0.reg2mem.0 +; CHECK-NEXT: --> {-128,+,1}<%bb1> Exits: 127 %0 = trunc i64 %i.0.reg2mem.0 to i8 ; [#uses=1] +; CHECK: %0 +; CHECK-NEXT: --> {-128,+,1}<%bb1> Exits: 127 %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1] +; CHECK: %1 +; CHECK-NEXT: --> {-128,+,1}<%bb1> Exits: 127 %2 = sext i9 %1 to i64 ; [#uses=1] +; CHECK: %2 +; CHECK-NEXT: --> {-128,+,1}<%bb1> Exits: 127 %3 = getelementptr double* %x, i64 %2 ; [#uses=1] %4 = load double* %3, align 8 ; [#uses=1] %5 = fmul double %4, 3.900000e+00 ; [#uses=1] diff --git a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll new file mode 100644 index 000000000000..8f080e2108bd --- /dev/null +++ b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll @@ -0,0 +1,27 @@ +; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" + +; TBAA should prove that these calls don't interfere, since they are +; IntrArgReadMem and have TBAA metadata. 
+ +; CHECK: define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind +; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %c = add <8 x i16> %a, %a +define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) { +entry: + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind, !tbaa !2 + call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16), !tbaa !1 + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind, !tbaa !2 + %c = add <8 x i16> %a, %b + ret <8 x i16> %c +} + +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind + +!0 = metadata !{metadata !"tbaa root", null} +!1 = metadata !{metadata !"A", metadata !0} +!2 = metadata !{metadata !"B", metadata !0} diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll index 6752bd8281bd..e4e2d3a56e04 100644 --- a/test/Assembler/AutoUpgradeIntrinsics.ll +++ b/test/Assembler/AutoUpgradeIntrinsics.ll @@ -7,6 +7,8 @@ ; RUN: llvm-as < %s | llvm-dis | \ ; RUN: not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*} ; RUN: llvm-as < %s | llvm-dis | \ +; RUN: not grep {llvm\\.x86\\.sse2\\.loadu} +; RUN: llvm-as < %s | llvm-dis | \ ; RUN: grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16 declare i32 @llvm.ctpop.i28(i28 %val) @@ -79,3 +81,13 @@ define void @sh64(<1 x i64> %A, <2 x i32> %B) { %r2 = call <1 x i64> @llvm.x86.mmx.psrl.q( <1 x i64> %A, <2 x i32> %B ) ; <<1 x i64>> [#uses=0] ret void } + +declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readnone +declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readnone +declare <2 x double> @llvm.x86.sse2.loadu.pd(double*) nounwind readnone +define void @test_loadu(i8* %a, double* %b) { + %v0 = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a) + %v1 = call <16 x i8> 
@llvm.x86.sse2.loadu.dq(i8* %a) + %v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b) + ret void +} diff --git a/test/Assembler/aggregate-return-single-value.ll b/test/Assembler/aggregate-return-single-value.ll index 02fb59f8a0a4..04540b54af1f 100644 --- a/test/Assembler/aggregate-return-single-value.ll +++ b/test/Assembler/aggregate-return-single-value.ll @@ -1,14 +1,8 @@ ; RUN: llvm-as < %s | llvm-dis -define { i32 } @fooa() nounwind { - ret i32 0 -} define { i32 } @foob() nounwind { ret {i32}{ i32 0 } } -define [1 x i32] @fooc() nounwind { - ret i32 0 -} define [1 x i32] @food() nounwind { ret [1 x i32][ i32 0 ] } diff --git a/test/Assembler/comment.ll b/test/Assembler/comment.ll index fe23d26fbeb4..16362abc716f 100644 --- a/test/Assembler/comment.ll +++ b/test/Assembler/comment.ll @@ -7,7 +7,7 @@ ; BARE: } @B = external global i32 -; ANNOT: @B = external global i32 ; [#uses=0] +; ANNOT: @B = external global i32 ; [#uses=0 type=i32*] define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind { entry: @@ -15,6 +15,5 @@ entry: ret <4 x i1> %cmp } -; ANNOT: %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1] - +; ANNOT: %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1 type=<4 x i1>] diff --git a/test/Bitcode/neon-intrinsics.ll b/test/Bitcode/neon-intrinsics.ll index 272cd424e2a2..feb2d74138d0 100644 --- a/test/Bitcode/neon-intrinsics.ll +++ b/test/Bitcode/neon-intrinsics.ll @@ -76,20 +76,13 @@ ; CHECK: zext <4 x i16> ; CHECK-NEXT: sub <4 x i32> -; vmull should be auto-upgraded to multiply with sext/zext -; (but vmullp should remain an intrinsic) +; vmull* intrinsics will remain intrinsics ; CHECK: vmulls8 -; CHECK-NOT: arm.neon.vmulls.v8i16 -; CHECK: sext <8 x i8> -; CHECK-NEXT: sext <8 x i8> -; CHECK-NEXT: mul <8 x i16> +; CHECK: arm.neon.vmulls.v8i16 ; CHECK: vmullu16 -; CHECK-NOT: arm.neon.vmullu.v4i32 -; CHECK: zext <4 x i16> -; CHECK-NEXT: zext <4 x i16> -; CHECK-NEXT: mul <4 x i32> +; CHECK: arm.neon.vmullu.v4i32 ; CHECK: vmullp8 ; CHECK: 
arm.neon.vmullp.v8i16 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b696682c13fa..993b6e274443 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -37,14 +37,32 @@ if(PYTHONINTERP_FOUND) foreach(INC_DIR ${INC_DIRS}) set(IDIRS "${IDIRS} -I${INC_DIR}") endforeach() - string(REPLACE "" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT}) + + if( MSVC ) + # The compiler's path may contain white space. Wrap it: + string(REPLACE "" "\\\"${CMAKE_CXX_COMPILER}\\\"" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT}) + # Eliminate continuation lines from NMake flow. PR9680 + string(REPLACE "@<<\n" " " TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + string(REPLACE "\n<<" " " TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + else() + string(REPLACE "" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT}) + endif() + string(REPLACE "" "${DEFS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) string(REPLACE "" "${CMAKE_CXX_FLAGS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) - string(REPLACE "-o" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + if (MSVC) # PR9680 + # Eliminate MSVC equivalent of -o + string(REPLACE "/Fo" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + # Eliminate "how to rename program database" argument + string(REPLACE "/Fd" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + else() + string(REPLACE "-o" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + endif(MSVC) string(REGEX REPLACE "<[^>]+>" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} ${IDIRS}") if(NOT MSVC) set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} -x c++") + # MSVC already has /TP to indicate a C++ source file endif() configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll index c31b116c55b2..b37de9dbbdfd 100644 --- a/test/CodeGen/ARM/2009-10-27-double-align.ll +++ 
b/test/CodeGen/ARM/2009-10-27-double-align.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s @.str = private constant [1 x i8] zeroinitializer, align 1 diff --git a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll index d9e1a1486a3c..fee86008ad71 100644 --- a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll +++ b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll @@ -6,7 +6,7 @@ define i32 @bar(i32 %a) nounwind { entry: %0 = tail call i32 @foo(i32 %a) nounwind ; [#uses=1] %1 = add nsw i32 %0, 3 ; [#uses=1] -; CHECK: ldmia sp!, {r11, pc} +; CHECK: pop {r11, pc} ; V4: pop ; V4-NEXT: mov pc, lr ret i32 %1 diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll index 5ad1c09eda4a..df9dbca313f2 100644 --- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll +++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll @@ -7,13 +7,13 @@ define zeroext i8 @t(%struct.foo* %this) noreturn optsize { entry: ; ARM: t: -; ARM: str r0, [r1], r0 +; ARM: str r2, [r1], r0 ; THUMB: t: ; THUMB-NOT: str r0, [r1], r0 -; THUMB: str r0, [r1] +; THUMB: str r2, [r1] %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; [#uses=1] - store i32 undef, i32* inttoptr (i32 8 to i32*), align 8 + store i32 0, i32* inttoptr (i32 8 to i32*), align 8 br i1 undef, label %bb.nph96, label %bb3 bb3: ; preds = %entry diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll index f077d04803bd..25d38ed77425 100644 --- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -mtriple=arm-apple-darwin < %s | grep DW_OP_fbreg -; Use DW_OP_fbreg in variable's location expression if the variable is in a stack slot. 
+; RUN: llc -O0 -mtriple=arm-apple-darwin < %s | grep DW_OP_breg +; Use DW_OP_breg in variable's location expression if the variable is in a stack slot. %struct.SVal = type { i8*, i32 } diff --git a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll index 163c9b030ec8..32d350e9c8b1 100644 --- a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll +++ b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll @@ -4,9 +4,9 @@ ; was being treated as an instruction count. ; CHECK: push -; CHECK: ldmia -; CHECK: ldmia -; CHECK: ldmia +; CHECK: pop +; CHECK: pop +; CHECK: pop define i32 @test(i32 %x) { entry: diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll index 8d7541feae94..e3c18cefd51d 100644 --- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll +++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll @@ -10,7 +10,7 @@ entry: ; ARM: bl _foo ; ARM: bl _foo ; ARM: bl _foo -; ARM: ldmia sp!, {r7, pc} +; ARM: pop {r7, pc} ; THUMB2: t: ; THUMB2: push diff --git a/test/CodeGen/ARM/2010-12-13-reloc-pic.ll b/test/CodeGen/ARM/2010-12-13-reloc-pic.ll deleted file mode 100644 index d5aefbee197c..000000000000 --- a/test/CodeGen/ARM/2010-12-13-reloc-pic.ll +++ /dev/null @@ -1,100 +0,0 @@ -; RUN: llc %s -mtriple=armv7-linux-gnueabi -relocation-model=pic -filetype=obj -o - | \ -; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=PIC01 %s - -;; FIXME: Reduce this test further, or even better, -;; redo as .s -> .o test once ARM AsmParser is working better - -; ModuleID = 'large2.pnacl.bc' -target triple = "armv7-none-linux-gnueabi" - -%struct._Bigint = type { %struct._Bigint*, i32, i32, i32, i32, [1 x i32] } -%struct.__FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, %struct._reent*, i8*, i32 (%struct._reent*, i8*, i8*, i32)*, i32 (%struct._reent*, i8*, i8*, i32)*, i32 (%struct._reent*, i8*, i32, i32)*, i32 (%struct._reent*, i8*)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i32, %struct._flock_t, 
%struct._mbstate_t, i32 } -%struct.__sbuf = type { i8*, i32 } -%struct.__tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32 } -%struct._atexit = type { %struct._atexit*, i32, [32 x void ()*], %struct._on_exit_args* } -%struct._flock_t = type { i32, i32, i32, i32, i32 } -%struct._glue = type { %struct._glue*, i32, %struct.__FILE* } -%struct._mbstate_t = type { i32, %union.anon } -%struct._misc_reent = type { i8*, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, [8 x i8], i32, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t } -%struct._mprec = type { %struct._Bigint*, i32, %struct._Bigint*, %struct._Bigint** } -%struct._on_exit_args = type { [32 x i8*], [32 x i8*], i32, i32 } -%struct._rand48 = type { [3 x i16], [3 x i16], i16, i64 } -%struct._reent = type { %struct.__FILE*, %struct.__FILE*, %struct.__FILE*, i32, i32, i8*, i32, i32, i8*, %struct._mprec*, void (%struct._reent*)*, i32, i32, i8*, %struct._rand48*, %struct.__tm*, i8*, void (i32)**, %struct._atexit*, %struct._atexit, %struct._glue, %struct.__FILE*, %struct._misc_reent*, i8* } -%union.anon = type { i32 } - -@buf = constant [2 x i8] c"x\00", align 4 -@_impure_ptr = external thread_local global %struct._reent* -@.str = private constant [22 x i8] c"This should fault...\0A\00", align 4 -@.str1 = private constant [40 x i8] c"We're still running. 
This is not good.\0A\00", align 4 - -define i32 @main() nounwind { -entry: - %0 = load %struct._reent** @_impure_ptr, align 4 - %1 = getelementptr inbounds %struct._reent* %0, i32 0, i32 1 - %2 = load %struct.__FILE** %1, align 4 - %3 = bitcast %struct.__FILE* %2 to i8* - %4 = tail call i32 @fwrite(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 1, i32 21, i8* %3) nounwind - %5 = load %struct._reent** @_impure_ptr, align 4 - %6 = getelementptr inbounds %struct._reent* %5, i32 0, i32 1 - %7 = load %struct.__FILE** %6, align 4 - %8 = tail call i32 @fflush(%struct.__FILE* %7) nounwind - store i8 121, i8* getelementptr inbounds ([2 x i8]* @buf, i32 0, i32 0), align 4 - %9 = load %struct._reent** @_impure_ptr, align 4 - %10 = getelementptr inbounds %struct._reent* %9, i32 0, i32 1 - %11 = load %struct.__FILE** %10, align 4 - %12 = bitcast %struct.__FILE* %11 to i8* - %13 = tail call i32 @fwrite(i8* getelementptr inbounds ([40 x i8]* @.str1, i32 0, i32 0), i32 1, i32 39, i8* %12) nounwind - ret i32 1 -} - - -; PIC01: Relocation 0x00000000 -; PIC01-NEXT: 'r_offset', 0x0000001c -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x0000001b - - -; PIC01: Relocation 0x00000001 -; PIC01-NEXT: 'r_offset', 0x00000038 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x0000001b - -; PIC01: Relocation 0x00000002 -; PIC01-NEXT: 'r_offset', 0x00000044 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x0000001b - -; PIC01: Relocation 0x00000003 -; PIC01-NEXT: 'r_offset', 0x00000070 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x0000001b - -; PIC01: Relocation 0x00000004 -; PIC01-NEXT: 'r_offset', 0x0000007c -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x00000019 - - -; PIC01: Relocation 0x00000005 -; PIC01-NEXT: 'r_offset', 0x00000080 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x00000018 - -; PIC01: Relocation 0x00000006 -; PIC01-NEXT: 'r_offset', 0x00000084 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x00000068 - -; PIC01: Relocation 0x00000007 -; 
PIC01-NEXT: 'r_offset', 0x00000088 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x0000001a - -; PIC01: Relocation 0x00000008 -; PIC01-NEXT: 'r_offset', 0x0000008c -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x00000018 - -declare i32 @fwrite(i8* nocapture, i32, i32, i8* nocapture) nounwind - -declare i32 @fflush(%struct.__FILE* nocapture) nounwind diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll index eaa34e7960fb..69d4a1482299 100644 --- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll +++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll @@ -10,12 +10,12 @@ @STRIDE = internal global i32 8 ; ASM: .type array00,%object @ @array00 -; ASM-NEXT: .lcomm array00,80 @ @array00 +; ASM-NEXT: .lcomm array00,80 ; ASM-NEXT: .type _MergedGlobals,%object @ @_MergedGlobals -; OBJ: Section 0x00000003 +; OBJ: Section 0x00000004 ; OBJ-NEXT: '.bss' ; OBJ: 'array00' @@ -24,7 +24,7 @@ ; OBJ-NEXT: 'st_bind', 0x00000000 ; OBJ-NEXT: 'st_type', 0x00000001 ; OBJ-NEXT: 'st_other', 0x00000000 -; OBJ-NEXT: 'st_shndx', 0x00000003 +; OBJ-NEXT: 'st_shndx', 0x00000004 define i32 @main(i32 %argc) nounwind { %1 = load i32* @sum, align 4 diff --git a/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll new file mode 100644 index 000000000000..81babe0b4b19 --- /dev/null +++ b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 + +; rdar://9117613 + +%struct.mo = type { i32, %struct.mo_pops* } +%struct.mo_pops = type { void (%struct.mo*)*, void (%struct.mo*)*, i32 (%struct.mo*, i32*, i32)*, i32 (%struct.mo*)*, i32 (%struct.mo*, i64, i32, i32, i32*, i64, i32)*, i32 (%struct.mo*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%struct.mo*, i64, i32)*, i32 (%struct.mo*, i64, i64, i32)*, i32 (%struct.mo*, i64, i64, i32)*, i32 (%struct.mo*, i32)*, i32 (%struct.mo*)*, i32 (%struct.mo*, i32)*, i8* } +%struct.ui = 
type { %struct.mo*, i32*, i32, i32*, i32*, i64, i32*, i32*, i32* } + + +define internal fastcc i32 @t(i32* %vp, i32 %withfsize, i64 %filesize) nounwind { +entry: + br i1 undef, label %bb1, label %bb + +bb: ; preds = %entry + unreachable + +bb1: ; preds = %entry + %0 = call %struct.ui* @vn_pp_to_ui(i32* undef) nounwind + call void @llvm.memset.p0i8.i32(i8* undef, i8 0, i32 40, i32 4, i1 false) + %1 = getelementptr inbounds %struct.ui* %0, i32 0, i32 0 + store %struct.mo* undef, %struct.mo** %1, align 4 + %2 = getelementptr inbounds %struct.ui* %0, i32 0, i32 5 + %3 = load i64* %2, align 4 + %4 = call i32 @mo_create_nnm(%struct.mo* undef, i64 %3, i32** undef) nounwind + br i1 undef, label %bb3, label %bb2 + +bb2: ; preds = %bb1 + unreachable + +bb3: ; preds = %bb1 + br i1 undef, label %bb4, label %bb6 + +bb4: ; preds = %bb3 + %5 = call i32 @vn_size(i32* %vp, i64* %2, i32* undef) nounwind + unreachable + +bb6: ; preds = %bb3 + ret i32 0 +} + +declare %struct.ui* @vn_pp_to_ui(i32*) + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind + +declare i32 @mo_create_nnm(%struct.mo*, i64, i32**) + +declare i32 @vn_size(i32*, i64*, i32*) diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll new file mode 100644 index 000000000000..ccda281e901e --- /dev/null +++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s + +; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4. 
+; rdar://9133587 + +%struct.Outer = type { i32, [2 x %"struct.Outer::Inner"] } +%"struct.Outer::Inner" = type { i32, i32, i8, i8 } + +@oStruct = external global %struct.Outer, align 4 + +define void @main() nounwind { +; CHECK: main: +; CHECK-NOT: ldrd +; CHECK: mul +for.body.lr.ph: + br label %for.body + +for.body: ; preds = %_Z14printIsNotZeroi.exit17.for.body_crit_edge, %for.body.lr.ph + %tmp3 = phi i1 [ false, %for.body.lr.ph ], [ %phitmp27, %_Z14printIsNotZeroi.exit17.for.body_crit_edge ] + %i.022 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %_Z14printIsNotZeroi.exit17.for.body_crit_edge ] + %x = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0 + %y = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1 + %inc = add i32 %i.022, 1 + br i1 %tmp3, label %_Z14printIsNotZeroi.exit, label %if.then.i + +if.then.i: ; preds = %for.body + unreachable + +_Z14printIsNotZeroi.exit: ; preds = %for.body + %tmp8 = load i32* %x, align 4, !tbaa !0 + %tmp11 = load i32* %y, align 4, !tbaa !0 + %mul = mul nsw i32 %tmp11, %tmp8 + %tobool.i14 = icmp eq i32 %mul, 0 + br i1 %tobool.i14, label %_Z14printIsNotZeroi.exit17, label %if.then.i16 + +if.then.i16: ; preds = %_Z14printIsNotZeroi.exit + unreachable + +_Z14printIsNotZeroi.exit17: ; preds = %_Z14printIsNotZeroi.exit + br i1 undef, label %_Z14printIsNotZeroi.exit17.for.body_crit_edge, label %for.end + +_Z14printIsNotZeroi.exit17.for.body_crit_edge: ; preds = %_Z14printIsNotZeroi.exit17 + %b.phi.trans.insert = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3 + %tmp3.pre = load i8* %b.phi.trans.insert, align 1, !tbaa !3 + %phitmp27 = icmp eq i8 undef, 0 + br label %for.body + +for.end: ; preds = %_Z14printIsNotZeroi.exit17 + ret void +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"bool", metadata !1} diff --git 
a/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll new file mode 100644 index 000000000000..7c9af6f5e590 --- /dev/null +++ b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s + +; subs r4, #1 +; cmp r4, 0 +; bgt +; cmp cannot be optimized away since it will clear the overflow bit. +; gt / ge, lt, le conditions all depend on V bit. +; rdar://9172742 + +define i32 @t() nounwind { +; CHECK: t: +entry: + br label %bb2 + +bb: ; preds = %bb2 + %0 = tail call i32 @rand() nounwind + %1 = icmp eq i32 %0, 50 + br i1 %1, label %bb3, label %bb1 + +bb1: ; preds = %bb + %tmp = tail call i32 @puts() nounwind + %indvar.next = add i32 %indvar, 1 + br label %bb2 + +bb2: ; preds = %bb1, %entry +; CHECK: bb2 +; CHECK: subs [[REG:r[0-9]+]], #1 +; CHECK: cmp [[REG]], #0 +; CHECK: bgt + %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] + %tries.0 = sub i32 2147483647, %indvar + %tmp1 = icmp sgt i32 %tries.0, 0 + br i1 %tmp1, label %bb, label %bb3 + +bb3: ; preds = %bb2, %bb + ret i32 0 +} + +declare i32 @rand() + +declare i32 @puts() nounwind diff --git a/test/CodeGen/ARM/2011-04-07-schediv.ll b/test/CodeGen/ARM/2011-04-07-schediv.ll new file mode 100644 index 000000000000..a61908fd7c45 --- /dev/null +++ b/test/CodeGen/ARM/2011-04-07-schediv.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s +; Tests preRAsched support for VRegCycle interference. 
+ +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +define void @t(i32 %src_width, float* nocapture %src_copy_start, float* nocapture %dst_copy_start, i32 %src_copy_start_index) nounwind optsize { +entry: + %src_copy_start6 = bitcast float* %src_copy_start to i8* + %0 = icmp eq i32 %src_width, 0 + br i1 %0, label %return, label %bb + +; Make sure the scheduler schedules all uses of the preincrement +; induction variable before defining the postincrement value. +; CHECK: t: +; CHECK-NOT: mov +bb: ; preds = %entry, %bb + %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ] + %tmp = mul i32 %j.05, %src_copy_start_index + %uglygep = getelementptr i8* %src_copy_start6, i32 %tmp + %src_copy_start_addr.04 = bitcast i8* %uglygep to float* + %dst_copy_start_addr.03 = getelementptr float* %dst_copy_start, i32 %j.05 + %1 = load float* %src_copy_start_addr.04, align 4 + store float %1, float* %dst_copy_start_addr.03, align 4 + %2 = add i32 %j.05, 1 + %exitcond = icmp eq i32 %2, %src_width + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} diff --git a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll new file mode 100644 index 000000000000..a9dd97182a4c --- /dev/null +++ b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s + +; Overly aggressive LICM simply adds copies of constants +; rdar://9266679 + +define zeroext i1 @t(i32* nocapture %A, i32 %size, i32 %value) nounwind readonly ssp { +; CHECK: t: +entry: + br label %for.cond + +for.cond: + %0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp ult i32 %0, %size + br i1 %cmp, label %for.body, label %return + +for.body: +; CHECK: %for.body +; CHECK: movs r{{[0-9]+}}, #1 + %arrayidx = getelementptr i32* %A, i32 %0 + %tmp4 = load 
i32* %arrayidx, align 4 + %cmp6 = icmp eq i32 %tmp4, %value + br i1 %cmp6, label %return, label %for.inc + +; CHECK: %for.cond +; CHECK: movs r{{[0-9]+}}, #0 + +for.inc: + %inc = add i32 %0, 1 + br label %for.cond + +return: + %retval.0 = phi i1 [ true, %for.body ], [ false, %for.cond ] + ret i1 %retval.0 +} diff --git a/test/CodeGen/ARM/2011-04-12-AlignBug.ll b/test/CodeGen/ARM/2011-04-12-AlignBug.ll new file mode 100644 index 000000000000..317be94e86b0 --- /dev/null +++ b/test/CodeGen/ARM/2011-04-12-AlignBug.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +; CHECK: align 3 +@.v = linker_private unnamed_addr constant <4 x i32> , align 8 +; CHECK: align 2 +@.strA = linker_private unnamed_addr constant [4 x i8] c"bar\00" +; CHECK-NOT: align +@.strB = linker_private unnamed_addr constant [4 x i8] c"foo\00", align 1 +@.strC = linker_private unnamed_addr constant [4 x i8] c"baz\00", section "__TEXT,__cstring,cstring_literals", align 1 diff --git a/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll b/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll new file mode 100644 index 000000000000..eb23de0b9716 --- /dev/null +++ b/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -regalloc=fast +; Previously we'd crash as out of registers on this input by clobbering all of +; the aliases. 
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +define void @_Z8TestCasev() nounwind ssp { +entry: + %a = alloca float, align 4 + %tmp = load float* %a, align 4 + call void asm sideeffect "", "w,~{s0},~{s16}"(float %tmp) nounwind, !srcloc !0 + ret void +} + +!0 = metadata !{i32 109} diff --git a/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll b/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll new file mode 100644 index 000000000000..e712e08ddb6a --- /dev/null +++ b/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s + +; CHECK: _f +; CHECK-NOT: ands +; CHECK: cmp +; CHECK: blxle _g + +define i32 @f(i32 %a, i32 %b) nounwind ssp { +entry: + %and = and i32 %b, %a + %cmp = icmp slt i32 %and, 1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void (...)* @g(i32 %a, i32 %b) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i32 %and +} + +declare void @g(...) 
diff --git a/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll b/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll new file mode 100644 index 000000000000..5404cf57a59f --- /dev/null +++ b/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s + +; CHECK: _f +; CHECK: adds +; CHECK-NOT: cmp +; CHECK: blxeq _h + +define i32 @f(i32 %a, i32 %b) nounwind ssp { +entry: + %add = add nsw i32 %b, %a + %cmp = icmp eq i32 %add, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void (...)* @h(i32 %a, i32 %b) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i32 %add +} + +; CHECK: _g +; CHECK: orrs +; CHECK-NOT: cmp +; CHECK: blxeq _h + +define i32 @g(i32 %a, i32 %b) nounwind ssp { +entry: + %add = or i32 %b, %a + %cmp = icmp eq i32 %add, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void (...)* @h(i32 %a, i32 %b) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i32 %add +} + +declare void @h(...) diff --git a/test/CodeGen/ARM/2011-04-26-SchedTweak.ll b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll new file mode 100644 index 000000000000..ed7dd0332046 --- /dev/null +++ b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll @@ -0,0 +1,70 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s + +; Do not move the umull above previous call which would require use of +; more callee-saved registers and introduce copies. 
+; rdar://9329627 + +%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* } +%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 } + +@FuncPtr = external hidden unnamed_addr global %struct.FF* +@.str1 = external hidden unnamed_addr constant [6 x i8], align 4 +@G = external unnamed_addr global i32 +@.str2 = external hidden unnamed_addr constant [58 x i8], align 4 +@.str3 = external hidden unnamed_addr constant [58 x i8], align 4 + +define i32 @test() nounwind optsize ssp { +entry: +; CHECK: test: +; CHECK: push +; CHECK-NOT: push + %block_size = alloca i32, align 4 + %block_count = alloca i32, align 4 + %index_cache = alloca i32, align 4 + store i32 0, i32* %index_cache, align 4 + %tmp = load i32* @G, align 4 + %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind + switch i32 %tmp1, label %bb8 [ + i32 0, label %bb + i32 536870913, label %bb4 + i32 536870914, label %bb6 + ] + +bb: + %tmp2 = load i32* @G, align 4 + %tmp4 = icmp eq i32 %tmp2, 0 + br i1 %tmp4, label %bb1, label %bb8 + +bb1: +; CHECK: %bb1 +; CHECK-NOT: umull +; CHECK: blx _Get +; CHECK: umull +; CHECK: blx _foo + %tmp5 = load i32* %block_size, align 4 + %tmp6 = load i32* %block_count, align 4 + %tmp7 = call %struct.FF* @Get() nounwind + store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4 + %tmp10 = zext i32 %tmp6 to i64 + %tmp11 = zext i32 %tmp5 to i64 + %tmp12 = mul nsw i64 %tmp10, %tmp11 + %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind + br label %bb8 + +bb4: + ret i32 0 + +bb6: + ret i32 1 + +bb8: + ret i32 -1 +} + +declare i32 @printf(i8*, ...) 
+ +declare %struct.FF* @Get() + +declare i32 @foo(i8*, i64, i32) + +declare i32 @bar(i32, i32, i32) diff --git a/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll b/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll new file mode 100644 index 000000000000..0741049cffdd --- /dev/null +++ b/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios + +; If converter was being too cute. It look for root BBs (which don't have +; successors) and use inverse depth first search to traverse the BBs. However +; that doesn't work when the CFG has infinite loops. Simply do a linear +; traversal of all BBs work just fine. + +; rdar://9344645 + +%struct.hc = type { i32, i32, i32, i32 } + +define i32 @t(i32 %type) optsize { +entry: + br i1 undef, label %if.then, label %if.else + +if.then: + unreachable + +if.else: + br i1 undef, label %if.then15, label %if.else18 + +if.then15: + unreachable + +if.else18: + switch i32 %type, label %if.else173 [ + i32 3, label %if.then115 + i32 1, label %if.then102 + ] + +if.then102: + br i1 undef, label %cond.true10.i, label %t.exit + +cond.true10.i: + br label %t.exit + +t.exit: + unreachable + +if.then115: + br i1 undef, label %if.else163, label %if.else145 + +if.else145: + %call150 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34865152) optsize + br label %while.body172 + +if.else163: + %call168 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34078720) optsize + br label %while.body172 + +while.body172: + br label %while.body172 + +if.else173: + ret i32 -1 +} + +declare hidden fastcc %struct.hc* @foo(%struct.hc* nocapture, i32) nounwind optsize + diff --git a/test/CodeGen/ARM/align.ll b/test/CodeGen/ARM/align.ll index d57c159b85cb..9589e72df2f5 100644 --- a/test/CodeGen/ARM/align.ll +++ b/test/CodeGen/ARM/align.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ELF -; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s 
-mtriple=arm-apple-darwin10 | FileCheck %s -check-prefix=DARWIN @a = global i1 true ; no alignment diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll index c7fcb9755d9e..a8b42e63b71f 100644 --- a/test/CodeGen/ARM/arguments.ll +++ b/test/CodeGen/ARM/arguments.ll @@ -14,7 +14,7 @@ define i32 @f1(i32 %a, i64 %b) { define i32 @f2() nounwind optsize { ; ELF: f2: ; ELF: mov [[REGISTER:(r[0-9]+)]], #128 -; ELF: str [[REGISTER]], [sp] +; ELF: str [[REGISTER]], [ ; DARWIN: f2: ; DARWIN: mov r3, #128 entry: diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index 50c638b73931..07620700aedb 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -23,15 +23,15 @@ tailrecurse: ; preds = %sw.bb, %entry %tmp2 = load i8** %scevgep5 %0 = ptrtoint i8* %tmp2 to i32 -; ARM: ands r12, r12, #3 +; ARM: ands {{r[0-9]+}}, {{r[0-9]+}}, #3 ; ARM-NEXT: beq -; THUMB: movs r5, #3 -; THUMB-NEXT: ands r5, r4 -; THUMB-NEXT: cmp r5, #0 +; THUMB: movs r[[R0:[0-9]+]], #3 +; THUMB-NEXT: ands r[[R0]], r +; THUMB-NEXT: cmp r[[R0]], #0 ; THUMB-NEXT: beq -; T2: ands r12, r12, #3 +; T2: ands {{r[0-9]+}}, {{r[0-9]+}}, #3 ; T2-NEXT: beq %and = and i32 %0, 3 diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll index 382a18334600..95edaad47e5f 100644 --- a/test/CodeGen/ARM/arm-returnaddr.ll +++ b/test/CodeGen/ARM/arm-returnaddr.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s ; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s ; rdar://8015977 ; rdar://8020118 @@ -16,7 +18,7 @@ define i8* @rt2() nounwind readnone { entry: ; CHECK: rt2: ; CHECK: {r7, lr} -; CHECK: ldr r0, [r7] +; CHECK: ldr r[[R0:[0-9]+]], [r7] ; CHECK: ldr r0, [r0] ; CHECK: ldr r0, [r0, #4] %0 = tail call i8* 
@llvm.returnaddress(i32 2) diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll new file mode 100644 index 000000000000..d0c4f3ae9d67 --- /dev/null +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s +; Avoid some 's' 16-bit instruction which partially update CPSR (and add false +; dependency) when it isn't dependent on last CPSR defining instruction. +; rdar://8928208 + +define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone { + entry: +; CHECK: t: +; CHECK: muls r2, r3, r2 +; CHECK-NEXT: mul r0, r0, r1 +; CHECK-NEXT: muls r0, r2, r0 + %0 = mul nsw i32 %a, %b + %1 = mul nsw i32 %c, %d + %2 = mul nsw i32 %0, %1 + ret i32 %2 +} diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll index 09f1aae0a9f0..5533038fb828 100644 --- a/test/CodeGen/ARM/bx_fold.ll +++ b/test/CodeGen/ARM/bx_fold.ll @@ -24,7 +24,7 @@ bb1: ; preds = %bb, %entry bb18: ; preds = %bb1 ; CHECK-NOT: bx -; CHECK: ldmia sp! 
+; CHECK: pop ret void } diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index a77aba037be5..4dc37aa27558 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -74,7 +74,7 @@ entry: ; CHECKT2: t7: ; CHECKT2: blxeq _foo ; CHECKT2-NEXT: pop.w -; CHECKT2-NEXT: b.w _foo +; CHECKT2-NEXT: b _foo br i1 undef, label %bb, label %bb1.lr.ph bb1.lr.ph: diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll index a6a7ed6af184..9b90408cc4db 100644 --- a/test/CodeGen/ARM/carry.ll +++ b/test/CodeGen/ARM/carry.ll @@ -19,3 +19,20 @@ entry: %tmp2 = sub i64 %tmp1, %b ret i64 %tmp2 } + +; add with live carry +define i64 @f3(i32 %al, i32 %bl) { +; CHECK: f3: +; CHECK: adds r +; CHECK: adcs r +; CHECK: adc r +entry: + ; unsigned wide add + %aw = zext i32 %al to i64 + %bw = zext i32 %bl to i64 + %cw = add i64 %aw, %bw + ; ch == carry bit + %ch = lshr i64 %cw, 32 + %dw = add i64 %ch, %bw + ret i64 %dw +} diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll index 845be8c20ea5..91ef65925221 100644 --- a/test/CodeGen/ARM/code-placement.ll +++ b/test/CodeGen/ARM/code-placement.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin -cgp-critical-edge-splitting=0 | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s ; PHI elimination shouldn't break backedge. ; rdar://8263994 @@ -72,7 +72,7 @@ bb2.preheader: ; preds = %bb3, %bb.nph15 br i1 %4, label %bb1, label %bb3 ; CHECK: LBB1_[[RET]]: @ %bb5 -; CHECK: ldmia sp! 
+; CHECK: pop bb5: ; preds = %bb3, %entry %sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; [#uses=1] ret i32 %sum.1.lcssa diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll index 542cf02f2a90..7893df782054 100644 --- a/test/CodeGen/ARM/constants.ll +++ b/test/CodeGen/ARM/constants.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc < %s -march=arm -disable-cgp-branch-opts | FileCheck %s define i32 @f1() { ; CHECK: f1 @@ -14,31 +14,31 @@ define i32 @f2() { define i32 @f3() { ; CHECK: f3 -; CHECK: mov r0, #1, 24 +; CHECK: mov r0, #1, #24 ret i32 256 } define i32 @f4() { ; CHECK: f4 -; CHECK: orr{{.*}}#1, 24 +; CHECK: orr{{.*}}#1, #24 ret i32 257 } define i32 @f5() { ; CHECK: f5 -; CHECK: mov r0, #255, 2 +; CHECK: mov r0, #255, #2 ret i32 -1073741761 } define i32 @f6() { ; CHECK: f6 -; CHECK: mov r0, #63, 28 +; CHECK: mov r0, #63, #28 ret i32 1008 } define void @f7(i32 %a) { ; CHECK: f7 -; CHECK: cmp r0, #1, 16 +; CHECK: cmp r0, #1, #16 %b = icmp ugt i32 %a, 65536 br i1 %b, label %r, label %r r: diff --git a/test/CodeGen/ARM/crash-greedy.ll b/test/CodeGen/ARM/crash-greedy.ll new file mode 100644 index 000000000000..8a865e23d0a4 --- /dev/null +++ b/test/CodeGen/ARM/crash-greedy.ll @@ -0,0 +1,84 @@ +; RUN: llc < %s -regalloc=greedy -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -verify-machineinstrs | FileCheck %s +; +; ARM tests that crash or fail with the greedy register allocator. 
+ +target triple = "thumbv7-apple-darwin" + +declare double @exp(double) + +; CHECK: remat_subreg +define void @remat_subreg(float* nocapture %x, i32* %y, i32 %n, i32 %z, float %c, float %lambda, float* nocapture %ret_f, float* nocapture %ret_df) nounwind { +entry: + %conv16 = fpext float %lambda to double + %mul17 = fmul double %conv16, -1.000000e+00 + br i1 undef, label %cond.end.us, label %cond.end + +cond.end.us: ; preds = %entry + unreachable + +cond.end: ; preds = %cond.end, %entry + %mul = fmul double undef, 0.000000e+00 + %add = fadd double undef, %mul + %add46 = fadd double undef, undef + %add75 = fadd double 0.000000e+00, undef + br i1 undef, label %for.end, label %cond.end + +for.end: ; preds = %cond.end + %conv78 = sitofp i32 %z to double + %conv83 = fpext float %c to double + %mul84 = fmul double %mul17, %conv83 + %call85 = tail call double @exp(double %mul84) nounwind + %mul86 = fmul double %conv78, %call85 + %add88 = fadd double 0.000000e+00, %mul86 +; CHECK: blx _exp + %call100 = tail call double @exp(double %mul84) nounwind + %mul101 = fmul double undef, %call100 + %add103 = fadd double %add46, %mul101 + %mul111 = fmul double undef, %conv83 + %mul119 = fmul double %mul111, undef + %add121 = fadd double undef, %mul119 + %div = fdiv double 1.000000e+00, %conv16 + %div126 = fdiv double %add, %add75 + %sub = fsub double %div, %div126 + %div129 = fdiv double %add103, %add88 + %add130 = fadd double %sub, %div129 + %conv131 = fptrunc double %add130 to float + store float %conv131, float* %ret_f, align 4 + %mul139 = fmul double %div129, %div129 + %div142 = fdiv double %add121, %add88 + %sub143 = fsub double %mul139, %div142 +; %lambda is passed on the stack, and the stack slot load is rematerialized. +; The rematted load of a float constrains the D register used for the mul. 
+; CHECK: vldr + %mul146 = fmul float %lambda, %lambda + %conv147 = fpext float %mul146 to double + %div148 = fdiv double 1.000000e+00, %conv147 + %sub149 = fsub double %sub143, %div148 + %conv150 = fptrunc double %sub149 to float + store float %conv150, float* %ret_df, align 4 + ret void +} + +; CHECK: insert_elem +; This test has a sub-register copy with a kill flag: +; %vreg6:ssub_3 = COPY %vreg6:ssub_2; QPR_VFP2:%vreg6 +; The rewriter must do something sensible with that, or the scavenger crashes. +define void @insert_elem() nounwind { +entry: + br i1 undef, label %if.end251, label %if.then84 + +if.then84: ; preds = %entry + br i1 undef, label %if.end251, label %if.then195 + +if.then195: ; preds = %if.then84 + %div = fdiv float 1.000000e+00, undef + %vecinit207 = insertelement <4 x float> undef, float %div, i32 1 + %vecinit208 = insertelement <4 x float> %vecinit207, float 1.000000e+00, i32 2 + %vecinit209 = insertelement <4 x float> %vecinit208, float 1.000000e+00, i32 3 + %mul216 = fmul <4 x float> zeroinitializer, %vecinit209 + store <4 x float> %mul216, <4 x float>* undef, align 16 + br label %if.end251 + +if.end251: ; preds = %if.then195, %if.then84, %entry + ret void +} diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll new file mode 100644 index 000000000000..8c9095e3a9ea --- /dev/null +++ b/test/CodeGen/ARM/debug-info-d16-reg.ll @@ -0,0 +1,105 @@ +; RUN: llc < %s - | FileCheck %s +; Radar 9309221 +; Test dwarf reg no for d16 +;CHECK: DW_OP_regx +;CHECK-NEXT: 272 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00", align 4 +@.str1 = private unnamed_addr constant [6 x i8] c"point\00", align 4 + +define i32 @inlineprinter(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize { +entry: + tail call void 
@llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !19), !dbg !26 + tail call void @llvm.dbg.value(metadata !{double %val}, i64 0, metadata !20), !dbg !26 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !21), !dbg !26 + %0 = zext i8 %c to i32, !dbg !27 + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !27 + ret i32 0, !dbg !29 +} + +define i32 @printer(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize noinline { +entry: + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !16), !dbg !30 + tail call void @llvm.dbg.value(metadata !{double %val}, i64 0, metadata !17), !dbg !30 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !18), !dbg !30 + %0 = zext i8 %c to i32, !dbg !31 + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !31 + ret i32 0, !dbg !33 +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !22), !dbg !34 + tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !23), !dbg !34 + %0 = sitofp i32 %argc to double, !dbg !35 + %1 = fadd double %0, 5.555552e+05, !dbg !35 + tail call void @llvm.dbg.value(metadata !{double %1}, i64 0, metadata !24), !dbg !35 + %2 = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind, !dbg !36 + %3 = getelementptr inbounds i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !37 + %4 = trunc i32 %argc to i8, !dbg !37 + %5 = add i8 %4, 97, !dbg !37 + tail call void @llvm.dbg.value(metadata !{i8* %3}, i64 0, metadata !19) nounwind, !dbg !38 + tail call void @llvm.dbg.value(metadata !{double %1}, i64 0, metadata !20) nounwind, !dbg !38 + tail call void @llvm.dbg.value(metadata !{i8 %5}, i64 0, metadata !21) nounwind, !dbg !38 + %6 = zext i8 %5 to i32, !dbg !39 + %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %3, double %1, i32 %6) nounwind, !dbg !39 + %8 = tail call i32 @printer(i8* %3, double %1, i8 zeroext %5) nounwind, !dbg !40 + ret i32 0, !dbg !41 +} + +declare i32 @puts(i8* nocapture) nounwind + +!llvm.dbg.sp = !{!0, !9, !10} +!llvm.dbg.lv.printer = !{!16, !17, !18} +!llvm.dbg.lv.inlineprinter = !{!19, !20, !21} +!llvm.dbg.lv.main = !{!22, !23, !24} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"/tmp/a.c", 
metadata !"/tmp", metadata !"(LLVM build 00)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8} +!5 = metadata !{i32 589860, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 589860, metadata !1, metadata !"double", metadata !1, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 589860, metadata !1, metadata !"unsigned char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] +!12 = metadata !{metadata !5, metadata !5, metadata !13} +!13 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] +!15 = metadata !{i32 589860, metadata 
!1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!16 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 11, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 11, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 11, metadata !8, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 590081, metadata !9, metadata !"ptr", metadata !1, i32 4, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 590081, metadata !9, metadata !"val", metadata !1, i32 4, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!21 = metadata !{i32 590081, metadata !9, metadata !"c", metadata !1, i32 4, metadata !8, i32 0} ; [ DW_TAG_arg_variable ] +!22 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!23 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!24 = metadata !{i32 590080, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0} ; [ DW_TAG_auto_variable ] +!25 = metadata !{i32 589835, metadata !10, i32 18, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!26 = metadata !{i32 4, i32 0, metadata !9, null} +!27 = metadata !{i32 6, i32 0, metadata !28, null} +!28 = metadata !{i32 589835, metadata !9, i32 5, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 7, i32 0, metadata !28, null} +!30 = metadata !{i32 11, i32 0, metadata !0, null} +!31 = metadata !{i32 13, i32 0, metadata !32, null} +!32 = metadata !{i32 589835, metadata !0, i32 12, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!33 = metadata !{i32 14, i32 0, metadata !32, null} +!34 = metadata !{i32 17, i32 0, metadata !10, null} +!35 = metadata !{i32 19, i32 0, metadata !25, null} +!36 = metadata !{i32 
20, i32 0, metadata !25, null} +!37 = metadata !{i32 21, i32 0, metadata !25, null} +!38 = metadata !{i32 4, i32 0, metadata !9, metadata !37} +!39 = metadata !{i32 6, i32 0, metadata !28, metadata !37} +!40 = metadata !{i32 22, i32 0, metadata !25, null} +!41 = metadata !{i32 23, i32 0, metadata !25, null} diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll new file mode 100644 index 000000000000..e83a83d1f10a --- /dev/null +++ b/test/CodeGen/ARM/debug-info-qreg.ll @@ -0,0 +1,94 @@ +; RUN: llc < %s - | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.6.7" + +;CHECK: DW_OP_regx for Q register: D1 +;CHECK-NEXT: byte +;CHECK-NEXT: byte +;CHECK-NEXT: DW_OP_piece 8 +;CHECK-NEXT: byte 8 +;CHECK-NEXT: DW_OP_regx for Q register: D2 +;CHECK-NEXT: byte +;CHECK-NEXT: byte +;CHECK-NEXT: DW_OP_piece 8 +;CHECK-NEXT: byte 8 + +@.str = external constant [13 x i8] + +declare <4 x float> @test0001(float) nounwind readnone ssp + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + br label %for.body9 + +for.body9: ; preds = %for.body9, %entry + %add19 = fadd <4 x float> undef, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, !dbg !39 + br i1 undef, label %for.end54, label %for.body9, !dbg !44 + +for.end54: ; preds = %for.body9 + tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39 + %tmp115 = extractelement <4 x float> %add19, i32 1 + %conv6.i75 = fpext float %tmp115 to double, !dbg !45 + %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45 + ret i32 0, !dbg !49 +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !10, !14} +!llvm.dbg.lv.test0001 = !{!18} +!llvm.dbg.lv.main = !{!19, !20, !24, !26, !27, !28, !29} +!llvm.dbg.lv.printFV = !{!30} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589846, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ] +!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!8 = metadata !{metadata !9} +!9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ] +!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!12 = metadata 
!{metadata !13} +!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 589870, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!16 = metadata !{i32 589845, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!17 = metadata !{null} +!18 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0} ; [ DW_TAG_arg_variable ] +!21 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] +!22 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!24 = metadata !{i32 590080, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] +!25 = metadata !{i32 589835, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ] +!26 = metadata !{i32 590080, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] +!27 = metadata !{i32 590080, metadata !25, 
metadata !"x", metadata !1, i32 61, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!28 = metadata !{i32 590080, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 590080, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!30 = metadata !{i32 590081, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0} ; [ DW_TAG_arg_variable ] +!31 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] +!32 = metadata !{i32 589846, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] +!33 = metadata !{i32 589847, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] +!34 = metadata !{metadata !35, metadata !37} +!35 = metadata !{i32 589837, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] +!36 = metadata !{i32 589846, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!37 = metadata !{i32 589837, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] +!38 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] +!39 = metadata !{i32 79, i32 7, metadata !40, null} +!40 = metadata !{i32 589835, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 589835, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ] +!42 = metadata !{i32 589835, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ] +!43 = metadata !{i32 589835, metadata !25, 
i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ] +!44 = metadata !{i32 75, i32 5, metadata !42, null} +!45 = metadata !{i32 42, i32 2, metadata !46, metadata !48} +!46 = metadata !{i32 589835, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ] +!47 = metadata !{i32 589835, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ] +!48 = metadata !{i32 95, i32 3, metadata !25, null} +!49 = metadata !{i32 99, i32 3, metadata !25, null} diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll new file mode 100644 index 000000000000..548c9bdebf02 --- /dev/null +++ b/test/CodeGen/ARM/debug-info-s16-reg.ll @@ -0,0 +1,116 @@ +; RUN: llc < %s - | FileCheck %s +; Radar 9309221 +; Test dwarf reg no for s16 +;CHECK: DW_OP_regx for S register +;CHECK-NEXT: byte +;CHECK-NEXT: byte +;CHECK-NEXT: DW_OP_bit_piece 32 0 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.6.7" + +@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00" +@.str1 = private unnamed_addr constant [6 x i8] c"point\00" + +define i32 @inlineprinter(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !8), !dbg !24 + tail call void @llvm.dbg.value(metadata !{float %val}, i64 0, metadata !10), !dbg !25 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !12), !dbg !26 + %conv = fpext float %val to double, !dbg !27 + %conv3 = zext i8 %c to i32, !dbg !27 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27 + ret i32 0, !dbg !29 +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind optsize + +define i32 @printer(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !14), !dbg !30 + tail call void @llvm.dbg.value(metadata !{float %val}, i64 0, metadata !15), !dbg !31 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !16), !dbg !32 + %conv = fpext float %val to double, !dbg !33 + %conv3 = zext i8 %c to i32, !dbg !33 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33 + ret i32 0, !dbg !35 +} + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !17), !dbg !36 + tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !18), !dbg !37 + %conv = sitofp i32 %argc to double, !dbg !38 + %add = fadd double %conv, 5.555552e+05, !dbg !38 + %conv1 = fptrunc double %add to float, !dbg !38 + tail call void @llvm.dbg.value(metadata !{float %conv1}, i64 0, metadata !22), !dbg !38 + %call = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind optsize, !dbg !39 + %add.ptr = getelementptr i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !40 + %add5 = add nsw i32 %argc, 97, !dbg !40 + %conv6 = trunc i32 %add5 to i8, !dbg !40 + tail call void @llvm.dbg.value(metadata !{i8* %add.ptr}, i64 0, metadata !8) nounwind, !dbg !41 + tail call void @llvm.dbg.value(metadata !{float %conv1}, i64 0, metadata !10) nounwind, !dbg !42 + tail call void @llvm.dbg.value(metadata !{i8 %conv6}, i64 0, metadata !12) nounwind, !dbg !43 + %conv.i = fpext float %conv1 to double, !dbg !44 + %conv3.i = and i32 %add5, 255, !dbg !44 + %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, 
!dbg !44 + %call14 = tail call i32 @printer(i8* %add.ptr, float %conv1, i8 zeroext %conv6) optsize, !dbg !45 + ret i32 0, !dbg !46 +} + +declare i32 @puts(i8* nocapture) nounwind optsize + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !6, !7} +!llvm.dbg.lv.inlineprinter = !{!8, !10, !12} +!llvm.dbg.lv.printer = !{!14, !15, !16} +!llvm.dbg.lv.main = !{!17, !18, !22} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0} ; [ DW_TAG_arg_variable ] +!9 = metadata !{i32 589839, 
metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0} ; [ DW_TAG_arg_variable ] +!11 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!12 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 50331652, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!13 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 590081, metadata !6, metadata !"ptr", metadata !1, i32 16777227, metadata !9, i32 0} ; [ DW_TAG_arg_variable ] +!15 = metadata !{i32 590081, metadata !6, metadata !"val", metadata !1, i32 33554443, metadata !11, i32 0} ; [ DW_TAG_arg_variable ] +!16 = metadata !{i32 590081, metadata !6, metadata !"c", metadata !1, i32 50331659, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 590081, metadata !7, metadata !"argc", metadata !1, i32 16777233, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 590081, metadata !7, metadata !"argv", metadata !1, i32 33554449, metadata !19, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] +!20 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ] +!21 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!22 = metadata !{i32 590080, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0} ; [ DW_TAG_auto_variable ] +!23 = metadata !{i32 589835, metadata !7, i32 18, i32 1, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!24 = metadata !{i32 4, i32 22, 
metadata !0, null} +!25 = metadata !{i32 4, i32 33, metadata !0, null} +!26 = metadata !{i32 4, i32 52, metadata !0, null} +!27 = metadata !{i32 6, i32 3, metadata !28, null} +!28 = metadata !{i32 589835, metadata !0, i32 5, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 7, i32 3, metadata !28, null} +!30 = metadata !{i32 11, i32 42, metadata !6, null} +!31 = metadata !{i32 11, i32 53, metadata !6, null} +!32 = metadata !{i32 11, i32 72, metadata !6, null} +!33 = metadata !{i32 13, i32 3, metadata !34, null} +!34 = metadata !{i32 589835, metadata !6, i32 12, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!35 = metadata !{i32 14, i32 3, metadata !34, null} +!36 = metadata !{i32 17, i32 15, metadata !7, null} +!37 = metadata !{i32 17, i32 28, metadata !7, null} +!38 = metadata !{i32 19, i32 31, metadata !23, null} +!39 = metadata !{i32 20, i32 3, metadata !23, null} +!40 = metadata !{i32 21, i32 3, metadata !23, null} +!41 = metadata !{i32 4, i32 22, metadata !0, metadata !40} +!42 = metadata !{i32 4, i32 33, metadata !0, metadata !40} +!43 = metadata !{i32 4, i32 52, metadata !0, metadata !40} +!44 = metadata !{i32 6, i32 3, metadata !28, metadata !40} +!45 = metadata !{i32 22, i32 3, metadata !23, null} +!46 = metadata !{i32 23, i32 1, metadata !23, null} diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll new file mode 100644 index 000000000000..34313aa89aae --- /dev/null +++ b/test/CodeGen/ARM/divmod.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s + +define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp { +entry: +; CHECK: foo: +; CHECK: bl ___divmodsi4 +; CHECK-NOT: bl ___divmodsi4 + %div = sdiv i32 %x, %y + store i32 %div, i32* %P, align 4 + %rem = srem i32 %x, %y + %arrayidx6 = getelementptr inbounds i32* %P, i32 1 + store i32 %rem, i32* %arrayidx6, align 4 + ret void +} + +define void @bar(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp { +entry: +; CHECK: bar: +; 
CHECK: bl ___udivmodsi4 +; CHECK-NOT: bl ___udivmodsi4 + %div = udiv i32 %x, %y + store i32 %div, i32* %P, align 4 + %rem = urem i32 %x, %y + %arrayidx6 = getelementptr inbounds i32* %P, i32 1 + store i32 %rem, i32* %arrayidx6, align 4 + ret void +} + +; rdar://9280991 +@flags = external unnamed_addr global i32 +@tabsize = external unnamed_addr global i32 + +define void @do_indent(i32 %cols) nounwind { +entry: +; CHECK: do_indent: + %0 = load i32* @flags, align 4 + %1 = and i32 %0, 67108864 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb + +bb: +; CHECK: bl ___divmodsi4 + %3 = load i32* @tabsize, align 4 + %4 = srem i32 %cols, %3 + %5 = sdiv i32 %cols, %3 + %6 = tail call i32 @llvm.objectsize.i32(i8* null, i1 false) + %7 = tail call i8* @__memset_chk(i8* null, i32 9, i32 %5, i32 %6) nounwind + br label %bb1 + +bb1: + %line_indent_len.0 = phi i32 [ %4, %bb ], [ 0, %entry ] + %8 = getelementptr inbounds i8* null, i32 %line_indent_len.0 + store i8 0, i8* %8, align 1 + ret void +} + +declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone +declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll index f03282bdab7f..51efe51bf152 100644 --- a/test/CodeGen/ARM/fabss.ll +++ b/test/CodeGen/ARM/fabss.ll @@ -24,4 +24,4 @@ declare float @fabsf(float) ; CORTEXA8: test: ; CORTEXA8: vabs.f32 d1, d1 ; CORTEXA9: test: -; CORTEXA9: vabs.f32 s1, s1 +; CORTEXA9: vabs.f32 s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll index 749690e98d0f..e35103c045eb 100644 --- a/test/CodeGen/ARM/fadds.ll +++ b/test/CodeGen/ARM/fadds.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vadd.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vadd.f32 s0, s1, s0 +; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fast-isel-pred.ll b/test/CodeGen/ARM/fast-isel-pred.ll new file mode 100644 index 000000000000..8de54ad5332b --- /dev/null +++ 
b/test/CodeGen/ARM/fast-isel-pred.ll @@ -0,0 +1,58 @@ +; RUN: llc -O0 -mtriple=armv7-apple-darwin < %s + +define i32 @main() nounwind ssp { +entry: + %retval = alloca i32, align 4 + %X = alloca <4 x i32>, align 16 + %Y = alloca <4 x float>, align 16 + store i32 0, i32* %retval + %tmp = load <4 x i32>* %X, align 16 + call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y) + %0 = load i32* %retval + ret i32 %0 +} + +define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp { +entry: + %__a.addr.i = alloca <4 x i32>, align 16 + %v.addr = alloca <4 x i32>, align 16 + %p.addr = alloca i8*, align 4 + %offset.addr = alloca i32, align 4 + %constants.addr = alloca <4 x float>*, align 4 + store <4 x i32> %v, <4 x i32>* %v.addr, align 16 + store i8* %p, i8** %p.addr, align 4 + store i32 %offset, i32* %offset.addr, align 4 + store <4 x float>* %constants, <4 x float>** %constants.addr, align 4 + %tmp = load <4 x i32>* %v.addr, align 16 + store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16 + %tmp.i = load <4 x i32>* %__a.addr.i, align 16 + %0 = bitcast <4 x i32> %tmp.i to <16 x i8> + %1 = bitcast <16 x i8> %0 to <4 x i32> + %vcvt.i = sitofp <4 x i32> %1 to <4 x float> + %tmp1 = load i8** %p.addr, align 4 + %tmp2 = load i32* %offset.addr, align 4 + %tmp3 = load <4 x float>** %constants.addr, align 4 + call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3) + ret void +} + +define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp { +entry: + %v.addr = alloca <4 x float>, align 16 + %p.addr = alloca i8*, align 4 + %offset.addr = alloca i32, align 4 + %constants.addr = alloca <4 x float>*, align 4 + %data = alloca i64, align 4 + store <4 x float> %v, <4 x float>* %v.addr, align 16 + store i8* %p, i8** %p.addr, align 4 + store i32 %offset, i32* %offset.addr, align 4 + store <4 x float>* %constants, <4 x float>** %constants.addr, align 4 + 
%tmp = load i64* %data, align 4 + %tmp1 = load i8** %p.addr, align 4 + %tmp2 = load i32* %offset.addr, align 4 + %add.ptr = getelementptr i8* %tmp1, i32 %tmp2 + %0 = bitcast i8* %add.ptr to i64* + %arrayidx = getelementptr inbounds i64* %0, i32 0 + store i64 %tmp, i64* %arrayidx + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll new file mode 100644 index 000000000000..08dcc64c9c84 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-redefinition.ll @@ -0,0 +1,11 @@ +; RUN: llc -O0 -regalloc=linearscan < %s +; This isn't exactly a useful set of command-line options, but check that it +; doesn't crash. (It was crashing because a register was getting redefined.) + +target triple = "thumbv7-apple-macosx10.6.7" + +define i32 @f(i32* %x) nounwind ssp { + %y = getelementptr inbounds i32* %x, i32 5000 + %tmp103 = load i32* %y, align 4 + ret i32 %tmp103 +} diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll index 8f58480be164..2d79674028ca 100644 --- a/test/CodeGen/ARM/fast-isel-static.ll +++ b/test/CodeGen/ARM/fast-isel-static.ll @@ -24,7 +24,7 @@ entry: store float 0.000000e+00, float* %ztot, align 4 store float 1.000000e+00, float* %z, align 4 ; CHECK-LONG: blx r2 -; CHECK-NORM: blx _myadd +; CHECK-NORM: bl _myadd call void @myadd(float* %ztot, float* %z) ret i32 0 } diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll index dd806ec6f1ae..6aad92fbc6a4 100644 --- a/test/CodeGen/ARM/fast-isel.ll +++ b/test/CodeGen/ARM/fast-isel.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB ; Very basic fast-isel functionality. 
- define i32 @add(i32 %a, i32 %b) nounwind { entry: %a.addr = alloca i32, align 4 @@ -13,4 +12,52 @@ entry: %tmp1 = load i32* %b.addr %add = add nsw i32 %tmp, %tmp1 ret i32 %add -} \ No newline at end of file +} + +; Check truncate to bool +define void @test1(i32 %tmp) nounwind { +entry: +%tobool = trunc i32 %tmp to i1 +br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry +call void @test1(i32 0) +br label %if.end + +if.end: ; preds = %if.then, %entry +ret void +; ARM: test1: +; ARM: tst r0, #1 +; THUMB: test1: +; THUMB: tst.w r0, #1 +} + +; Check some simple operations with immediates +define void @test2(i32 %tmp, i32* %ptr) nounwind { +; THUMB: test2: +; ARM: test2: + +b1: + %a = add i32 %tmp, 4096 + store i32 %a, i32* %ptr + br label %b2 + +; THUMB: add.w {{.*}} #4096 +; ARM: add {{.*}} #1, #20 + +b2: + %b = add i32 %tmp, 4095 + store i32 %b, i32* %ptr + br label %b3 +; THUMB: addw {{.*}} #4095 +; ARM: movw {{.*}} #4095 +; ARM: add + +b3: + %c = or i32 %tmp, 4 + store i32 %c, i32* %ptr + ret void + +; THUMB: orr {{.*}} #4 +; ARM: orr {{.*}} #4 +} diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll index d30e3ebf50a5..f241c2681cbf 100644 --- a/test/CodeGen/ARM/fcopysign.ll +++ b/test/CodeGen/ARM/fcopysign.ll @@ -40,5 +40,37 @@ entry: ret double %1 } +; rdar://9059537 +define i32 @test4() ssp { +entry: +; SOFT: test4: +; SOFT: vmov.f64 [[REG4:(d[0-9]+)]], #1.000000e+00 +; This S-reg must be the first sub-reg of the last D-reg on vbsl. 
+; SOFT: vcvt.f32.f64 {{s1?[02468]}}, [[REG4]] +; SOFT: vshr.u64 [[REG4]], [[REG4]], #32 +; SOFT: vmov.i32 [[REG5:(d[0-9]+)]], #0x80000000 +; SOFT: vbsl [[REG5]], [[REG4]], {{d[0-9]+}} + %call80 = tail call double @copysign(double 1.000000e+00, double undef) + %conv81 = fptrunc double %call80 to float + %tmp88 = bitcast float %conv81 to i32 + ret i32 %tmp88 +} + +; rdar://9287902 +define float @test5() nounwind { +entry: +; SOFT: test5: +; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000 +; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1 +; SOFT: vshr.u64 [[REG7]], [[REG7]], #32 +; SOFT: vbsl [[REG6]], [[REG7]], + %0 = tail call double (...)* @bar() nounwind + %1 = fptrunc double %0 to float + %2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone + %3 = fadd float %1, %2 + ret float %3 +} + +declare double @bar(...) declare double @copysign(double, double) nounwind declare float @copysignf(float, float) nounwind diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll index 0c3149579297..31c1ca940502 100644 --- a/test/CodeGen/ARM/fdivs.ll +++ b/test/CodeGen/ARM/fdivs.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vdiv.f32 s0, s1, s0 ; CORTEXA9: test: -; CORTEXA9: vdiv.f32 s0, s1, s0 +; CORTEXA9: vdiv.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll index fb83ef626af6..b63f609e755a 100644 --- a/test/CodeGen/ARM/fmacs.ll +++ b/test/CodeGen/ARM/fmacs.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9 +; RUN: llc < %s -mtriple=arm-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s -check-prefix=HARD define float @t1(float %acc, float %a, float %b) { entry: @@ -49,3 +51,54 @@ entry: %1 = fadd float %0, %acc ret float %1 
} + +; It's possible to make use of fp vmla / vmls on Cortex-A9. +; rdar://8659675 +define void @t4(float %acc1, float %a, float %b, float %acc2, float %c, float* %P1, float* %P2) { +entry: +; A8: t4: +; A8: vmul.f32 +; A8: vmul.f32 +; A8: vadd.f32 +; A8: vadd.f32 + +; Two vmla with no RAW hazard +; A9: t4: +; A9: vmla.f32 +; A9: vmla.f32 + +; HARD: t4: +; HARD: vmla.f32 s0, s1, s2 +; HARD: vmla.f32 s3, s1, s4 + %0 = fmul float %a, %b + %1 = fadd float %acc1, %0 + %2 = fmul float %a, %c + %3 = fadd float %acc2, %2 + store float %1, float* %P1 + store float %3, float* %P2 + ret void +} + +define float @t5(float %a, float %b, float %c, float %d, float %e) { +entry: +; A8: t5: +; A8: vmul.f32 +; A8: vmul.f32 +; A8: vadd.f32 +; A8: vadd.f32 + +; A9: t5: +; A9: vmla.f32 +; A9: vmul.f32 +; A9: vadd.f32 + +; HARD: t5: +; HARD: vmla.f32 s4, s0, s1 +; HARD: vmul.f32 s0, s2, s3 +; HARD: vadd.f32 s0, s4, s0 + %0 = fmul float %a, %b + %1 = fadd float %e, %0 + %2 = fmul float %c, %d + %3 = fadd float %1, %2 + ret float %3 +} diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index ef4e3e52818e..bc118b8cb226 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vmul.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vmul.f32 s0, s1, s0 +; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll index 76c806761f75..9facf20fee7e 100644 --- a/test/CodeGen/ARM/fnmscs.ll +++ b/test/CodeGen/ARM/fnmscs.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8 define float @t1(float %acc, float %a, float %b) nounwind { entry: @@ -11,8 +12,8 @@ entry: ; NEON: vnmla.f32 ; A8: t1: -; 
A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}} -; A8: vsub.f32 d0, d0, d1 +; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}} +; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} %0 = fmul float %a, %b %1 = fsub float -0.0, %0 %2 = fsub float %1, %acc @@ -28,8 +29,8 @@ entry: ; NEON: vnmla.f32 ; A8: t2: -; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}} -; A8: vsub.f32 d0, d0, d1 +; A8: vnmul.f32 s{{[0123]}}, s{{[0123]}}, s{{[0123]}} +; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} %0 = fmul float %a, %b %1 = fmul float -1.0, %0 %2 = fsub float %1, %acc @@ -45,8 +46,8 @@ entry: ; NEON: vnmla.f64 ; A8: t3: -; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}} -; A8: vsub.f64 d16, d16, d17 +; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} +; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} %0 = fmul double %a, %b %1 = fsub double -0.0, %0 %2 = fsub double %1, %acc @@ -62,8 +63,8 @@ entry: ; NEON: vnmla.f64 ; A8: t4: -; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}} -; A8: vsub.f64 d16, d16, d17 +; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} +; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} %0 = fmul double %a, %b %1 = fmul double -1.0, %0 %2 = fsub double %1, %acc diff --git a/test/CodeGen/ARM/fp-arg-shuffle.ll b/test/CodeGen/ARM/fp-arg-shuffle.ll new file mode 100644 index 000000000000..ae02b792e4d6 --- /dev/null +++ b/test/CodeGen/ARM/fp-arg-shuffle.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+neon -float-abi=soft | FileCheck %s + +; CHECK: function1 +; CHECK-NOT: vmov +define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp { +entry: + %call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind + ret double %call +} + +declare double @function2(double, double, double, double, double, double) diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll index b6e9c3c22e75..8ef45f2bbc97 100644 --- a/test/CodeGen/ARM/fp.ll +++ b/test/CodeGen/ARM/fp.ll @@ -51,7 +51,7 @@ 
entry: define float @h2() { ;CHECK: h2: -;CHECK: mov r0, #254, 10 +;CHECK: mov r0, #254, #10 entry: ret float 1.000000e+00 } diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll index 1ef9f7f32164..86c06f1ddd9e 100644 --- a/test/CodeGen/ARM/fp_convert.ll +++ b/test/CodeGen/ARM/fp_convert.ll @@ -5,7 +5,7 @@ define i32 @test1(float %a, float %b) { ; VFP2: test1: -; VFP2: vcvt.s32.f32 s0, s0 +; VFP2: vcvt.s32.f32 s{{.}}, s{{.}} ; NEON: test1: ; NEON: vcvt.s32.f32 d0, d0 entry: @@ -16,7 +16,7 @@ entry: define i32 @test2(float %a, float %b) { ; VFP2: test2: -; VFP2: vcvt.u32.f32 s0, s0 +; VFP2: vcvt.u32.f32 s{{.}}, s{{.}} ; NEON: test2: ; NEON: vcvt.u32.f32 d0, d0 entry: @@ -27,7 +27,7 @@ entry: define float @test3(i32 %a, i32 %b) { ; VFP2: test3: -; VFP2: vcvt.f32.u32 s0, s0 +; VFP2: vcvt.f32.u32 s{{.}}, s{{.}} ; NEON: test3: ; NEON: vcvt.f32.u32 d0, d0 entry: @@ -38,7 +38,7 @@ entry: define float @test4(i32 %a, i32 %b) { ; VFP2: test4: -; VFP2: vcvt.f32.s32 s0, s0 +; VFP2: vcvt.f32.s32 s{{.}}, s{{.}} ; NEON: test4: ; NEON: vcvt.f32.s32 d0, d0 entry: diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index 65b921bdf655..7c0dd0e12a79 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -37,7 +37,7 @@ define arm_apcscc i32 @t2(double* %a, double* %b) nounwind { entry: ; FINITE: t2: ; FINITE-NOT: vldr -; FINITE: ldrd r0, [r0] +; FINITE: ldrd r0, r1, [r0] ; FINITE-NOT: b LBB ; FINITE: cmp r0, #0 ; FINITE: cmpeq r1, #0 diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll index 75428ac21655..18f87bfc2e71 100644 --- a/test/CodeGen/ARM/ifcvt10.ll +++ b/test/CodeGen/ARM/ifcvt10.ll @@ -9,9 +9,9 @@ entry: ; CHECK: t: ; CHECK: vpop {d8} ; CHECK-NOT: vpopne -; CHECK: ldmia sp!, {r7, pc} +; CHECK: pop {r7, pc} ; CHECK: vpop {d8} -; CHECK: ldmia sp!, {r7, pc} +; CHECK: pop {r7, pc} br i1 undef, label %if.else, label %if.then if.then: ; preds = %entry diff --git a/test/CodeGen/ARM/ifcvt5.ll 
b/test/CodeGen/ARM/ifcvt5.ll index bca2ae346a6f..3615055f8b29 100644 --- a/test/CodeGen/ARM/ifcvt5.ll +++ b/test/CodeGen/ARM/ifcvt5.ll @@ -11,7 +11,7 @@ entry: define i32 @t1(i32 %a, i32 %b) { ; CHECK: t1: -; CHECK: ldmialt sp!, {r7, pc} +; CHECK: poplt {r7, pc} entry: %tmp1 = icmp sgt i32 %a, 10 ; [#uses=1] br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll index 5edf32fd1af6..232765768550 100644 --- a/test/CodeGen/ARM/ifcvt6.ll +++ b/test/CodeGen/ARM/ifcvt6.ll @@ -3,7 +3,7 @@ define void @foo(i32 %X, i32 %Y) { entry: ; CHECK: cmpne -; CHECK: ldmiahi sp! +; CHECK: pophi %tmp1 = icmp ult i32 %X, 4 ; [#uses=1] %tmp4 = icmp eq i32 %Y, 0 ; [#uses=1] %tmp7 = or i1 %tmp4, %tmp1 ; [#uses=1] diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll index 62e13557cfdc..476ed4d47c64 100644 --- a/test/CodeGen/ARM/ifcvt7.ll +++ b/test/CodeGen/ARM/ifcvt7.ll @@ -6,7 +6,7 @@ define fastcc i32 @CountTree(%struct.quad_struct* %tree) { ; CHECK: cmpeq ; CHECK: moveq -; CHECK: ldmiaeq sp! +; CHECK: popeq entry: br label %tailrecurse diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll index 5fdfc4ea6805..ca9a5c63cda6 100644 --- a/test/CodeGen/ARM/ifcvt8.ll +++ b/test/CodeGen/ARM/ifcvt8.ll @@ -5,7 +5,7 @@ declare void @abort() define fastcc void @t(%struct.SString* %word, i8 signext %c) { -; CHECK: ldmiane sp! 
+; CHECK: popne entry: %tmp1 = icmp eq %struct.SString* %word, null ; [#uses=1] br i1 %tmp1, label %cond_true, label %cond_false diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll index 0aac9d16ec6c..f0ab9dd7ea00 100644 --- a/test/CodeGen/ARM/indirectbr.ll +++ b/test/CodeGen/ARM/indirectbr.ll @@ -14,15 +14,15 @@ entry: %1 = icmp eq i8* %0, null ; [#uses=1] ; indirect branch gets duplicated here ; ARM: bx -; THUMB: mov pc, r1 -; THUMB2: mov pc, r2 +; THUMB: mov pc, +; THUMB2: mov pc, br i1 %1, label %bb3, label %bb2 bb2: ; preds = %entry, %bb3 %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; [#uses=1] ; ARM: bx -; THUMB: mov pc, r1 -; THUMB2: mov pc, r2 +; THUMB: mov pc, +; THUMB2: mov pc, indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1] bb3: ; preds = %entry @@ -42,20 +42,23 @@ L3: ; preds = %L4, %bb2 br label %L2 L2: ; preds = %L3, %bb2 +; THUMB: muls %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ] ; [#uses=1] %phitmp = mul i32 %res.2, 6 ; [#uses=1] br label %L1 L1: ; preds = %L2, %bb2 %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; [#uses=1] -; ARM: ldr r1, LCPI -; ARM: add r1, pc, r1 -; ARM: str r1 -; THUMB: ldr.n r2, LCPI -; THUMB: add r2, pc -; THUMB: str r2 -; THUMB2: ldr.n r2, LCPI -; THUMB2-NEXT: str r2 +; ARM: ldr [[R1:r[0-9]+]], LCPI +; ARM: add [[R1b:r[0-9]+]], pc, [[R1]] +; ARM: str [[R1b]] +; THUMB: ldr.n +; THUMB: add +; THUMB: ldr.n [[R2:r[0-9]+]], LCPI +; THUMB: add [[R2]], pc +; THUMB: str [[R2]] +; THUMB2: ldr.n [[R2:r[0-9]+]], LCPI +; THUMB2-NEXT: str{{(.w)?}} [[R2]] store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 ret i32 %res.3 } diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll index 9f77ad1f794c..9d6eba85301e 100644 --- a/test/CodeGen/ARM/inlineasm3.ll +++ b/test/CodeGen/ARM/inlineasm3.ll @@ -6,7 +6,7 @@ define void @t() nounwind { entry: ; CHECK: vmov.I64 q15, #0 -; CHECK: vmov.32 d30[0], r0 +; CHECK: vmov.32 d30[0], ; CHECK: 
vmov q8, q15 %tmp = alloca %struct.int32x4_t, align 16 call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* %tmp, i32 8192) nounwind diff --git a/test/CodeGen/ARM/int-to-fp.ll b/test/CodeGen/ARM/int-to-fp.ll new file mode 100644 index 000000000000..889b14919840 --- /dev/null +++ b/test/CodeGen/ARM/int-to-fp.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +; CHECK: sint_to_fp +; CHECK: vmovl.s16 +; CHECK: vcvt.f32.s32 +define <4 x float> @sint_to_fp(<4 x i16> %x) nounwind ssp { + %a = sitofp <4 x i16> %x to <4 x float> + ret <4 x float> %a +} + +; CHECK: uint_to_fp +; CHECK: vmovl.u16 +; CHECK: vcvt.f32.u32 +define <4 x float> @uint_to_fp(<4 x i16> %x) nounwind ssp { + %a = uitofp <4 x i16> %x to <4 x float> + ret <4 x float> %a +} diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll index 2f1b85ebbb04..db78fd06ab2d 100644 --- a/test/CodeGen/ARM/ldm.ll +++ b/test/CodeGen/ARM/ldm.ll @@ -5,9 +5,9 @@ define i32 @t1() { ; CHECK: t1: -; CHECK: ldmia +; CHECK: pop ; V4T: t1: -; V4T: ldmia +; V4T: pop %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; [#uses=1] %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1] %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; [#uses=1] @@ -16,9 +16,9 @@ define i32 @t1() { define i32 @t2() { ; CHECK: t2: -; CHECK: ldmia +; CHECK: pop ; V4T: t2: -; V4T: ldmia +; V4T: pop %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; [#uses=1] %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; [#uses=1] %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; [#uses=1] @@ -29,7 +29,7 @@ define i32 @t2() { define i32 @t3() { ; CHECK: t3: ; CHECK: ldmib -; CHECK: ldmia sp! 
+; CHECK: pop ; V4T: t3: ; V4T: ldmib ; V4T: pop diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index 895562a1d31e..8010f20689be 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -1,19 +1,21 @@ -; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=V6 -; RUN: llc < %s -mtriple=armv5-apple-darwin | FileCheck %s -check-prefix=V5 -; RUN: llc < %s -mtriple=armv6-eabi | FileCheck %s -check-prefix=EABI +; RUN: llc < %s -mtriple=armv6-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V6 +; RUN: llc < %s -mtriple=armv5-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V5 +; RUN: llc < %s -mtriple=armv6-eabi -regalloc=linearscan | FileCheck %s -check-prefix=EABI ; rdar://r6949835 +; Magic ARM pair hints works best with linearscan. + @b = external global i64* define i64 @t(i64 %a) nounwind readonly { entry: -;V6: ldrd r2, [r2] +;V6: ldrd r2, r3, [r2] -;V5: ldr r3, [r2] -;V5: ldr r2, [r2, #4] +;V5: ldr r{{[0-9]+}}, [r2] +;V5: ldr r{{[0-9]+}}, [r2, #4] -;EABI: ldr r3, [r2] -;EABI: ldr r2, [r2, #4] +;EABI: ldr r{{[0-9]+}}, [r2] +;EABI: ldr r{{[0-9]+}}, [r2, #4] %0 = load i64** @b, align 4 %1 = load i64* %0, align 4 diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll index 74f8d783377d..e401dca1ca80 100644 --- a/test/CodeGen/ARM/long.ll +++ b/test/CodeGen/ARM/long.ll @@ -14,14 +14,14 @@ entry: define i64 @f3() { ; CHECK: f3: -; CHECK: mvn r0, #2, 2 +; CHECK: mvn r0, #2, #2 entry: ret i64 2147483647 } define i64 @f4() { ; CHECK: f4: -; CHECK: mov r0, #2, 2 +; CHECK: mov r0, #2, #2 entry: ret i64 2147483648 } @@ -29,7 +29,7 @@ entry: define i64 @f5() { ; CHECK: f5: ; CHECK: mvn r0, #0 -; CHECK: mvn r1, #2, 2 +; CHECK: mvn r1, #2, #2 entry: ret i64 9223372036854775807 } diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index 5e4f5730f8d2..d5aac2e3ddaf 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -24,9 +24,10 @@ define 
i32 @f2(i64 %x, i64 %y) { ; CHECK: f2 ; CHECK: lsr{{.*}}r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: subs r2, r2, #32 +; CHECK-NEXT: sub r2, r2, #32 +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 -; CHECK-NEXT: movge r0, r1, asr r2 +; CHECK-NEXT: asrge r0, r1, r2 %a = ashr i64 %x, %y %b = trunc i64 %a to i32 ret i32 %b @@ -36,9 +37,10 @@ define i32 @f3(i64 %x, i64 %y) { ; CHECK: f3 ; CHECK: lsr{{.*}}r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: subs r2, r2, #32 +; CHECK-NEXT: sub r2, r2, #32 +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 -; CHECK-NEXT: movge r0, r1, lsr r2 +; CHECK-NEXT: lsrge r0, r1, r2 %a = lshr i64 %x, %y %b = trunc i64 %a to i32 ret i32 %b diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll index 1bbb96deeefe..153fd8fe34e4 100644 --- a/test/CodeGen/ARM/lsr-code-insertion.ll +++ b/test/CodeGen/ARM/lsr-code-insertion.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed} -; RUN: llc < %s -stats |& not grep {.*Number of re-materialization} +; RUN: llc < %s | FileCheck %s ; This test really wants to check that the resultant "cond_true" block only ; has a single store in it, and that cond_true55 only has code to materialize ; the constant and do a store. 
We do *not* want something like this: @@ -8,6 +7,11 @@ ; add r8, r0, r6 ; str r10, [r8, #+4] ; +; CHECK: ldr [[R6:r[0-9*]+]], LCP +; CHECK: cmp {{.*}}, [[R6]] +; CHECK: ldrle +; CHECK-NEXT: strle + target triple = "arm-apple-darwin8" define void @foo(i32* %mc, i32* %mpp, i32* %ip, i32* %dpp, i32* %tpmm, i32 %M, i32* %tpim, i32* %tpdm, i32* %bp, i32* %ms, i32 %xmb) { diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll index 9882690da268..c1318ec31f58 100644 --- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll +++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll @@ -4,11 +4,6 @@ ; constant offset addressing, so that each of the following stores ; uses the same register. -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-128] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-96] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-64] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-32] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}] ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32] ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64] ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96] diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index ed20c32dc0d5..5bae037cafb3 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -1,9 +1,11 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldmia -; RUN: llc < %s -mtriple=arm-apple-darwin | grep stmia -; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrb -; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrh +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s - %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } +; The ARM magic hinting works best with linear scan. 
+; CHECK: ldrd +; CHECK: strd +; CHECK: ldrb + +%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } @src = external global %struct.x @dst = external global %struct.x diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll index e33797079093..de48feeb9ec2 100644 --- a/test/CodeGen/ARM/neon_div.ll +++ b/test/CodeGen/ARM/neon_div.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vrecpe.f32 diff --git a/test/CodeGen/ARM/neon_shift.ll b/test/CodeGen/ARM/neon_shift.ll new file mode 100644 index 000000000000..340f220fb362 --- /dev/null +++ b/test/CodeGen/ARM/neon_shift.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +; +define <4 x i16> @t1(<4 x i32> %a) nounwind { +entry: +; CHECK: vqrshrn.s32 d{{[0-9]+}}, q{{[0-9]*}}, #13 + %x = tail call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %a, <4 x i32> ) + ret <4 x i16> %x +} + +declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/peephole-bitcast.ll b/test/CodeGen/ARM/peephole-bitcast.ll new file mode 100644 index 000000000000..e670a5be3bca --- /dev/null +++ b/test/CodeGen/ARM/peephole-bitcast.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=linearscan | FileCheck %s + +; vmov s0, r0 + vmov r0, s0 should have been optimized away. +; rdar://9104514 + +; Peephole leaves a dead vmovsr instruction behind, and depends on linear scan +; to remove it. 
+ +define void @t(float %x) nounwind ssp { +entry: +; CHECK: t: +; CHECK-NOT: vmov +; CHECK: bl + %0 = bitcast float %x to i32 + %cmp = icmp ult i32 %0, 2139095039 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @doSomething(float %x) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +declare void @doSomething(float) diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll index 895b27b749db..95f082aa9385 100644 --- a/test/CodeGen/ARM/prefetch.ll +++ b/test/CodeGen/ARM/prefetch.ll @@ -1,10 +1,15 @@ ; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld -; RUN: llc < %s -march=thumb -mattr=+v7a | FileCheck %s -check-prefix=THUMB2 -; RUN: llc < %s -march=arm -mattr=+v7a,+mp | FileCheck %s -check-prefix=ARM-MP +; RUN: llc < %s -march=thumb -mattr=+v7a | FileCheck %s -check-prefix=THUMB2 +; RUN: llc < %s -march=arm -mattr=+v7a | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -march=arm -mcpu=cortex-a9-mp | FileCheck %s -check-prefix=ARM-MP ; rdar://8601536 define void @t1(i8* %ptr) nounwind { entry: +; ARM: t1: +; ARM-NOT: pldw [r0] +; ARM: pld [r0] + ; ARM-MP: t1: ; ARM-MP: pldw [r0] ; ARM-MP: pld [r0] @@ -19,8 +24,8 @@ entry: define void @t2(i8* %ptr) nounwind { entry: -; ARM-MP: t2: -; ARM-MP: pld [r0, #1023] +; ARM: t2: +; ARM: pld [r0, #1023] ; THUMB2: t2: ; THUMB2: pld [r0, #1023] @@ -31,8 +36,8 @@ entry: define void @t3(i32 %base, i32 %offset) nounwind { entry: -; ARM-MP: t3: -; ARM-MP: pld [r0, r1, lsr #2] +; ARM: t3: +; ARM: pld [r0, r1, lsr #2] ; THUMB2: t3: ; THUMB2: lsrs r1, r1, #2 @@ -46,8 +51,8 @@ entry: define void @t4(i32 %base, i32 %offset) nounwind { entry: -; ARM-MP: t4: -; ARM-MP: pld [r0, r1, lsl #2] +; ARM: t4: +; ARM: pld [r0, r1, lsl #2] ; THUMB2: t4: ; THUMB2: pld [r0, r1, lsl #2] diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 53214fd4c302..d350937c683e 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ 
b/test/CodeGen/ARM/reg_sequence.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s ; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's. %struct.int16x8_t = type { <8 x i16> } @@ -123,9 +124,9 @@ return1: return2: ; CHECK: %return2 ; CHECK: vadd.i32 -; CHECK: vmov q9, q11 +; CHECK: vmov {{q[0-9]+}}, {{q[0-9]+}} ; CHECK-NOT: vmov -; CHECK: vst2.32 {d16, d17, d18, d19} +; CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1] %tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1] %tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1] @@ -137,9 +138,10 @@ return2: define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { ; CHECK: t5: ; CHECK: vldmia -; CHECK: vmov q9, q8 +; How can FileCheck match Q and D registers? We need a lisp interpreter. +; CHECK: vmov {{q[0-9]+}}, {{q[0-9]+}} ; CHECK-NOT: vmov -; CHECK: vld2.16 {d16[1], d18[1]}, [r0] +; CHECK: vld2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] ; CHECK-NOT: vmov ; CHECK: vadd.i16 %tmp0 = bitcast i16* %A to i8* ; [#uses=1] @@ -154,8 +156,8 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind { ; CHECK: t6: ; CHECK: vldr.64 -; CHECK: vmov d17, d16 -; CHECK-NEXT: vld2.8 {d16[1], d17[1]} +; CHECK: vmov d[[D0:[0-9]+]], d[[D1:[0-9]+]] +; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]} %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2] %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2] %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1] @@ -169,10 +171,10 @@ entry: ; CHECK: t7: ; CHECK: vld2.32 ; CHECK: vst2.32 -; CHECK: vld1.32 {d16, d17}, -; CHECK: vmov q9, q8 +; CHECK: vld1.32 {d{{[0-9]+}}, 
d{{[0-9]+}}}, +; CHECK: vmov q[[Q0:[0-9]+]], q[[Q1:[0-9]+]] ; CHECK-NOT: vmov -; CHECK: vuzp.32 q8, q9 +; CHECK: vuzp.32 q[[Q1]], q[[Q0]] ; CHECK: vst1.32 %0 = bitcast i32* %iptr to i8* ; [#uses=2] %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] @@ -271,7 +273,7 @@ define arm_aapcs_vfpcc i32 @t10() nounwind { entry: ; CHECK: t10: ; CHECK: vmul.f32 q8, q8, d0[0] -; CHECK: vmov.i32 q9, #0x3F000000 +; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3F000000 ; CHECK: vadd.f32 q8, q8, q8 %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll index 687bf8834c9f..4170ff3071ad 100644 --- a/test/CodeGen/ARM/rev.ll +++ b/test/CodeGen/ARM/rev.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s -define i32 @test1(i32 %X) { +define i32 @test1(i32 %X) nounwind { ; CHECK: test1 ; CHECK: rev16 r0, r0 %tmp1 = lshr i32 %X, 8 @@ -16,7 +16,7 @@ define i32 @test1(i32 %X) { ret i32 %tmp14 } -define i32 @test2(i32 %X) { +define i32 @test2(i32 %X) nounwind { ; CHECK: test2 ; CHECK: revsh r0, r0 %tmp1 = lshr i32 %X, 8 @@ -28,3 +28,29 @@ define i32 @test2(i32 %X) { %tmp5.upgrd.2 = sext i16 %tmp5 to i32 ret i32 %tmp5.upgrd.2 } + +; rdar://9147637 +define i32 @test3(i16 zeroext %a) nounwind { +entry: +; CHECK: test3: +; CHECK: revsh r0, r0 + %0 = tail call i16 @llvm.bswap.i16(i16 %a) + %1 = sext i16 %0 to i32 + ret i32 %1 +} + +declare i16 @llvm.bswap.i16(i16) nounwind readnone + +define i32 @test4(i16 zeroext %a) nounwind { +entry: +; CHECK: test4: +; CHECK: revsh r0, r0 + %conv = zext i16 %a to i32 + %shr9 = lshr i16 %a, 8 + %conv2 = zext i16 %shr9 to i32 + %shl = shl nuw nsw i32 %conv, 8 + %or = or i32 %conv2, %shl + %sext = shl i32 %or, 16 + %conv8 = ashr exact i32 %sext, 16 + ret i32 %conv8 +} diff 
--git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll index 578834ec93bc..82ed0184badd 100644 --- a/test/CodeGen/ARM/select-imm.ll +++ b/test/CodeGen/ARM/select-imm.ll @@ -6,7 +6,7 @@ define i32 @t1(i32 %c) nounwind readnone { entry: ; ARM: t1: ; ARM: mov r1, #101 -; ARM: orr r1, r1, #1, 24 +; ARM: orr r1, r1, #1, #24 ; ARM: movgt r0, #123 ; ARMT2: t1: @@ -27,7 +27,7 @@ entry: ; ARM: t2: ; ARM: mov r0, #123 ; ARM: movgt r0, #101 -; ARM: orrgt r0, r0, #1, 24 +; ARM: orrgt r0, r0, #1, #24 ; ARMT2: t2: ; ARMT2: mov r0, #123 diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 1aa0d3904125..d1493ee56e4b 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -90,3 +90,26 @@ define arm_apcscc float @f8(i32 %a) nounwind { %tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000 ret float %tmp1 } + +; +; Glue values can only have a single use, but the following test exposed a +; case where a SELECT was lowered with 2 uses of a comparison, causing the +; scheduler to assert. +; CHECK-VFP: f9: + +declare i8* @objc_msgSend(i8*, i8*, ...) 
+define void @f9() optsize { +entry: + %cmp = icmp eq i8* undef, inttoptr (i32 4 to i8*) + %conv191 = select i1 %cmp, float -3.000000e+00, float 0.000000e+00 + %conv195 = select i1 %cmp, double -1.000000e+00, double 0.000000e+00 + %add = fadd double %conv195, 1.100000e+01 + %conv196 = fptrunc double %add to float + %add201 = fadd float undef, %conv191 + %tmp484 = bitcast float %conv196 to i32 + %tmp478 = bitcast float %add201 to i32 + %tmp490 = insertvalue [2 x i32] undef, i32 %tmp484, 0 + %tmp493 = insertvalue [2 x i32] %tmp490, i32 %tmp478, 1 + call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [2 x i32], i32, float)*)(i8* undef, i8* undef, [2 x i32] %tmp493, i32 0, float 1.000000e+00) optsize + ret void +} diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index 5dabfc3a82a3..4211797ef77e 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -4,7 +4,7 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; ARM: t1: -; ARM: sub r0, r1, #6, 2 +; ARM: sub r0, r1, #6, #2 ; ARM: movgt r0, r1 ; T2: t1: diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll index 01e3a922f656..f0e2d102610d 100644 --- a/test/CodeGen/ARM/shifter_operand.ll +++ b/test/CodeGen/ARM/shifter_operand.ll @@ -51,19 +51,19 @@ entry: declare i8* @malloc(...) 
-define fastcc void @test4() nounwind { +define fastcc void @test4(i16 %addr) nounwind { entry: ; A8: test4: -; A8: ldr r1, [r0, r0, lsl #2] -; A8: str r1, [r0, r0, lsl #2] +; A8: ldr r2, [r0, r1, lsl #2] +; A8: str r2, [r0, r1, lsl #2] ; A9: test4: -; A9: add r0, r0, r0, lsl #2 +; A9: add r0, r0, r{{[0-9]+}}, lsl #2 ; A9: ldr r1, [r0] ; A9: str r1, [r0] %0 = tail call i8* (...)* @malloc(i32 undef) nounwind %1 = bitcast i8* %0 to i32* - %2 = sext i16 undef to i32 + %2 = sext i16 %addr to i32 %3 = getelementptr inbounds i32* %1, i32 %2 %4 = load i32* %3, align 4 %5 = add nsw i32 %4, 1 diff --git a/test/CodeGen/ARM/shuffle.ll b/test/CodeGen/ARM/shuffle.ll new file mode 100644 index 000000000000..7d6be4f5e6c3 --- /dev/null +++ b/test/CodeGen/ARM/shuffle.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin" + +define <8 x i8> @shuf(<8 x i8> %a) nounwind readnone optsize ssp { +entry: +; CHECK: vtbl + %shuffle = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle +} + +define <8 x i8> @shuf2(<8 x i8> %a, <8 x i8> %b) nounwind readnone optsize ssp { +entry: +; CHECK: vtbl + %shuffle = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle +} diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll index b7ab2e796f8a..686d791ce60d 100644 --- a/test/CodeGen/ARM/smul.ll +++ b/test/CodeGen/ARM/smul.ll @@ -1,16 +1,12 @@ -; RUN: llc < %s -march=arm -; RUN: llc < %s -march=arm -mattr=+v5TE -; RUN: llc < %s -march=arm -mattr=+v5TE | \ -; RUN: grep smulbt | count 1 -; RUN: llc < %s -march=arm -mattr=+v5TE | \ -; RUN: grep smultt | count 1 -; RUN: llc < %s -march=arm -mattr=+v5TE | \ -; RUN: grep smlabt | count 1 +; RUN: llc < %s -march=arm -mcpu=generic +; RUN: llc < %s -march=arm 
-mcpu=cortex-a8 | FileCheck %s @x = weak global i16 0 ; [#uses=1] @y = weak global i16 0 ; [#uses=0] define i32 @f1(i32 %y) { +; CHECK: f1 +; CHECK: smulbt %tmp = load i16* @x ; [#uses=1] %tmp1 = add i16 %tmp, 2 ; [#uses=1] %tmp2 = sext i16 %tmp1 to i32 ; [#uses=1] @@ -20,6 +16,8 @@ define i32 @f1(i32 %y) { } define i32 @f2(i32 %x, i32 %y) { +; CHECK: f2 +; CHECK: smultt %tmp1 = ashr i32 %x, 16 ; [#uses=1] %tmp3 = ashr i32 %y, 16 ; [#uses=1] %tmp4 = mul i32 %tmp3, %tmp1 ; [#uses=1] @@ -27,6 +25,8 @@ define i32 @f2(i32 %x, i32 %y) { } define i32 @f3(i32 %a, i16 %x, i32 %y) { +; CHECK: f3 +; CHECK: smlabt %tmp = sext i16 %x to i32 ; [#uses=1] %tmp2 = ashr i32 %y, 16 ; [#uses=1] %tmp3 = mul i32 %tmp2, %tmp ; [#uses=1] diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll index 465c7e676c56..b24f75a6e2b8 100644 --- a/test/CodeGen/ARM/str_pre-2.ll +++ b/test/CodeGen/ARM/str_pre-2.ll @@ -1,4 +1,7 @@ -; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=linearscan | FileCheck %s +; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s + +; The greedy register allocator uses a single CSR here, invalidating the test. 
@b = external global i64* diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll index 81513e23e807..555b18eb1e99 100644 --- a/test/CodeGen/ARM/sub.ll +++ b/test/CodeGen/ARM/sub.ll @@ -12,7 +12,7 @@ define i64 @f1(i64 %a) { ; 66846720 = 0x03fc0000 define i64 @f2(i64 %a) { ; CHECK: f2 -; CHECK: subs r0, r0, #255, 14 +; CHECK: subs r0, r0, #255, #14 ; CHECK: sbc r1, r1, #0 %tmp = sub i64 %a, 66846720 ret i64 %tmp @@ -27,3 +27,12 @@ define i64 @f3(i64 %a) { ret i64 %tmp } +define i32 @f4(i32 %x) { +entry: +; CHECK: f4 +; CHECK: rsbs + %sub = sub i32 1, %x + %cmp = icmp ugt i32 %sub, 0 + %sel = select i1 %cmp, i32 1, i32 %sub + ret i32 %sel +} diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll index 25093fee225a..aa88ae0c1a86 100644 --- a/test/CodeGen/ARM/thumb1-varalloc.ll +++ b/test/CodeGen/ARM/thumb1-varalloc.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s ; rdar://8819685 @__bar = external hidden global i8* @@ -12,12 +13,13 @@ entry: %0 = load i8** @__bar, align 4 %1 = icmp eq i8* %0, null br i1 %1, label %bb1, label %bb3 +; CHECK: bne bb1: store i32 1026, i32* %size, align 4 %2 = alloca [1026 x i8], align 1 -; CHECK: mov r0, sp -; CHECK: adds r4, r0, r4 +; CHECK: mov [[R0:r[0-9]+]], sp +; CHECK: adds {{r[0-9]+}}, [[R0]], {{r[0-9]+}} %3 = getelementptr inbounds [1026 x i8]* %2, i32 0, i32 0 %4 = call i32 @_called_func(i8* %3, i32* %size) nounwind %5 = icmp eq i32 %4, 0 diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll index b2f6b6e69fa5..38842a9646ff 100644 --- a/test/CodeGen/ARM/trap.ll +++ b/test/CodeGen/ARM/trap.ll @@ -1,10 +1,15 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=INSTR +; RUN: llc < %s -mtriple=arm-apple-darwin -trap-func=_trap | FileCheck %s -check-prefix=FUNC ; rdar://7961298 +; rdar://9249183 define void @t() 
nounwind { entry: -; CHECK: t: -; CHECK: trap +; INSTR: t: +; INSTR: trap + +; FUNC: t: +; FUNC: bl __trap call void @llvm.trap() unreachable } diff --git a/test/CodeGen/ARM/umulo-32.ll b/test/CodeGen/ARM/umulo-32.ll index aa7d28a62349..fa5c0168fefe 100644 --- a/test/CodeGen/ARM/umulo-32.ll +++ b/test/CodeGen/ARM/umulo-32.ll @@ -12,3 +12,30 @@ define i32 @func(i32 %a) nounwind { } declare %umul.ty @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone + +define i32 @f(i32 %argc, i8** %argv) ssp { +; CHECK: func +; CHECK: str r0 +; CHECK: movs r2 +; CHECK: mov r1 +; CHECK: mov r3 +; CHECK: muldi3 +%1 = alloca i32, align 4 +%2 = alloca i32, align 4 +%3 = alloca i8**, align 4 +%m_degree = alloca i32, align 4 +store i32 0, i32* %1 +store i32 %argc, i32* %2, align 4 +store i8** %argv, i8*** %3, align 4 +store i32 10, i32* %m_degree, align 4 +%4 = load i32* %m_degree, align 4 +%5 = call %umul.ty @llvm.umul.with.overflow.i32(i32 %4, i32 8) +%6 = extractvalue %umul.ty %5, 1 +%7 = extractvalue %umul.ty %5, 0 +%8 = select i1 %6, i32 -1, i32 %7 +%9 = call noalias i8* @_Znam(i32 %8) +%10 = bitcast i8* %9 to double* +ret i32 0 +} + +declare noalias i8* @_Znam(i32) diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll index b42e11f2c4ab..a8237c60e4e0 100644 --- a/test/CodeGen/ARM/unaligned_load_store.ll +++ b/test/CodeGen/ARM/unaligned_load_store.ll @@ -8,14 +8,14 @@ define void @t(i8* nocapture %a, i8* nocapture %b) nounwind { entry: ; GENERIC: t: -; GENERIC: ldrb r2 -; GENERIC: ldrb r3 -; GENERIC: ldrb r12 -; GENERIC: ldrb r1 -; GENERIC: strb r1 -; GENERIC: strb r12 -; GENERIC: strb r3 -; GENERIC: strb r2 +; GENERIC: ldrb [[R2:r[0-9]+]] +; GENERIC: ldrb [[R3:r[0-9]+]] +; GENERIC: ldrb [[R12:r[0-9]+]] +; GENERIC: ldrb [[R1:r[0-9]+]] +; GENERIC: strb [[R1]] +; GENERIC: strb [[R12]] +; GENERIC: strb [[R3]] +; GENERIC: strb [[R2]] ; DARWIN_V6: t: ; DARWIN_V6: ldr r1 diff --git a/test/CodeGen/ARM/undef-sext.ll 
b/test/CodeGen/ARM/undef-sext.ll new file mode 100644 index 000000000000..2c28da3b6461 --- /dev/null +++ b/test/CodeGen/ARM/undef-sext.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s + +; No need to sign-extend undef. + +define i32 @t(i32* %a) nounwind { +entry: +; CHECK: t: +; CHECK: ldr r0, [r0] +; CHECK: bx lr + %0 = sext i16 undef to i32 + %1 = getelementptr inbounds i32* %a, i32 %0 + %2 = load i32* %1, align 4 + ret i32 %2 +} diff --git a/test/CodeGen/ARM/va_arg.ll b/test/CodeGen/ARM/va_arg.ll index 7cb976236dc5..bb4045311624 100644 --- a/test/CodeGen/ARM/va_arg.ll +++ b/test/CodeGen/ARM/va_arg.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -pre-RA-sched=source | FileCheck %s ; Test that we correctly align elements when using va_arg ; CHECK: test1: ; CHECK-NOT: bfc -; CHECK: add r0, r0, #7 -; CHECK: bfc r0, #0, #3 +; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7 +; CHECK: bfc [[REG]], #0, #3 ; CHECK-NOT: bfc define i64 @test1(i32 %i, ...) 
nounwind optsize { @@ -19,8 +19,8 @@ entry: ; CHECK: test2: ; CHECK-NOT: bfc -; CHECK: add r0, r0, #7 -; CHECK: bfc r0, #0, #3 +; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7 +; CHECK: bfc [[REG]], #0, #3 ; CHECK-NOT: bfc ; CHECK: bx lr diff --git a/test/CodeGen/ARM/vbsl-constant.ll b/test/CodeGen/ARM/vbsl-constant.ll new file mode 100644 index 000000000000..14e668efb1da --- /dev/null +++ b/test/CodeGen/ARM/vbsl-constant.ll @@ -0,0 +1,115 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: v_bsli8: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = and <8 x i8> %tmp1, + %tmp6 = and <8 x i8> %tmp3, + %tmp7 = or <8 x i8> %tmp4, %tmp6 + ret <8 x i8> %tmp7 +} + +define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: v_bsli16: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = and <4 x i16> %tmp1, + %tmp6 = and <4 x i16> %tmp3, + %tmp7 = or <4 x i16> %tmp4, %tmp6 + ret <4 x i16> %tmp7 +} + +define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: v_bsli32: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = and <2 x i32> %tmp1, + %tmp6 = and <2 x i32> %tmp3, + %tmp7 = or <2 x i32> %tmp4, %tmp6 + ret <2 x i32> %tmp7 +} + +define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind { +;CHECK: v_bsli64: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = load <1 x i64>* %C + %tmp4 = and <1 x i64> %tmp1, + %tmp6 = and <1 x i64> %tmp3, + %tmp7 = or <1 x i64> %tmp4, %tmp6 + ret <1 x i64> %tmp7 +} + +define <16 x i8> 
@v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { +;CHECK: v_bslQi8: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = load <16 x i8>* %C + %tmp4 = and <16 x i8> %tmp1, + %tmp6 = and <16 x i8> %tmp3, + %tmp7 = or <16 x i8> %tmp4, %tmp6 + ret <16 x i8> %tmp7 +} + +define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { +;CHECK: v_bslQi16: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = load <8 x i16>* %C + %tmp4 = and <8 x i16> %tmp1, + %tmp6 = and <8 x i16> %tmp3, + %tmp7 = or <8 x i16> %tmp4, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { +;CHECK: v_bslQi32: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = load <4 x i32>* %C + %tmp4 = and <4 x i32> %tmp1, + %tmp6 = and <4 x i32> %tmp3, + %tmp7 = or <4 x i32> %tmp4, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind { +;CHECK: v_bslQi64: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = load <2 x i64>* %C + %tmp4 = and <2 x i64> %tmp1, + %tmp6 = and <2 x i64> %tmp3, + %tmp7 = or <2 x i64> %tmp4, %tmp6 + ret <2 x i64> %tmp7 +} diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll index c3c4cb356307..2243bac91fb1 100644 --- a/test/CodeGen/ARM/vcgt.ll +++ b/test/CodeGen/ARM/vcgt.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vcgts8: @@ -161,9 +162,9 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ; rdar://7923010 define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x 
float>* %B) nounwind { ;CHECK: vcgt_zext: -;CHECK: vmov.i32 q10, #0x1 -;CHECK: vcgt.f32 q8 -;CHECK: vand q8, q8, q10 +;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1 +;CHECK: vcgt.f32 [[Q1:q[0-9]+]] +;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]] %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2 diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll index 3ab0cfcbbc77..81bdc44863b7 100644 --- a/test/CodeGen/ARM/vector-DAGCombine.ll +++ b/test/CodeGen/ARM/vector-DAGCombine.ll @@ -105,3 +105,21 @@ define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind { store i64 %t1, i64* %ptr ret void } + +; Test trying to do a AND Combine on illegal types. +define void @andVec(<3 x i8>* %A) nounwind { + %tmp = load <3 x i8>* %A, align 4 + %and = and <3 x i8> %tmp, + store <3 x i8> %and, <3 x i8>* %A + ret void +} + + +; Test trying to do an OR Combine on illegal types. +define void @orVec(<3 x i8>* %A) nounwind { + %tmp = load <3 x i8>* %A, align 4 + %or = or <3 x i8> %tmp, + store <3 x i8> %or, <3 x i8>* %A + ret void +} + diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index 55abefef0fa7..49a042b7e1f5 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -125,11 +125,11 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind { ; The actual shuffle code only handles some cases, make sure we check ; this rather than blindly emitting a VECTOR_SHUFFLE (infinite ; lowering loop can result otherwise). 
-define <8 x i8> @test_illegal(<16 x i8>* %A, <16 x i8>* %B) nounwind { +define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: test_illegal: -;CHECK: vst1.8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <8 x i32> - ret <8 x i8> %tmp3 +;CHECK: vst1.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> + ret <8 x i16> %tmp3 } diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll index 44a44afe9af4..49a69827bc05 100644 --- a/test/CodeGen/ARM/vfp.ll +++ b/test/CodeGen/ARM/vfp.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s define void @test(float* %P, double* %D) { %A = load float* %P ; [#uses=1] @@ -40,9 +41,9 @@ define void @test_ext_round(float* %P, double* %D) { ;CHECK: test_ext_round: %a = load float* %P ; [#uses=1] ;CHECK: vcvt.f64.f32 +;CHECK: vcvt.f32.f64 %b = fpext float %a to double ; [#uses=1] %A = load double* %D ; [#uses=1] -;CHECK: vcvt.f32.f64 %B = fptrunc double %A to float ; [#uses=1] store double %b, double* %D store float %B, float* %P diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll index c886125a2fb0..e524395c501a 100644 --- a/test/CodeGen/ARM/vld1.ll +++ b/test/CodeGen/ARM/vld1.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s define <8 x i8> @vld1i8(i8* %A) nounwind { ;CHECK: vld1i8: @@ -19,7 +20,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind { ;Check for a post-increment updating load. define <4 x i16> @vld1i16_update(i16** %ptr) nounwind { ;CHECK: vld1i16_update: -;CHECK: vld1.16 {d16}, [r1]! +;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]! 
%A = load i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) @@ -39,7 +40,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind { ;Check for a post-increment updating load with register increment. define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind { ;CHECK: vld1i32_update: -;CHECK: vld1.32 {d16}, [r2], r1 +;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}} %A = load i32** %ptr %tmp0 = bitcast i32* %A to i8* %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1) @@ -75,7 +76,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind { ;Check for a post-increment updating load. define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind { ;CHECK: vld1Qi8_update: -;CHECK: vld1.8 {d16, d17}, [r1, :64]! +;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}, :64]! %A = load i8** %ptr %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8) %tmp2 = getelementptr i8* %A, i32 16 @@ -132,8 +133,6 @@ declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly ; Do not crash if the vld1 result is not used. define void @unused_vld1_result() { entry: -;CHECK: unused_vld1_result -;CHECK: vld1.32 %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) call void @llvm.trap() unreachable diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll index dde530f6df1f..b495319830b0 100644 --- a/test/CodeGen/ARM/vld3.ll +++ b/test/CodeGen/ARM/vld3.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } @@ -36,7 +37,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind { ;Check for a post-increment updating load with register increment. 
define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind { ;CHECK: vld3i16_update: -;CHECK: vld3.16 {d16, d17, d18}, [r2], r1 +;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}} %A = load i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) @@ -121,8 +122,8 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind { ;Check for a post-increment updating load. define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind { ;CHECK: vld3Qi32_update: -;CHECK: vld3.32 {d16, d18, d20}, [r1]! -;CHECK: vld3.32 {d17, d19, d21}, [r1]! +;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]! +;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]! %A = load i32** %ptr %tmp0 = bitcast i32* %A to i8* %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1) diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index 770ed071ac12..805aad51d4fd 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld1lanei8: @@ -279,7 +280,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;Check for a post-increment updating load with register increment. define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind { ;CHECK: vld3laneQi16_update: -;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r2], r1 +;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [{{r[0-9]+}}], {{r[0-9]+}} %A = load i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B @@ -490,7 +491,7 @@ declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x flo ; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because ; we don't currently have a QQQQ_VFP2 super-regclass. 
(The "0" for the low ; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.) -define void @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind { +define <8 x i16> @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind { ;CHECK: test_qqqq_regsequence_subreg ;CHECK: vld3.16 %tmp63 = extractvalue [6 x i64] %b, 5 @@ -499,8 +500,12 @@ define void @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind { %ins67 = or i128 %tmp65, 0 %tmp78 = bitcast i128 %ins67 to <8 x i16> %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2) - call void @llvm.trap() - unreachable + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 2 + %tmp6 = add <8 x i16> %tmp3, %tmp4 + %tmp7 = add <8 x i16> %tmp5, %tmp6 + ret <8 x i16> %tmp7 } declare void @llvm.trap() nounwind diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index ee033caa00d0..1fd6581ae081 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmuli8: @@ -158,6 +158,15 @@ define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ret <8 x i16> %tmp5 } +define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmulls8_int: +;CHECK: vmull.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vmulls16: ;CHECK: vmull.s16 @@ -169,6 +178,15 @@ define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ret <4 x i32> %tmp5 } +define <4 
x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmulls16_int: +;CHECK: vmull.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vmulls32: ;CHECK: vmull.s32 @@ -180,6 +198,15 @@ define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ret <2 x i64> %tmp5 } +define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmulls32_int: +;CHECK: vmull.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmullu8: ;CHECK: vmull.u8 @@ -191,6 +218,15 @@ define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ret <8 x i16> %tmp5 } +define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmullu8_int: +;CHECK: vmull.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vmullu16: ;CHECK: vmull.u16 @@ -202,6 +238,15 @@ define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ret <4 x i32> %tmp5 } +define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmullu16_int: +;CHECK: vmull.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vmullu32: ;CHECK: vmull.u32 @@ -213,6 +258,15 @@ define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ret <2 x i64> %tmp5 } +define <2 x i64> 
@vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmullu32_int: +;CHECK: vmull.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmullp8: ;CHECK: vmull.p8 @@ -233,6 +287,15 @@ entry: ret <4 x i32> %3 } +define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16_int(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vmull_lanes16_int +; CHECK: vmull.s16 q0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> ; <<4 x i16>> [#uses=1] + %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 +} + define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { entry: ; CHECK: test_vmull_lanes32 @@ -244,6 +307,15 @@ entry: ret <2 x i64> %3 } +define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32_int(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vmull_lanes32_int +; CHECK: vmull.s32 q0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> ; <<2 x i32>> [#uses=1] + %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %1 +} + define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone { entry: ; CHECK: test_vmull_laneu16 @@ -255,6 +327,15 @@ entry: ret <4 x i32> %3 } +define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16_int(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone { +entry: +; CHECK: test_vmull_laneu16_int +; CHECK: vmull.u16 q0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x 
i16> undef, <4 x i32> ; <<4 x i16>> [#uses=1] + %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 +} + define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone { entry: ; CHECK: test_vmull_laneu32 @@ -266,6 +347,23 @@ entry: ret <2 x i64> %3 } +define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32_int(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone { +entry: +; CHECK: test_vmull_laneu32_int +; CHECK: vmull.u32 q0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> ; <<2 x i32>> [#uses=1] + %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %1 +} + +declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone + declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone @@ -339,3 +437,58 @@ define <2 x i64> @vmull_extvec_u32(<2 x i32> %arg) nounwind { %tmp4 = mul <2 x i64> %tmp3, ret <2 x i64> %tmp4 } + +; rdar://9197392 +define void @distribue(i16* %dst, i8* %src, i32 %mul) nounwind { +entry: +; CHECK: distribue: +; CHECK: vmull.u8 [[REG1:(q[0-9]+)]], d{{.*}}, [[REG2:(d[0-9]+)]] +; CHECK: vmlal.u8 [[REG1]], d{{.*}}, [[REG2]] + %0 = trunc i32 %mul to i8 + %1 = insertelement <8 x i8> undef, i8 %0, i32 0 + %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + %3 = tail call <16 
x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1) + %4 = bitcast <16 x i8> %3 to <2 x double> + %5 = extractelement <2 x double> %4, i32 1 + %6 = bitcast double %5 to <8 x i8> + %7 = zext <8 x i8> %6 to <8 x i16> + %8 = zext <8 x i8> %2 to <8 x i16> + %9 = extractelement <2 x double> %4, i32 0 + %10 = bitcast double %9 to <8 x i8> + %11 = zext <8 x i8> %10 to <8 x i16> + %12 = add <8 x i16> %7, %11 + %13 = mul <8 x i16> %12, %8 + %14 = bitcast i16* %dst to i8* + tail call void @llvm.arm.neon.vst1.v8i16(i8* %14, <8 x i16> %13, i32 2) + ret void +} + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly + +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind + +; Take advantage of the Cortex-A8 multiplier accumulator forward. + +%struct.uint8x8_t = type { <8 x i8> } + +define void @distribue2(%struct.uint8x8_t* nocapture %dst, i8* %src, i32 %mul) nounwind { +entry: +; CHECK: distribue2 +; CHECK-NOT: vadd.i8 +; CHECK: vmul.i8 +; CHECK: vmla.i8 + %0 = trunc i32 %mul to i8 + %1 = insertelement <8 x i8> undef, i8 %0, i32 0 + %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1) + %4 = bitcast <16 x i8> %3 to <2 x double> + %5 = extractelement <2 x double> %4, i32 1 + %6 = bitcast double %5 to <8 x i8> + %7 = extractelement <2 x double> %4, i32 0 + %8 = bitcast double %7 to <8 x i8> + %9 = add <8 x i8> %6, %8 + %10 = mul <8 x i8> %9, %2 + %11 = getelementptr inbounds %struct.uint8x8_t* %dst, i32 0, i32 0 + store <8 x i8> %10, <8 x i8>* %11, align 8 + ret void +} diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll index d262303bc60e..e3372a03793d 100644 --- a/test/CodeGen/ARM/vst3.ll +++ b/test/CodeGen/ARM/vst3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon -O0 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -disable-arm-fast-isel -O0 | FileCheck %s define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst3i8: 
diff --git a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll index 3ee5e8df9972..50fccb440990 100644 --- a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll +++ b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs +; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs -regalloc=greedy ; Provoke an error in LowerSubregsPass::LowerExtract where the live range of a ; super-register is illegally extended. diff --git a/test/CodeGen/CellSPU/jumptable.ll b/test/CodeGen/CellSPU/jumptable.ll index 42b41b3bf29b..87376ef6ed53 100644 --- a/test/CodeGen/CellSPU/jumptable.ll +++ b/test/CodeGen/CellSPU/jumptable.ll @@ -1,4 +1,4 @@ -;RUN: llc --march=cellspu %s -o - | FileCheck %s +;RUN: llc --march=cellspu -disable-cgp-branch-opts %s -o - | FileCheck %s ; This is to check that emitting jumptables doesn't crash llc define i32 @test(i32 %param) { entry: diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll index 03d7ad1153a1..4771752f5f4c 100644 --- a/test/CodeGen/CellSPU/loads.ll +++ b/test/CodeGen/CellSPU/loads.ll @@ -50,3 +50,10 @@ define i32 @load_misaligned( i32* %ptr ){ %rv = load i32* %ptr, align 2 ret i32 %rv } + +define <4 x i32> @load_null_vec( ) { +;CHECK: lqa +;CHECK: bi $lr + %rv = load <4 x i32>* null + ret <4 x i32> %rv +} diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll index e1172089c703..b1219e6f56e5 100644 --- a/test/CodeGen/CellSPU/rotate_ops.ll +++ b/test/CodeGen/CellSPU/rotate_ops.ll @@ -3,9 +3,9 @@ ; RUN: grep roth %t1.s | count 8 ; RUN: grep roti.*5 %t1.s | count 1 ; RUN: grep roti.*27 %t1.s | count 1 -; RUN grep rothi.*5 %t1.s | count 2 -; RUN grep rothi.*11 %t1.s | count 1 -; RUN grep rothi.*,.3 %t1.s | count 1 +; RUN: grep rothi.*5 %t1.s | count 2 +; RUN: grep rothi.*11 %t1.s | count 1 +; RUN: grep rothi.*,.3 %t1.s | count 1 ; 
RUN: grep andhi %t1.s | count 4 ; RUN: grep shlhi %t1.s | count 4 ; RUN: cat %t1.s | FileCheck %s diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll index 92390abf9465..c4a5abd29042 100644 --- a/test/CodeGen/CellSPU/shift_ops.ll +++ b/test/CodeGen/CellSPU/shift_ops.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep {shlh } %t1.s | count 9 +; RUN: grep {shlh } %t1.s | count 10 ; RUN: grep {shlhi } %t1.s | count 3 -; RUN: grep {shl } %t1.s | count 9 +; RUN: grep {shl } %t1.s | count 11 ; RUN: grep {shli } %t1.s | count 3 ; RUN: grep {xshw } %t1.s | count 5 ; RUN: grep {and } %t1.s | count 14 @@ -14,15 +14,12 @@ ; RUN: grep {rotqbyi } %t1.s | count 1 ; RUN: grep {rotqbii } %t1.s | count 2 ; RUN: grep {rotqbybi } %t1.s | count 1 -; RUN: grep {sfi } %t1.s | count 4 +; RUN: grep {sfi } %t1.s | count 6 ; RUN: cat %t1.s | FileCheck %s target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" -; Vector shifts are not currently supported in gcc or llvm assembly. These are -; not tested. 
- ; Shift left i16 via register, note that the second operand to shl is promoted ; to a 32-bit type: @@ -293,3 +290,55 @@ define i128 @test_lshr_i128( i128 %val ) { %rv = lshr i128 %val, 64 ret i128 %rv } + +;Vector shifts +define <2 x i32> @shl_v2i32(<2 x i32> %val, <2 x i32> %sh) { +;CHECK: shl +;CHECK: bi $lr + %rv = shl <2 x i32> %val, %sh + ret <2 x i32> %rv +} + +define <4 x i32> @shl_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: shl +;CHECK: bi $lr + %rv = shl <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @shl_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: shlh +;CHECK: bi $lr + %rv = shl <8 x i16> %val, %sh + ret <8 x i16> %rv +} + +define <4 x i32> @lshr_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: rotm +;CHECK: bi $lr + %rv = lshr <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @lshr_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: sfhi +;CHECK: rothm +;CHECK: bi $lr + %rv = lshr <8 x i16> %val, %sh + ret <8 x i16> %rv +} + +define <4 x i32> @ashr_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: rotma +;CHECK: bi $lr + %rv = ashr <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: sfhi +;CHECK: rotmah +;CHECK: bi $lr + %rv = ashr <8 x i16> %val, %sh + ret <8 x i16> %rv +} diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll index 7e0bf06b4e45..6ca5b0892304 100644 --- a/test/CodeGen/CellSPU/stores.ll +++ b/test/CodeGen/CellSPU/stores.ll @@ -171,3 +171,11 @@ define void @store_v8( <8 x float> %val, <8 x float>* %ptr ) store <8 x float> %val, <8 x float>* %ptr ret void } + +define void @store_null_vec( <4 x i32> %val ) { +; FIXME - this is for some reason compiled into a il+stqd, not a sta. 
+;CHECK: stqd +;CHECK: bi $lr + store <4 x i32> %val, <4 x i32>* null + ret void +} diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll index efd032031002..09e15ffbc75d 100644 --- a/test/CodeGen/CellSPU/v2f32.ll +++ b/test/CodeGen/CellSPU/v2f32.ll @@ -33,6 +33,7 @@ define %vec @test_mul(%vec %param) ret %vec %1 } +; CHECK: test_splat: define %vec @test_splat(float %param ) { ;CHECK: lqa ;CHECK: shufb @@ -43,16 +44,17 @@ define %vec @test_splat(float %param ) { } define void @test_store(%vec %val, %vec* %ptr){ - +; CHECK: test_store: ;CHECK: stqd - store %vec undef, %vec* null + store %vec zeroinitializer, %vec* null -;CHECK: stqd $3, 0(${{.}}) +;CHECK: stqd $3, 0(${{.*}}) ;CHECK: bi $lr store %vec %val, %vec* %ptr ret void } +; CHECK: test_insert: define %vec @test_insert(){ ;CHECK: cwd ;CHECK: shufb $3 @@ -61,6 +63,8 @@ define %vec @test_insert(){ ret %vec %rv } +; CHECK: test_unaligned_store: + define void @test_unaligned_store() { ;CHECK: cdd ;CHECK: shufb @@ -68,7 +72,7 @@ define void @test_unaligned_store() { %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1] %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; [#uses=1] %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1] - store <2 x float> undef, <2 x float>* %vptr + store <2 x float> zeroinitializer, <2 x float>* %vptr ret void } diff --git a/test/CodeGen/Generic/crash.ll b/test/CodeGen/Generic/crash.ll index 042739884df7..e7cc7e339406 100644 --- a/test/CodeGen/Generic/crash.ll +++ b/test/CodeGen/Generic/crash.ll @@ -38,3 +38,31 @@ unreachable declare void @Parse_Vector(double*) declare i32 @llvm.objectsize.i32(i8*, i1) + +; PR9578 +%struct.S0 = type { i32, i8, i32 } + +define void @func_82() nounwind optsize { +entry: + br label %for.body.i + +for.body.i: ; preds = %for.body.i, %entry + br i1 undef, label %func_74.exit.for.cond29.thread_crit_edge, label %for.body.i + +func_74.exit.for.cond29.thread_crit_edge: ; preds = %for.body.i + 
%f13576.pre = getelementptr inbounds %struct.S0* undef, i64 0, i32 1 + store i8 0, i8* %f13576.pre, align 4, !tbaa !0 + br label %lbl_468 + +lbl_468: ; preds = %lbl_468, %func_74.exit.for.cond29.thread_crit_edge + %f13577.ph = phi i8* [ %f13576.pre, %func_74.exit.for.cond29.thread_crit_edge ], [ %f135.pre, %lbl_468 ] + store i8 1, i8* %f13577.ph, align 1 + %f135.pre = getelementptr inbounds %struct.S0* undef, i64 0, i32 1 + br i1 undef, label %lbl_468, label %for.end74 + +for.end74: ; preds = %lbl_468 + ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/MBlaze/fsl.ll b/test/CodeGen/MBlaze/fsl.ll index f9c6205bc19f..5444f82dd63c 100644 --- a/test/CodeGen/MBlaze/fsl.ll +++ b/test/CodeGen/MBlaze/fsl.ll @@ -3,7 +3,7 @@ ; dynamic version of the instructions and that constant values use the ; constant version of the instructions. ; -; RUN: llc < %s -march=mblaze | FileCheck %s +; RUN: llc -O3 < %s -march=mblaze | FileCheck %s declare i32 @llvm.mblaze.fsl.get(i32 %port) declare i32 @llvm.mblaze.fsl.aget(i32 %port) @@ -55,8 +55,7 @@ declare void @llvm.mblaze.fsl.tnaput(i32 %port) declare void @llvm.mblaze.fsl.tncput(i32 %port) declare void @llvm.mblaze.fsl.tncaput(i32 %port) -define i32 @fsl_get(i32 %port) -{ +define void @fsl_get(i32 %port) { ; CHECK: fsl_get: %v0 = call i32 @llvm.mblaze.fsl.get(i32 %port) ; CHECK: getd @@ -122,12 +121,11 @@ define i32 @fsl_get(i32 %port) ; CHECK-NEXT: tnecgetd %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 %port) ; CHECK-NEXT: tnecagetd - ret i32 1 + ret void ; CHECK: rtsd } -define i32 @fslc_get() -{ +define void @fslc_get() { ; CHECK: fslc_get: %v0 = call i32 @llvm.mblaze.fsl.get(i32 1) ; CHECK: get @@ -224,12 +222,11 @@ define i32 @fslc_get() %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 1) ; CHECK-NOT: tnecagetd ; CHECK: tnecaget - ret i32 1 + ret void ; CHECK: rtsd } -define void @putfsl(i32 %value, i32 %port) -{ +define void 
@putfsl(i32 %value, i32 %port) { ; CHECK: putfsl: call void @llvm.mblaze.fsl.put(i32 %value, i32 %port) ; CHECK: putd @@ -267,8 +264,7 @@ define void @putfsl(i32 %value, i32 %port) ; CHECK: rtsd } -define void @putfsl_const(i32 %value) -{ +define void @putfsl_const(i32 %value) { ; CHECK: putfsl_const: call void @llvm.mblaze.fsl.put(i32 %value, i32 1) ; CHECK-NOT: putd diff --git a/test/CodeGen/MBlaze/loop.ll b/test/CodeGen/MBlaze/loop.ll index 8973f75aa1dc..7439d0b6fe22 100644 --- a/test/CodeGen/MBlaze/loop.ll +++ b/test/CodeGen/MBlaze/loop.ll @@ -29,14 +29,12 @@ loop_inner_finish: %inner.5 = add i32 %inner.2, 1 call i32 (i8*,...)* @printf( i8* getelementptr([19 x i8]* @MSG,i32 0,i32 0), i32 %inner.0, i32 %inner.1, i32 %inner.2 ) - ; CHECK: brlid - ; CHECK: addik {{.*, 1}} %inner.6 = icmp eq i32 %inner.5, 100 - ; CHECK: cmp + ; CHECK: cmp [[REG:r[0-9]*]] br i1 %inner.6, label %loop_inner, label %loop_outer_finish - ; CHECK: {{beq|bne}} + ; CHECK: {{beqid|bneid}} [[REG]] loop_outer_finish: %outer.1 = add i32 %outer.0, 1 diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll index 20bd88889061..94dfe35faba1 100644 --- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll +++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=mips -o %t ; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2 -; RUN: grep {rodata.cst4,"aM",@progbits} %t | count 1 +; RUN: grep {.rodata.cst4,"aM",@progbits} %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "mipsallegrexel-unknown-psp-elf" diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll index ca837ffd2a50..519e4b93a72b 100644 --- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll +++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll @@ -2,6 +2,10 @@ ; RUN: grep {c\\..*\\.s} %t | count 3 ; RUN: grep {bc1\[tf\]} %t | count 3 +; FIXME: Disabled because branch 
instructions are generated where +; conditional move instructions are expected. +; REQUIRES: disabled + target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "mipsallegrexel-unknown-psp-elf" diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll index 52a4b081ddb3..e85a749f7dcd 100644 --- a/test/CodeGen/Mips/2008-07-29-icmp.ll +++ b/test/CodeGen/Mips/2008-07-29-icmp.ll @@ -1,5 +1,9 @@ ; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1 +; FIXME: Disabled because branch instructions are generated where +; conditional move instructions are expected. +; REQUIRES: disabled + target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "mipsallegrexel-unknown-psp-elf" diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll index 7be7974e0ffe..6dd4af111cd9 100644 --- a/test/CodeGen/Mips/2008-08-06-Alloca.ll +++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2 +; RUN: llc < %s -march=mips -regalloc=basic | grep {subu.*sp} | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "mipsallegrexel-unknown-psp-elf" diff --git a/test/CodeGen/Mips/2010-07-20-Select.ll b/test/CodeGen/Mips/2010-07-20-Select.ll index 891b5d9e1884..e5e2c5473770 100644 --- a/test/CodeGen/Mips/2010-07-20-Select.ll +++ b/test/CodeGen/Mips/2010-07-20-Select.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s +; RUN: llc < %s -march=mips -relocation-model=static -regalloc=basic | FileCheck %s ; Fix PR7473 define i32 @main() nounwind readnone { @@ -9,12 +10,12 @@ entry: volatile store i32 0, i32* %c, align 4 %0 = volatile load i32* %a, align 4 ; [#uses=1] 
%1 = icmp eq i32 %0, 0 ; [#uses=1] -; CHECK: addiu $3, $zero, 0 +; CHECK: addiu $[[R1:[0-9]+]], $zero, 0 %iftmp.0.0 = select i1 %1, i32 3, i32 0 ; [#uses=1] %2 = volatile load i32* %c, align 4 ; [#uses=1] %3 = icmp eq i32 %2, 0 ; [#uses=1] -; CHECK: addiu $3, $zero, 3 -; CHECK: addu $2, $5, $3 +; CHECK: addiu $[[R1]], $zero, 3 +; CHECK: addu $2, ${{.}}, $[[R1]] %iftmp.2.0 = select i1 %3, i32 0, i32 5 ; [#uses=1] %4 = add nsw i32 %iftmp.2.0, %iftmp.0.0 ; [#uses=1] ret i32 %4 diff --git a/test/CodeGen/Mips/addc.ll b/test/CodeGen/Mips/addc.ll new file mode 100644 index 000000000000..e5d05b1d6dbb --- /dev/null +++ b/test/CodeGen/Mips/addc.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s +; RUN: llc < %s -march=mips | FileCheck %s + +define void @f(i64 %l, i64* nocapture %p) nounwind { +entry: +; CHECK: lui +; CHECK: ori +; CHECK: addu + %add = add i64 %l, 1311768467294899695 + store i64 %add, i64* %p, align 4 + ret void +} + diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll new file mode 100644 index 000000000000..8f0bdf286c52 --- /dev/null +++ b/test/CodeGen/Mips/analyzebranch.ll @@ -0,0 +1,46 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +define double @foo(double %a, double %b) nounwind readnone { +entry: +; CHECK: bc1f $BB0_2 +; CHECK: nop +; CHECK: # BB#1: + + %cmp = fcmp ogt double %a, 0.000000e+00 + br i1 %cmp, label %if.end6, label %if.else + +if.else: ; preds = %entry + %cmp3 = fcmp ogt double %b, 0.000000e+00 + br i1 %cmp3, label %if.end6, label %return + +if.end6: ; preds = %if.else, %entry + %c.0 = phi double [ %a, %entry ], [ 0.000000e+00, %if.else ] + %sub = fsub double %b, %c.0 + %mul = fmul double %sub, 2.000000e+00 + br label %return + +return: ; preds = %if.else, %if.end6 + %retval.0 = phi double [ %mul, %if.end6 ], [ 0.000000e+00, %if.else ] + ret double %retval.0 +} + +define void @f1(float %f) nounwind { +entry: +; CHECK: bc1t $BB1_2 +; CHECK: nop +; CHECK: # BB#1: + %cmp = fcmp une float %f, 
0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @abort() noreturn + unreachable + +if.end: ; preds = %entry + tail call void (...)* @f2() nounwind + ret void +} + +declare void @abort() noreturn nounwind + +declare void @f2(...) diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll new file mode 100644 index 000000000000..e9af3045e15f --- /dev/null +++ b/test/CodeGen/Mips/blockaddr.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC +; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC + +@reg = common global i8* null, align 4 + +define i8* @dummy(i8* %x) nounwind readnone noinline { +entry: + ret i8* %x +} + +; CHECK-PIC: lw $[[R0:[0-9]+]], %got($tmp1)($gp) +; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp1) +; CHECK-PIC: lw $[[R1:[0-9]+]], %got($tmp2)($gp) +; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp2) +; CHECK-STATIC: lui $[[R2:[0-9]+]], %hi($tmp1) +; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp1) +; CHECK-STATIC: lui $[[R3:[0-9]+]], %hi($tmp2) +; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp2) +define void @f() nounwind { +entry: + %call = tail call i8* @dummy(i8* blockaddress(@f, %baz)) + indirectbr i8* %call, [label %baz, label %foo] + +foo: ; preds = %foo, %entry + store i8* blockaddress(@f, %foo), i8** @reg, align 4 + br label %foo + +baz: ; preds = %entry + store i8* null, i8** @reg, align 4 + ret void +} diff --git a/test/CodeGen/Mips/buildpairextractelementf64.ll b/test/CodeGen/Mips/buildpairextractelementf64.ll new file mode 100644 index 000000000000..585bc250fb8c --- /dev/null +++ b/test/CodeGen/Mips/buildpairextractelementf64.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s +; RUN: llc < %s -march=mips | FileCheck %s +@a = external global i32 + +define double @f(i32 %a1, double %d) nounwind { +entry: +; CHECK: mtc1 +; CHECK: mtc1 + store 
i32 %a1, i32* @a, align 4 + %add = fadd double %d, 2.000000e+00 + ret double %add +} + +define void @f3(double %d, i32 %a1) nounwind { +entry: +; CHECK: mfc1 +; CHECK: mfc1 + tail call void @f2(i32 %a1, double %d) nounwind + ret void +} + +declare void @f2(i32, double) + diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll index 7d3e0252e3c9..8329c891f0c2 100755 --- a/test/CodeGen/Mips/cmov.ll +++ b/test/CodeGen/Mips/cmov.ll @@ -1,10 +1,11 @@ ; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s +; RUN: llc -march=mips -mcpu=4ke -regalloc=basic < %s | FileCheck %s @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4 @i3 = common global i32* null, align 4 -; CHECK: lw $3, %got(i3)($gp) -; CHECK: addiu $5, $gp, %got(i1) +; CHECK: lw ${{[0-9]+}}, %got(i3)($gp) +; CHECK: addiu ${{[0-9]+}}, $gp, %got(i1) define i32* @cmov1(i32 %s) nounwind readonly { entry: %tobool = icmp ne i32 %s, 0 diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll new file mode 100644 index 000000000000..398d1b78bd43 --- /dev/null +++ b/test/CodeGen/Mips/divrem.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +; CHECK: div $zero, +define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %div = sdiv i32 %a0, %a1 + ret i32 %div +} + +; CHECK: div $zero, +define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %rem = srem i32 %a0, %a1 + ret i32 %rem +} + +; CHECK: divu $zero, +define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %div = udiv i32 %a0, %a1 + ret i32 %div +} + +; CHECK: divu $zero, +define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %rem = urem i32 %a0, %a1 + ret i32 %rem +} + +; CHECK: div $zero, +define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind { +entry: + %rem = srem i32 %a0, %a1 + store i32 %rem, i32* %r, align 4, !tbaa !0 + %div = sdiv i32 %a0, %a1 + ret i32 %div +} + +; CHECK: divu $zero, +define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind { +entry: + 
%rem = urem i32 %a0, %a1 + store i32 %rem, i32* %r, align 4, !tbaa !0 + %div = udiv i32 %a0, %a1 + ret i32 %div +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/Mips/fpbr.ll b/test/CodeGen/Mips/fpbr.ll new file mode 100644 index 000000000000..0a6478b0f8f0 --- /dev/null +++ b/test/CodeGen/Mips/fpbr.ll @@ -0,0 +1,119 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s + +define void @func0(float %f2, float %f3) nounwind { +entry: +; CHECK: c.eq.s +; CHECK: bc1f + %cmp = fcmp oeq float %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +declare void @g0(...) + +declare void @g1(...) + +define void @func1(float %f2, float %f3) nounwind { +entry: +; CHECK: c.olt.s +; CHECK: bc1f + %cmp = fcmp olt float %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func2(float %f2, float %f3) nounwind { +entry: +; CHECK: c.ole.s +; CHECK: bc1f + %cmp = fcmp ugt float %f2, %f3 + br i1 %cmp, label %if.else, label %if.then + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func3(double %f2, double %f3) nounwind { +entry: +; CHECK: c.eq.d +; CHECK: bc1f + %cmp = fcmp oeq double %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() 
nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func4(double %f2, double %f3) nounwind { +entry: +; CHECK: c.olt.d +; CHECK: bc1f + %cmp = fcmp olt double %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func5(double %f2, double %f3) nounwind { +entry: +; CHECK: c.ole.d +; CHECK: bc1f + %cmp = fcmp ugt double %f2, %f3 + br i1 %cmp, label %if.else, label %if.then + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} diff --git a/test/CodeGen/Mips/fpcmp.ll b/test/CodeGen/Mips/fpcmp.ll new file mode 100644 index 000000000000..c89ffe67f1b9 --- /dev/null +++ b/test/CodeGen/Mips/fpcmp.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=mipsel -mcpu=4ke | FileCheck %s -check-prefix=CHECK-MIPS32R2 +; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-MIPS1 + +@g1 = external global i32 + +define i32 @f(float %f0, float %f1) nounwind { +entry: +; CHECK-MIPS32R2: c.olt.s +; CHECK-MIPS32R2: movt +; CHECK-MIPS32R2: c.olt.s +; CHECK-MIPS32R2: movt +; CHECK-MIPS1: c.olt.s +; CHECK-MIPS1: bc1t +; CHECK-MIPS1: c.olt.s +; CHECK-MIPS1: bc1t + %cmp = fcmp olt float %f0, %f1 + %conv = zext i1 %cmp to i32 + %tmp2 = load i32* @g1, align 4 + %add = add nsw i32 %tmp2, %conv + store i32 %add, i32* @g1, align 4 + %cond = select i1 %cmp, i32 10, i32 20 + ret i32 %cond +} diff --git a/test/CodeGen/Mips/internalfunc.ll b/test/CodeGen/Mips/internalfunc.ll new file mode 100644 index 000000000000..fdfa01a9e0f7 --- /dev/null +++ 
b/test/CodeGen/Mips/internalfunc.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -march=mips | FileCheck %s + +@caller.sf1 = internal unnamed_addr global void (...)* null, align 4 +@gf1 = external global void (...)* +@.str = private unnamed_addr constant [3 x i8] c"f2\00" + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind { +entry: +; CHECK: lw $[[R0:[0-9]+]], %got(f2)($gp) +; CHECK: addiu $25, $[[R0]], %lo(f2) + tail call fastcc void @f2() + ret i32 0 +} + +define void @caller(i32 %a0, i32 %a1) nounwind { +entry: +; CHECK: lw $[[R1:[0-9]+]], %got(caller.sf1)($gp) +; CHECK: addiu ${{[0-9]+}}, $[[R1]], %lo(caller.sf1) + %tobool = icmp eq i32 %a1, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %tmp1 = load void (...)** @caller.sf1, align 4 + tail call void (...)* %tmp1() nounwind + br label %if.end + +if.end: ; preds = %entry, %if.then +; CHECK: lw $[[R2:[0-9]+]], %got(sf2)($gp) +; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)($gp) +; CHECK: addiu ${{[0-9]+}}, $[[R2]], %lo(sf2) +; CHECK: addiu ${{[0-9]+}}, $[[R3]], %lo(caller.sf1) + %tobool3 = icmp ne i32 %a0, 0 + %tmp4 = load void (...)** @gf1, align 4 + %cond = select i1 %tobool3, void (...)* %tmp4, void (...)* bitcast (void ()* @sf2 to void (...)*) + store void (...)* %cond, void (...)** @caller.sf1, align 4 + ret void +} + +define internal void @sf2() nounwind { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) nounwind + ret void +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +define internal fastcc void @f2() nounwind noinline { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) nounwind + ret void +} + diff --git a/test/CodeGen/Mips/largeimm1.ll b/test/CodeGen/Mips/largeimm1.ll new file mode 100644 index 000000000000..d65cc025d085 --- /dev/null +++ b/test/CodeGen/Mips/largeimm1.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +; CHECK: lui $at, 49152 +; CHECK: lui $at, 16384 +define void @f() nounwind { +entry: + %a1 = alloca [1073741824 x i8], align 1 + %arrayidx = getelementptr inbounds [1073741824 x i8]* %a1, i32 0, i32 1048676 + call void @f2(i8* %arrayidx) nounwind + ret void +} + +declare void @f2(i8*) diff --git a/test/CodeGen/Mips/o32_cc.ll b/test/CodeGen/Mips/o32_cc.ll index b6df62be6603..3974cd4a6a76 100644 --- a/test/CodeGen/Mips/o32_cc.ll +++ b/test/CodeGen/Mips/o32_cc.ll @@ -61,8 +61,8 @@ entry: declare void @f4(i32, i32, i32, i32) ; $f12, $6, stack -; CHECK: sw $2, 16($sp) -; CHECK: sw $zero, 20($sp) +; CHECK: sw +; CHECK: sw ; CHECK: ldc1 $f12, %lo ; CHECK: addiu $6, $zero, 23 define void @testlowercall5() nounwind { @@ -98,8 +98,8 @@ entry: declare void @f7(float, i32, i32) ; $4, $5, $6, stack -; CHECK: sw $2, 16($sp) -; CHECK: sw $zero, 20($sp) +; CHECK: sw +; CHECK: sw ; CHECK: addiu $4, $zero, 22 ; CHECK: addiu $5, $zero, 53 ; CHECK: addiu $6, $zero, 44 @@ -115,7 +115,7 @@ declare void @f8(i32, i32, i32, double) ; CHECK: addiu $4, $zero, 32 ; CHECK: addiu $5, $zero, 63 ; CHECK: addiu $6, $zero, 54 -; CHECK: ori $7, $2, 0 +; CHECK: ori $7 define void @testlowercall9() nounwind { entry: tail call void @f9(i32 32, i32 63, i32 54, float 1.100000e+01) nounwind @@ -128,7 +128,7 @@ declare void @f9(i32, i32, i32, float) ; CHECK: addiu $4, $zero, 42 ; CHECK: addiu $5, $zero, 73 ; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7, $2, 0 +; CHECK: ori $7 define void @testlowercall10() nounwind { entry: tail call void @f10(i32 42, i32 73, 
double 2.700000e+01) nounwind @@ -140,7 +140,7 @@ declare void @f10(i32, i32, double) ; $4, ($6, $7) ; CHECK: addiu $4, $zero, 52 ; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7, $2, 0 +; CHECK: ori $7 define void @testlowercall11() nounwind { entry: tail call void @f11(i32 52, double 1.600000e+01) nounwind @@ -152,8 +152,8 @@ declare void @f11(i32, double) ; $f12, $f14, $6, $7 ; CHECK: lwc1 $f12, %lo ; CHECK: lwc1 $f14, %lo -; CHECK: ori $6, $4, 0 -; CHECK: ori $7, $5, 0 +; CHECK: ori $6 +; CHECK: ori $7 define void @testlowercall12() nounwind { entry: tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind @@ -165,7 +165,7 @@ declare void @f12(float, float, float, float) ; $f12, $5, $6, $7 ; CHECK: lwc1 $f12, %lo ; CHECK: addiu $5, $zero, 83 -; CHECK: ori $6, $3, 0 +; CHECK: ori $6 ; CHECK: addiu $7, $zero, 25 define void @testlowercall13() nounwind { entry: @@ -179,7 +179,7 @@ declare void @f13(float, i32, float, i32) ; $f12, $f14, $7 ; CHECK: ldc1 $f12, %lo ; CHECK: lwc1 $f14, %lo -; CHECK: ori $7, $4, 0 +; CHECK: ori $7 define void @testlowercall14() nounwind { entry: tail call void @f14(double 3.500000e+01, float 2.900000e+01, float 3.000000e+01) nounwind @@ -192,7 +192,7 @@ declare void @f14(double, float, float) ; CHECK: lwc1 $f12, %lo ; CHECK: lwc1 $f14, %lo ; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7, $4, 32768 +; CHECK: ori $7 define void @testlowercall15() nounwind { entry: tail call void @f15(float 4.800000e+01, float 3.900000e+01, double 3.700000e+01) nounwind @@ -203,9 +203,9 @@ declare void @f15(float, float, double) ; $4, $5, $6, $7 ; CHECK: addiu $4, $zero, 62 -; CHECK: ori $5, $2, 0 +; CHECK: ori $5 ; CHECK: addiu $6, $zero, 64 -; CHECK: ori $7, $3, 0 +; CHECK: ori $7 define void @testlowercall16() nounwind { entry: tail call void @f16(i32 62, float 4.900000e+01, i32 64, float 3.100000e+01) nounwind @@ -216,7 +216,7 @@ declare void @f16(i32, float, i32, float) ; $4, $5, $6, $7 ; CHECK: addiu $4, 
$zero, 72 -; CHECK: ori $5, $2, 0 +; CHECK: ori $5 ; CHECK: addiu $6, $zero, 74 ; CHECK: addiu $7, $zero, 35 define void @testlowercall17() nounwind { @@ -230,7 +230,7 @@ declare void @f17(i32, float, i32, i32) ; $4, $5, $6, $7 ; CHECK: addiu $4, $zero, 82 ; CHECK: addiu $5, $zero, 93 -; CHECK: ori $6, $2, 0 +; CHECK: ori $6 ; CHECK: addiu $7, $zero, 45 define void @testlowercall18() nounwind { entry: @@ -242,11 +242,11 @@ declare void @f18(i32, i32, float, i32) ; $4, ($6, $7), stack -; CHECK: sw $2, 16($sp) -; CHECK: sw $zero, 20($sp) +; CHECK: sw +; CHECK: sw ; CHECK: addiu $4, $zero, 92 ; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7, $3, 0 +; CHECK: ori $7 define void @testlowercall20() nounwind { entry: tail call void @f20(i32 92, double 2.600000e+01, double 4.700000e+01) nounwind @@ -270,7 +270,7 @@ declare void @f21(float, i32) ; CHECK: lwc1 $f12, %lo ; CHECK: addiu $5, $zero, 113 ; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7, $3, 32768 +; CHECK: ori $7 define void @testlowercall22() nounwind { entry: tail call void @f22(float 6.800000e+01, i32 113, double 5.700000e+01) nounwind @@ -291,8 +291,8 @@ entry: declare void @f23(double, i32) ; $f12,$6, stack -; CHECK: sw $2, 16($sp) -; CHECK: sw $zero, 20($sp) +; CHECK: sw +; CHECK: sw ; CHECK: ldc1 $f12, %lo ; CHECK: addiu $6, $zero, 133 define void @testlowercall24() nounwind { @@ -306,15 +306,15 @@ declare void @f24(double, i32, double) ; CHECK: lwc1 $f12, %lo ; lwc1 $f12, %lo ; CHECK: lwc1 $f14, %lo -; CHECK: ori $6, $4, 0 -; CHECK: ori $7, $5, 0 +; CHECK: ori $6 +; CHECK: ori $7 ; CHECK: lwc1 $f12, %lo ; CHECK: addiu $5, $zero, 83 -; CHECK: ori $6, $3, 0 +; CHECK: ori $6 ; CHECK: addiu $7, $zero, 25 ; CHECK: addiu $4, $zero, 82 ; CHECK: addiu $5, $zero, 93 -; CHECK: ori $6, $2, 0 +; CHECK: ori $6 ; CHECK: addiu $7, $zero, 45 define void @testlowercall25() nounwind { entry: diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll new file mode 100644 index 000000000000..1f71ed2640eb --- 
/dev/null +++ b/test/CodeGen/Mips/o32_cc_vararg.ll @@ -0,0 +1,278 @@ +; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s | FileCheck %s +; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s -regalloc=basic | FileCheck %s + + +; All test functions do the same thing - they return the first variable +; argument. + +; All CHECK's do the same thing - they check whether variable arguments from +; registers are placed on correct stack locations, and whether the first +; variable argument is returned from the correct stack location. + + +declare void @llvm.va_start(i8*) nounwind +declare void @llvm.va_end(i8*) nounwind + +; return int +define i32 @va1(i32 %a, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %b = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %b, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %b, align 4 + ret i32 %tmp + +; CHECK: va1: +; CHECK: addiu $sp, $sp, -32 +; CHECK: sw $7, 44($sp) +; CHECK: sw $6, 40($sp) +; CHECK: sw $5, 36($sp) +; CHECK: lw $2, 36($sp) +} + +; check whether the variable double argument will be accessed from the 8-byte +; aligned location (i.e. whether the address is computed by adding 7 and +; clearing lower 3 bits) +define double @va2(i32 %a, ...) 
nounwind { +entry: + %a.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %b = alloca double, align 8 + store i32 %a, i32* %a.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %b, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %b, align 8 + ret double %tmp + +; CHECK: va2: +; CHECK: addiu $sp, $sp, -40 +; CHECK: sw $7, 52($sp) +; CHECK: sw $6, 48($sp) +; CHECK: sw $5, 44($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 44 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} + +; int +define i32 @va3(double %a, ...) nounwind { +entry: + %a.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %b = alloca i32, align 4 + store double %a, double* %a.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %b, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %b, align 4 + ret i32 %tmp + +; CHECK: va3: +; CHECK: addiu $sp, $sp, -40 +; CHECK: sw $7, 52($sp) +; CHECK: sw $6, 48($sp) +; CHECK: lw $2, 48($sp) +} + +; double +define double @va4(double %a, ...) 
nounwind { +entry: + %a.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %b = alloca double, align 8 + store double %a, double* %a.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %b, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %b, align 8 + ret double %tmp + +; CHECK: va4: +; CHECK: addiu $sp, $sp, -48 +; CHECK: sw $7, 60($sp) +; CHECK: sw $6, 56($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 56 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} + +; int +define i32 @va5(i32 %a, i32 %b, i32 %c, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %d, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %d, align 4 + ret i32 %tmp + +; CHECK: va5: +; CHECK: addiu $sp, $sp, -40 +; CHECK: sw $7, 52($sp) +; CHECK: lw $2, 52($sp) +} + +; double +define double @va6(i32 %a, i32 %b, i32 %c, ...) 
nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca double, align 8 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %d, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %d, align 8 + ret double %tmp + +; CHECK: va6: +; CHECK: addiu $sp, $sp, -48 +; CHECK: sw $7, 60($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 60 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} + +; int +define i32 @va7(i32 %a, double %b, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %c, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %c, align 4 + ret i32 %tmp + +; CHECK: va7: +; CHECK: addiu $sp, $sp, -40 +; CHECK: lw $2, 56($sp) +} + +; double +define double @va8(i32 %a, double %b, ...) 
nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca double, align 8 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %c, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %c, align 8 + ret double %tmp + +; CHECK: va8: +; CHECK: addiu $sp, $sp, -48 +; CHECK: addiu $[[R0:[0-9]+]], $sp, 64 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} + +; int +define i32 @va9(double %a, double %b, i32 %c, ...) nounwind { +entry: + %a.addr = alloca double, align 8 + %b.addr = alloca double, align 8 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca i32, align 4 + store double %a, double* %a.addr, align 8 + store double %b, double* %b.addr, align 8 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %d, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %d, align 4 + ret i32 %tmp + +; CHECK: va9: +; CHECK: addiu $sp, $sp, -56 +; CHECK: lw $2, 76($sp) +} + +; double +define double @va10(double %a, double %b, i32 %c, ...) 
nounwind { +entry: + %a.addr = alloca double, align 8 + %b.addr = alloca double, align 8 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca double, align 8 + store double %a, double* %a.addr, align 8 + store double %b, double* %b.addr, align 8 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %d, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %d, align 8 + ret double %tmp + +; CHECK: va10: +; CHECK: addiu $sp, $sp, -56 +; CHECK: addiu $[[R0:[0-9]+]], $sp, 76 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll new file mode 100644 index 000000000000..c83fa3ece026 --- /dev/null +++ b/test/CodeGen/Mips/select.ll @@ -0,0 +1,196 @@ +; RUN: llc < %s -march=mipsel -mcpu=4ke | FileCheck %s -check-prefix=CHECK-MIPS32R2 +; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-MIPS1 + +@d2 = external global double +@d3 = external global double + +define i32 @sel1(i32 %s, i32 %f0, i32 %f1) nounwind readnone { +entry: +; CHECK-MIPS32R2: movn +; CHECK-MIPS1: beq + %tobool = icmp ne i32 %s, 0 + %cond = select i1 %tobool, i32 %f1, i32 %f0 + ret i32 %cond +} + +define float @sel2(i32 %s, float %f0, float %f1) nounwind readnone { +entry: +; CHECK-MIPS32R2: movn.s +; CHECK-MIPS1: beq + %tobool = icmp ne i32 %s, 0 + %cond = select i1 %tobool, float %f0, float %f1 + ret float %cond +} + +define double @sel2_1(i32 %s, double %f0, double %f1) nounwind readnone { +entry: +; CHECK-MIPS32R2: movn.d +; CHECK-MIPS1: beq + %tobool = icmp ne i32 %s, 0 + %cond = select i1 %tobool, double %f0, double %f1 + ret double %cond +} + +define float @sel3(float %f0, float %f1, float %f2, float %f3) nounwind readnone { +entry: 
+; CHECK-MIPS32R2: c.eq.s +; CHECK-MIPS32R2: movt.s +; CHECK-MIPS1: c.eq.s +; CHECK-MIPS1: bc1f + %cmp = fcmp oeq float %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define float @sel4(float %f0, float %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.olt.s +; CHECK-MIPS32R2: movt.s +; CHECK-MIPS1: c.olt.s +; CHECK-MIPS1: bc1f + %cmp = fcmp olt float %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define float @sel5(float %f0, float %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.ule.s +; CHECK-MIPS32R2: movf.s +; CHECK-MIPS1: c.ule.s +; CHECK-MIPS1: bc1t + %cmp = fcmp ogt float %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define double @sel5_1(double %f0, double %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.ule.s +; CHECK-MIPS32R2: movf.d +; CHECK-MIPS1: c.ule.s +; CHECK-MIPS1: bc1t + %cmp = fcmp ogt float %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define double @sel6(double %f0, double %f1, double %f2, double %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.eq.d +; CHECK-MIPS32R2: movt.d +; CHECK-MIPS1: c.eq.d +; CHECK-MIPS1: bc1f + %cmp = fcmp oeq double %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define double @sel7(double %f0, double %f1, double %f2, double %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.olt.d +; CHECK-MIPS32R2: movt.d +; CHECK-MIPS1: c.olt.d +; CHECK-MIPS1: bc1f + %cmp = fcmp olt double %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define double @sel8(double %f0, double %f1, double %f2, double %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.ule.d +; CHECK-MIPS32R2: movf.d +; CHECK-MIPS1: c.ule.d +; CHECK-MIPS1: bc1t + %cmp = fcmp ogt double %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define 
float @sel8_1(float %f0, float %f1, double %f2, double %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.ule.d +; CHECK-MIPS32R2: movf.s +; CHECK-MIPS1: c.ule.d +; CHECK-MIPS1: bc1t + %cmp = fcmp ogt double %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define i32 @sel9(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.eq.s +; CHECK-MIPS32R2: movt +; CHECK-MIPS1: c.eq.s +; CHECK-MIPS1: bc1f + %cmp = fcmp oeq float %f2, %f3 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel10(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.olt.s +; CHECK-MIPS32R2: movt +; CHECK-MIPS1: c.olt.s +; CHECK-MIPS1: bc1f + %cmp = fcmp olt float %f2, %f3 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel11(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.ule.s +; CHECK-MIPS32R2: movf +; CHECK-MIPS1: c.ule.s +; CHECK-MIPS1: bc1t + %cmp = fcmp ogt float %f2, %f3 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel12(i32 %f0, i32 %f1) nounwind readonly { +entry: +; CHECK-MIPS32R2: c.eq.d +; CHECK-MIPS32R2: movt +; CHECK-MIPS1: c.eq.d +; CHECK-MIPS1: bc1f + %tmp = load double* @d2, align 8, !tbaa !0 + %tmp1 = load double* @d3, align 8, !tbaa !0 + %cmp = fcmp oeq double %tmp, %tmp1 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel13(i32 %f0, i32 %f1) nounwind readonly { +entry: +; CHECK-MIPS32R2: c.olt.d +; CHECK-MIPS32R2: movt +; CHECK-MIPS1: c.olt.d +; CHECK-MIPS1: bc1f + %tmp = load double* @d2, align 8, !tbaa !0 + %tmp1 = load double* @d3, align 8, !tbaa !0 + %cmp = fcmp olt double %tmp, %tmp1 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel14(i32 %f0, i32 %f1) nounwind readonly { +entry: +; CHECK-MIPS32R2: c.ule.d +; CHECK-MIPS32R2: movf +; CHECK-MIPS1: c.ule.d +; CHECK-MIPS1: bc1t + %tmp 
= load double* @d2, align 8, !tbaa !0 + %tmp1 = load double* @d3, align 8, !tbaa !0 + %cmp = fcmp ogt double %tmp, %tmp1 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll index 1259d03e96c9..235b00e8782f 100644 --- a/test/CodeGen/PTX/add.ll +++ b/test/CodeGen/PTX/add.ll @@ -1,15 +1,71 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s -define ptx_device i32 @t1(i32 %x, i32 %y) { -; CHECK: add.s32 r0, r1, r2; +define ptx_device i16 @t1_u16(i16 %x, i16 %y) { +; CHECK: add.u16 rh0, rh1, rh2; +; CHECK-NEXT: ret; + %z = add i16 %x, %y + ret i16 %z +} + +define ptx_device i32 @t1_u32(i32 %x, i32 %y) { +; CHECK: add.u32 r0, r1, r2; +; CHECK-NEXT: ret; %z = add i32 %x, %y -; CHECK: ret; ret i32 %z } -define ptx_device i32 @t2(i32 %x) { -; CHECK: add.s32 r0, r1, 1; +define ptx_device i64 @t1_u64(i64 %x, i64 %y) { +; CHECK: add.u64 rd0, rd1, rd2; +; CHECK-NEXT: ret; + %z = add i64 %x, %y + ret i64 %z +} + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: add.f32 f0, f1, f2 +; CHECK-NEXT: ret; + %z = fadd float %x, %y + ret float %z +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: add.f64 fd0, fd1, fd2 +; CHECK-NEXT: ret; + %z = fadd double %x, %y + ret double %z +} + +define ptx_device i16 @t2_u16(i16 %x) { +; CHECK: add.u16 rh0, rh1, 1; +; CHECK-NEXT: ret; + %z = add i16 %x, 1 + ret i16 %z +} + +define ptx_device i32 @t2_u32(i32 %x) { +; CHECK: add.u32 r0, r1, 1; +; CHECK-NEXT: ret; %z = add i32 %x, 1 -; CHECK: ret; ret i32 %z } + +define ptx_device i64 @t2_u64(i64 %x) { +; CHECK: add.u64 rd0, rd1, 1; +; CHECK-NEXT: ret; + %z = add i64 %x, 1 + ret i64 %z +} + +define ptx_device float @t2_f32(float %x) { +; CHECK: add.f32 f0, f1, 0F3F800000; +; CHECK-NEXT: ret; 
+ %z = fadd float %x, 1.0 + ret float %z +} + +define ptx_device double @t2_f64(double %x) { +; CHECK: add.f64 fd0, fd1, 0D3FF0000000000000; +; CHECK-NEXT: ret; + %z = fadd double %x, 1.0 + ret double %z +} diff --git a/test/CodeGen/PTX/bitwise.ll b/test/CodeGen/PTX/bitwise.ll new file mode 100644 index 000000000000..dbc77e53330b --- /dev/null +++ b/test/CodeGen/PTX/bitwise.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +; preds + +define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) { +; CHECK: and.pred p0, p1, p2 + %c = and i1 %x, %y + %d = zext i1 %c to i32 + ret i32 %d +} + +define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) { +; CHECK: or.pred p0, p1, p2 + %a = or i1 %x, %y + %b = zext i1 %a to i32 + ret i32 %b +} + +define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) { +; CHECK: xor.pred p0, p1, p2 + %a = xor i1 %x, %y + %b = zext i1 %a to i32 + ret i32 %b +} diff --git a/test/CodeGen/PTX/bra.ll b/test/CodeGen/PTX/bra.ll new file mode 100644 index 000000000000..49383eb3cf96 --- /dev/null +++ b/test/CodeGen/PTX/bra.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device void @test_bra_direct() { +; CHECK: bra $L__BB0_1; +entry: + br label %loop +loop: + br label %loop +} + +define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) { +entry: +; CHECK: setp.le.u32 p0, r1, r2 + %p = icmp ugt i32 %x, %y +; CHECK-NEXT: @p0 bra +; CHECK-NOT: bra + br i1 %p, label %clause.if, label %clause.else +clause.if: +; CHECK: mov.u32 r0, r1 + ret i32 %x +clause.else: +; CHECK: mov.u32 r0, r2 + ret i32 %y +} diff --git a/test/CodeGen/PTX/exit.ll b/test/CodeGen/PTX/exit.ll index 4071babb80ce..7816c801728f 100644 --- a/test/CodeGen/PTX/exit.ll +++ b/test/CodeGen/PTX/exit.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_kernel void @t1() { ; CHECK: exit; diff --git a/test/CodeGen/PTX/fdiv-sm10.ll b/test/CodeGen/PTX/fdiv-sm10.ll new file mode 100644 index 
000000000000..121360ce9be3 --- /dev/null +++ b/test/CodeGen/PTX/fdiv-sm10.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: div.approx.f32 f0, f1, f2; +; CHECK-NEXT: ret; + %a = fdiv float %x, %y + ret float %a +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: div.f64 fd0, fd1, fd2; +; CHECK-NEXT: ret; + %a = fdiv double %x, %y + ret double %a +} diff --git a/test/CodeGen/PTX/fdiv-sm13.ll b/test/CodeGen/PTX/fdiv-sm13.ll new file mode 100644 index 000000000000..0ec7bae8030e --- /dev/null +++ b/test/CodeGen/PTX/fdiv-sm13.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: div.approx.f32 f0, f1, f2; +; CHECK-NEXT: ret; + %a = fdiv float %x, %y + ret float %a +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: div.rn.f64 fd0, fd1, fd2; +; CHECK-NEXT: ret; + %a = fdiv double %x, %y + ret double %a +} diff --git a/test/CodeGen/PTX/intrinsic.ll b/test/CodeGen/PTX/intrinsic.ll new file mode 100644 index 000000000000..cea41827ca47 --- /dev/null +++ b/test/CodeGen/PTX/intrinsic.ll @@ -0,0 +1,281 @@ +; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s + +define ptx_device i32 @test_tid_x() { +; CHECK: mov.u32 r0, %tid.x; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.tid.x() + ret i32 %x +} + +define ptx_device i32 @test_tid_y() { +; CHECK: mov.u32 r0, %tid.y; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.tid.y() + ret i32 %x +} + +define ptx_device i32 @test_tid_z() { +; CHECK: mov.u32 r0, %tid.z; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.tid.z() + ret i32 %x +} + +define ptx_device i32 @test_tid_w() { +; CHECK: mov.u32 r0, %tid.w; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.tid.w() + ret i32 %x +} + +define ptx_device i32 @test_ntid_x() { +; CHECK: mov.u32 r0, %ntid.x; +; CHECK-NEXT: ret; + %x = call i32 
@llvm.ptx.read.ntid.x() + ret i32 %x +} + +define ptx_device i32 @test_ntid_y() { +; CHECK: mov.u32 r0, %ntid.y; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ntid.y() + ret i32 %x +} + +define ptx_device i32 @test_ntid_z() { +; CHECK: mov.u32 r0, %ntid.z; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ntid.z() + ret i32 %x +} + +define ptx_device i32 @test_ntid_w() { +; CHECK: mov.u32 r0, %ntid.w; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ntid.w() + ret i32 %x +} + +define ptx_device i32 @test_laneid() { +; CHECK: mov.u32 r0, %laneid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.laneid() + ret i32 %x +} + +define ptx_device i32 @test_warpid() { +; CHECK: mov.u32 r0, %warpid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.warpid() + ret i32 %x +} + +define ptx_device i32 @test_nwarpid() { +; CHECK: mov.u32 r0, %nwarpid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nwarpid() + ret i32 %x +} + +define ptx_device i32 @test_ctaid_x() { +; CHECK: mov.u32 r0, %ctaid.x; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ctaid.x() + ret i32 %x +} + +define ptx_device i32 @test_ctaid_y() { +; CHECK: mov.u32 r0, %ctaid.y; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ctaid.y() + ret i32 %x +} + +define ptx_device i32 @test_ctaid_z() { +; CHECK: mov.u32 r0, %ctaid.z; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ctaid.z() + ret i32 %x +} + +define ptx_device i32 @test_ctaid_w() { +; CHECK: mov.u32 r0, %ctaid.w; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ctaid.w() + ret i32 %x +} + +define ptx_device i32 @test_nctaid_x() { +; CHECK: mov.u32 r0, %nctaid.x; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nctaid.x() + ret i32 %x +} + +define ptx_device i32 @test_nctaid_y() { +; CHECK: mov.u32 r0, %nctaid.y; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nctaid.y() + ret i32 %x +} + +define ptx_device i32 @test_nctaid_z() { +; CHECK: mov.u32 r0, %nctaid.z; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nctaid.z() + ret 
i32 %x +} + +define ptx_device i32 @test_nctaid_w() { +; CHECK: mov.u32 r0, %nctaid.w; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nctaid.w() + ret i32 %x +} + +define ptx_device i32 @test_smid() { +; CHECK: mov.u32 r0, %smid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.smid() + ret i32 %x +} + +define ptx_device i32 @test_nsmid() { +; CHECK: mov.u32 r0, %nsmid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nsmid() + ret i32 %x +} + +define ptx_device i32 @test_gridid() { +; CHECK: mov.u32 r0, %gridid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.gridid() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_eq() { +; CHECK: mov.u32 r0, %lanemask_eq; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.lanemask.eq() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_le() { +; CHECK: mov.u32 r0, %lanemask_le; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.lanemask.le() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_lt() { +; CHECK: mov.u32 r0, %lanemask_lt; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.lanemask.lt() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_ge() { +; CHECK: mov.u32 r0, %lanemask_ge; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.lanemask.ge() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_gt() { +; CHECK: mov.u32 r0, %lanemask_gt; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.lanemask.gt() + ret i32 %x +} + +define ptx_device i32 @test_clock() { +; CHECK: mov.u32 r0, %clock; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.clock() + ret i32 %x +} + +define ptx_device i64 @test_clock64() { +; CHECK: mov.u64 rd0, %clock64; +; CHECK-NEXT: ret; + %x = call i64 @llvm.ptx.read.clock64() + ret i64 %x +} + +define ptx_device i32 @test_pm0() { +; CHECK: mov.u32 r0, %pm0; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.pm0() + ret i32 %x +} + +define ptx_device i32 @test_pm1() { +; CHECK: mov.u32 r0, %pm1; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.pm1() + ret i32 %x 
+} + +define ptx_device i32 @test_pm2() { +; CHECK: mov.u32 r0, %pm2; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.pm2() + ret i32 %x +} + +define ptx_device i32 @test_pm3() { +; CHECK: mov.u32 r0, %pm3; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.pm3() + ret i32 %x +} + +define ptx_device void @test_bar_sync() { +; CHECK: bar.sync 0 +; CHECK-NEXT: ret; + call void @llvm.ptx.bar.sync(i32 0) + ret void +} + +declare i32 @llvm.ptx.read.tid.x() +declare i32 @llvm.ptx.read.tid.y() +declare i32 @llvm.ptx.read.tid.z() +declare i32 @llvm.ptx.read.tid.w() +declare i32 @llvm.ptx.read.ntid.x() +declare i32 @llvm.ptx.read.ntid.y() +declare i32 @llvm.ptx.read.ntid.z() +declare i32 @llvm.ptx.read.ntid.w() + +declare i32 @llvm.ptx.read.laneid() +declare i32 @llvm.ptx.read.warpid() +declare i32 @llvm.ptx.read.nwarpid() + +declare i32 @llvm.ptx.read.ctaid.x() +declare i32 @llvm.ptx.read.ctaid.y() +declare i32 @llvm.ptx.read.ctaid.z() +declare i32 @llvm.ptx.read.ctaid.w() +declare i32 @llvm.ptx.read.nctaid.x() +declare i32 @llvm.ptx.read.nctaid.y() +declare i32 @llvm.ptx.read.nctaid.z() +declare i32 @llvm.ptx.read.nctaid.w() + +declare i32 @llvm.ptx.read.smid() +declare i32 @llvm.ptx.read.nsmid() +declare i32 @llvm.ptx.read.gridid() + +declare i32 @llvm.ptx.read.lanemask.eq() +declare i32 @llvm.ptx.read.lanemask.le() +declare i32 @llvm.ptx.read.lanemask.lt() +declare i32 @llvm.ptx.read.lanemask.ge() +declare i32 @llvm.ptx.read.lanemask.gt() + +declare i32 @llvm.ptx.read.clock() +declare i64 @llvm.ptx.read.clock64() + +declare i32 @llvm.ptx.read.pm0() +declare i32 @llvm.ptx.read.pm1() +declare i32 @llvm.ptx.read.pm2() +declare i32 @llvm.ptx.read.pm3() + +declare void @llvm.ptx.bar.sync(i32 %i) diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll index 836c4d41045a..377a95abe3db 100644 --- a/test/CodeGen/PTX/ld.ll +++ b/test/CodeGen/PTX/ld.ll @@ -1,78 +1,447 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s -;CHECK: 
.extern .global .s32 array[]; -@array = external global [10 x i32] +;CHECK: .extern .global .b8 array_i16[20]; +@array_i16 = external global [10 x i16] -;CHECK: .extern .const .s32 array_constant[]; -@array_constant = external addrspace(1) constant [10 x i32] +;CHECK: .extern .const .b8 array_constant_i16[20]; +@array_constant_i16 = external addrspace(1) constant [10 x i16] -;CHECK: .extern .local .s32 array_local[]; -@array_local = external addrspace(2) global [10 x i32] +;CHECK: .extern .local .b8 array_local_i16[20]; +@array_local_i16 = external addrspace(2) global [10 x i16] -;CHECK: .extern .shared .s32 array_shared[]; -@array_shared = external addrspace(4) global [10 x i32] +;CHECK: .extern .shared .b8 array_shared_i16[20]; +@array_shared_i16 = external addrspace(4) global [10 x i16] -define ptx_device i32 @t1(i32* %p) { +;CHECK: .extern .global .b8 array_i32[40]; +@array_i32 = external global [10 x i32] + +;CHECK: .extern .const .b8 array_constant_i32[40]; +@array_constant_i32 = external addrspace(1) constant [10 x i32] + +;CHECK: .extern .local .b8 array_local_i32[40]; +@array_local_i32 = external addrspace(2) global [10 x i32] + +;CHECK: .extern .shared .b8 array_shared_i32[40]; +@array_shared_i32 = external addrspace(4) global [10 x i32] + +;CHECK: .extern .global .b8 array_i64[80]; +@array_i64 = external global [10 x i64] + +;CHECK: .extern .const .b8 array_constant_i64[80]; +@array_constant_i64 = external addrspace(1) constant [10 x i64] + +;CHECK: .extern .local .b8 array_local_i64[80]; +@array_local_i64 = external addrspace(2) global [10 x i64] + +;CHECK: .extern .shared .b8 array_shared_i64[80]; +@array_shared_i64 = external addrspace(4) global [10 x i64] + +;CHECK: .extern .global .b8 array_float[40]; +@array_float = external global [10 x float] + +;CHECK: .extern .const .b8 array_constant_float[40]; +@array_constant_float = external addrspace(1) constant [10 x float] + +;CHECK: .extern .local .b8 array_local_float[40]; +@array_local_float = 
external addrspace(2) global [10 x float] + +;CHECK: .extern .shared .b8 array_shared_float[40]; +@array_shared_float = external addrspace(4) global [10 x float] + +;CHECK: .extern .global .b8 array_double[80]; +@array_double = external global [10 x double] + +;CHECK: .extern .const .b8 array_constant_double[80]; +@array_constant_double = external addrspace(1) constant [10 x double] + +;CHECK: .extern .local .b8 array_local_double[80]; +@array_local_double = external addrspace(2) global [10 x double] + +;CHECK: .extern .shared .b8 array_shared_double[80]; +@array_shared_double = external addrspace(4) global [10 x double] + + +define ptx_device i16 @t1_u16(i16* %p) { entry: -;CHECK: ld.global.s32 r0, [r1]; +;CHECK: ld.global.u16 rh0, [r1]; +;CHECK-NEXT: ret; + %x = load i16* %p + ret i16 %x +} + +define ptx_device i32 @t1_u32(i32* %p) { +entry: +;CHECK: ld.global.u32 r0, [r1]; +;CHECK-NEXT: ret; %x = load i32* %p ret i32 %x } -define ptx_device i32 @t2(i32* %p) { +define ptx_device i64 @t1_u64(i64* %p) { +entry: +;CHECK: ld.global.u64 rd0, [r1]; +;CHECK-NEXT: ret; + %x = load i64* %p + ret i64 %x +} + +define ptx_device float @t1_f32(float* %p) { +entry: +;CHECK: ld.global.f32 f0, [r1]; +;CHECK-NEXT: ret; + %x = load float* %p + ret float %x +} + +define ptx_device double @t1_f64(double* %p) { +entry: +;CHECK: ld.global.f64 fd0, [r1]; +;CHECK-NEXT: ret; + %x = load double* %p + ret double %x +} + +define ptx_device i16 @t2_u16(i16* %p) { entry: -;CHECK: ld.global.s32 r0, [r1+4]; +;CHECK: ld.global.u16 rh0, [r1+2]; +;CHECK-NEXT: ret; + %i = getelementptr i16* %p, i32 1 + %x = load i16* %i + ret i16 %x +} + +define ptx_device i32 @t2_u32(i32* %p) { +entry: +;CHECK: ld.global.u32 r0, [r1+4]; +;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 1 %x = load i32* %i ret i32 %x } -define ptx_device i32 @t3(i32* %p, i32 %q) { +define ptx_device i64 @t2_u64(i64* %p) { +entry: +;CHECK: ld.global.u64 rd0, [r1+8]; +;CHECK-NEXT: ret; + %i = getelementptr i64* %p, i32 1 + %x = load 
i64* %i + ret i64 %x +} + +define ptx_device float @t2_f32(float* %p) { +entry: +;CHECK: ld.global.f32 f0, [r1+4]; +;CHECK-NEXT: ret; + %i = getelementptr float* %p, i32 1 + %x = load float* %i + ret float %x +} + +define ptx_device double @t2_f64(double* %p) { +entry: +;CHECK: ld.global.f64 fd0, [r1+8]; +;CHECK-NEXT: ret; + %i = getelementptr double* %p, i32 1 + %x = load double* %i + ret double %x +} + +define ptx_device i16 @t3_u16(i16* %p, i32 %q) { +entry: +;CHECK: shl.b32 r0, r2, 1; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: ld.global.u16 rh0, [r0]; + %i = getelementptr i16* %p, i32 %q + %x = load i16* %i + ret i16 %x +} + +define ptx_device i32 @t3_u32(i32* %p, i32 %q) { entry: ;CHECK: shl.b32 r0, r2, 2; -;CHECK: add.s32 r0, r1, r0; -;CHECK: ld.global.s32 r0, [r0]; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: ld.global.u32 r0, [r0]; %i = getelementptr i32* %p, i32 %q %x = load i32* %i ret i32 %x } -define ptx_device i32 @t4_global() { +define ptx_device i64 @t3_u64(i64* %p, i32 %q) { +entry: +;CHECK: shl.b32 r0, r2, 3; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: ld.global.u64 rd0, [r0]; + %i = getelementptr i64* %p, i32 %q + %x = load i64* %i + ret i64 %x +} + +define ptx_device float @t3_f32(float* %p, i32 %q) { +entry: +;CHECK: shl.b32 r0, r2, 2; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: ld.global.f32 f0, [r0]; + %i = getelementptr float* %p, i32 %q + %x = load float* %i + ret float %x +} + +define ptx_device double @t3_f64(double* %p, i32 %q) { +entry: +;CHECK: shl.b32 r0, r2, 3; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: ld.global.f64 fd0, [r0]; + %i = getelementptr double* %p, i32 %q + %x = load double* %i + ret double %x +} + +define ptx_device i16 @t4_global_u16() { +entry: +;CHECK: mov.u32 r0, array_i16; +;CHECK-NEXT: ld.global.u16 rh0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0 + %x = load i16* %i + ret i16 %x +} + +define ptx_device i32 @t4_global_u32() { entry: -;CHECK: ld.global.s32 
r0, [array]; - %i = getelementptr [10 x i32]* @array, i32 0, i32 0 +;CHECK: mov.u32 r0, array_i32; +;CHECK-NEXT: ld.global.u32 r0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 %x = load i32* %i ret i32 %x } -define ptx_device i32 @t4_const() { +define ptx_device i64 @t4_global_u64() { entry: -;CHECK: ld.const.s32 r0, [array_constant]; - %i = getelementptr [10 x i32] addrspace(1)* @array_constant, i32 0, i32 0 +;CHECK: mov.u32 r0, array_i64; +;CHECK-NEXT: ld.global.u64 rd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 + %x = load i64* %i + ret i64 %x +} + +define ptx_device float @t4_global_f32() { +entry: +;CHECK: mov.u32 r0, array_float; +;CHECK-NEXT: ld.global.f32 f0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 + %x = load float* %i + ret float %x +} + +define ptx_device double @t4_global_f64() { +entry: +;CHECK: mov.u32 r0, array_double; +;CHECK-NEXT: ld.global.f64 fd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 + %x = load double* %i + ret double %x +} + +define ptx_device i16 @t4_const_u16() { +entry: +;CHECK: mov.u32 r0, array_constant_i16; +;CHECK-NEXT: ld.const.u16 rh0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0 + %x = load i16 addrspace(1)* %i + ret i16 %x +} + +define ptx_device i32 @t4_const_u32() { +entry: +;CHECK: mov.u32 r0, array_constant_i32; +;CHECK-NEXT: ld.const.u32 r0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0 %x = load i32 addrspace(1)* %i ret i32 %x } -define ptx_device i32 @t4_local() { +define ptx_device i64 @t4_const_u64() { +entry: +;CHECK: mov.u32 r0, array_constant_i64; +;CHECK-NEXT: ld.const.u64 rd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0 + %x = load i64 addrspace(1)* %i + ret i64 %x +} 
+ +define ptx_device float @t4_const_f32() { entry: -;CHECK: ld.local.s32 r0, [array_local]; - %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0 +;CHECK: mov.u32 r0, array_constant_float; +;CHECK-NEXT: ld.const.f32 f0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0 + %x = load float addrspace(1)* %i + ret float %x +} + +define ptx_device double @t4_const_f64() { +entry: +;CHECK: mov.u32 r0, array_constant_double; +;CHECK-NEXT: ld.const.f64 fd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0 + %x = load double addrspace(1)* %i + ret double %x +} + +define ptx_device i16 @t4_local_u16() { +entry: +;CHECK: mov.u32 r0, array_local_i16; +;CHECK-NEXT: ld.local.u16 rh0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 + %x = load i16 addrspace(2)* %i + ret i16 %x +} + +define ptx_device i32 @t4_local_u32() { +entry: +;CHECK: mov.u32 r0, array_local_i32; +;CHECK-NEXT: ld.local.u32 r0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 %x = load i32 addrspace(2)* %i ret i32 %x } -define ptx_device i32 @t4_shared() { +define ptx_device i64 @t4_local_u64() { entry: -;CHECK: ld.shared.s32 r0, [array_shared]; - %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0 +;CHECK: mov.u32 r0, array_local_i64; +;CHECK-NEXT: ld.local.u64 rd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 + %x = load i64 addrspace(2)* %i + ret i64 %x +} + +define ptx_device float @t4_local_f32() { +entry: +;CHECK: mov.u32 r0, array_local_float; +;CHECK-NEXT: ld.local.f32 f0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 + %x = load float addrspace(2)* %i + ret float %x +} + +define ptx_device double @t4_local_f64() { +entry: 
+;CHECK: mov.u32 r0, array_local_double; +;CHECK-NEXT: ld.local.f64 fd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 + %x = load double addrspace(2)* %i + ret double %x +} + +define ptx_device i16 @t4_shared_u16() { +entry: +;CHECK: mov.u32 r0, array_shared_i16; +;CHECK-NEXT: ld.shared.u16 rh0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 + %x = load i16 addrspace(4)* %i + ret i16 %x +} + +define ptx_device i32 @t4_shared_u32() { +entry: +;CHECK: mov.u32 r0, array_shared_i32; +;CHECK-NEXT: ld.shared.u32 r0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 %x = load i32 addrspace(4)* %i ret i32 %x } -define ptx_device i32 @t5() { +define ptx_device i64 @t4_shared_u64() { +entry: +;CHECK: mov.u32 r0, array_shared_i64; +;CHECK-NEXT: ld.shared.u64 rd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 + %x = load i64 addrspace(4)* %i + ret i64 %x +} + +define ptx_device float @t4_shared_f32() { +entry: +;CHECK: mov.u32 r0, array_shared_float; +;CHECK-NEXT: ld.shared.f32 f0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 + %x = load float addrspace(4)* %i + ret float %x +} + +define ptx_device double @t4_shared_f64() { +entry: +;CHECK: mov.u32 r0, array_shared_double; +;CHECK-NEXT: ld.shared.f64 fd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 + %x = load double addrspace(4)* %i + ret double %x +} + +define ptx_device i16 @t5_u16() { entry: -;CHECK: ld.global.s32 r0, [array+4]; - %i = getelementptr [10 x i32]* @array, i32 0, i32 1 +;CHECK: mov.u32 r0, array_i16; +;CHECK-NEXT: ld.global.u16 rh0, [r0+2]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 + %x = load i16* %i + ret i16 %x +} + +define 
ptx_device i32 @t5_u32() { +entry: +;CHECK: mov.u32 r0, array_i32; +;CHECK-NEXT: ld.global.u32 r0, [r0+4]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 %x = load i32* %i ret i32 %x } + +define ptx_device i64 @t5_u64() { +entry: +;CHECK: mov.u32 r0, array_i64; +;CHECK-NEXT: ld.global.u64 rd0, [r0+8]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 + %x = load i64* %i + ret i64 %x +} + +define ptx_device float @t5_f32() { +entry: +;CHECK: mov.u32 r0, array_float; +;CHECK-NEXT: ld.global.f32 f0, [r0+4]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 + %x = load float* %i + ret float %x +} + +define ptx_device double @t5_f64() { +entry: +;CHECK: mov.u32 r0, array_double; +;CHECK-NEXT: ld.global.f64 fd0, [r0+8]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 + %x = load double* %i + ret double %x +} diff --git a/test/CodeGen/PTX/llvm-intrinsic.ll b/test/CodeGen/PTX/llvm-intrinsic.ll new file mode 100644 index 000000000000..1e265f5b7b3a --- /dev/null +++ b/test/CodeGen/PTX/llvm-intrinsic.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s + +define ptx_device float @test_sqrt_f32(float %x) { +entry: +; CHECK: sqrt.rn.f32 f0, f1; +; CHECK-NEXT: ret; + %y = call float @llvm.sqrt.f32(float %x) + ret float %y +} + +define ptx_device double @test_sqrt_f64(double %x) { +entry: +; CHECK: sqrt.rn.f64 fd0, fd1; +; CHECK-NEXT: ret; + %y = call double @llvm.sqrt.f64(double %x) + ret double %y +} + +define ptx_device float @test_sin_f32(float %x) { +entry: +; CHECK: sin.approx.f32 f0, f1; +; CHECK-NEXT: ret; + %y = call float @llvm.sin.f32(float %x) + ret float %y +} + +define ptx_device double @test_sin_f64(double %x) { +entry: +; CHECK: sin.approx.f64 fd0, fd1; +; CHECK-NEXT: ret; + %y = call double @llvm.sin.f64(double %x) + ret double %y +} + +define ptx_device float @test_cos_f32(float %x) { +entry: +; CHECK: 
cos.approx.f32 f0, f1; +; CHECK-NEXT: ret; + %y = call float @llvm.cos.f32(float %x) + ret float %y +} + +define ptx_device double @test_cos_f64(double %x) { +entry: +; CHECK: cos.approx.f64 fd0, fd1; +; CHECK-NEXT: ret; + %y = call double @llvm.cos.f64(double %x) + ret double %y +} + +declare float @llvm.sqrt.f32(float) +declare double @llvm.sqrt.f64(double) +declare float @llvm.sin.f32(float) +declare double @llvm.sin.f64(double) +declare float @llvm.cos.f32(float) +declare double @llvm.cos.f64(double) diff --git a/test/CodeGen/PTX/mad.ll b/test/CodeGen/PTX/mad.ll new file mode 100644 index 000000000000..0c25f2c0030a --- /dev/null +++ b/test/CodeGen/PTX/mad.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s + +define ptx_device float @t1_f32(float %x, float %y, float %z) { +; CHECK: mad.rn.f32 f0, f1, f2, f3; +; CHECK-NEXT: ret; + %a = fmul float %x, %y + %b = fadd float %a, %z + ret float %b +} + +define ptx_device double @t1_f64(double %x, double %y, double %z) { +; CHECK: mad.rn.f64 fd0, fd1, fd2, fd3; +; CHECK-NEXT: ret; + %a = fmul double %x, %y + %b = fadd double %a, %z + ret double %b +} diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll index c365e9beb897..120572a0e868 100644 --- a/test/CodeGen/PTX/mov.ll +++ b/test/CodeGen/PTX/mov.ll @@ -1,13 +1,62 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s -define ptx_device i32 @t1() { -; CHECK: mov.s32 r0, 0; +define ptx_device i16 @t1_u16() { +; CHECK: mov.u16 rh0, 0; +; CHECK: ret; + ret i16 0 +} + +define ptx_device i32 @t1_u32() { +; CHECK: mov.u32 r0, 0; ; CHECK: ret; ret i32 0 } -define ptx_device i32 @t2(i32 %x) { -; CHECK: mov.s32 r0, r1; +define ptx_device i64 @t1_u64() { +; CHECK: mov.u64 rd0, 0; +; CHECK: ret; + ret i64 0 +} + +define ptx_device float @t1_f32() { +; CHECK: mov.f32 f0, 0F00000000; +; CHECK: ret; + ret float 0.0 +} + +define ptx_device double @t1_f64() { +; CHECK: mov.f64 fd0, 0D0000000000000000; +; CHECK: 
ret; + ret double 0.0 +} + +define ptx_device i16 @t2_u16(i16 %x) { +; CHECK: mov.u16 rh0, rh1; +; CHECK: ret; + ret i16 %x +} + +define ptx_device i32 @t2_u32(i32 %x) { +; CHECK: mov.u32 r0, r1; ; CHECK: ret; ret i32 %x } + +define ptx_device i64 @t2_u64(i64 %x) { +; CHECK: mov.u64 rd0, rd1; +; CHECK: ret; + ret i64 %x +} + +define ptx_device float @t3_f32(float %x) { +; CHECK: mov.f32 f0, f1; +; CHECK-NEXT: ret; + ret float %x +} + +define ptx_device double @t3_f64(double %x) { +; CHECK: mov.f64 fd0, fd1; +; CHECK-NEXT: ret; + ret double %x +} + diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll new file mode 100644 index 000000000000..5ce042675dc8 --- /dev/null +++ b/test/CodeGen/PTX/mul.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +;define ptx_device i32 @t1(i32 %x, i32 %y) { +; %z = mul i32 %x, %y +; ret i32 %z +;} + +;define ptx_device i32 @t2(i32 %x) { +; %z = mul i32 %x, 1 +; ret i32 %z +;} + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: mul.f32 f0, f1, f2 +; CHECK-NEXT: ret; + %z = fmul float %x, %y + ret float %z +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: mul.f64 fd0, fd1, fd2 +; CHECK-NEXT: ret; + %z = fmul double %x, %y + ret double %z +} + +define ptx_device float @t2_f32(float %x) { +; CHECK: mul.f32 f0, f1, 0F40A00000; +; CHECK-NEXT: ret; + %z = fmul float %x, 5.0 + ret float %z +} + +define ptx_device double @t2_f64(double %x) { +; CHECK: mul.f64 fd0, fd1, 0D4014000000000000; +; CHECK-NEXT: ret; + %z = fmul double %x, 5.0 + ret double %z +} diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll index a14d5c9c27ba..ac33fef0d6e3 100644 --- a/test/CodeGen/PTX/options.ll +++ b/test/CodeGen/PTX/options.ll @@ -1,5 +1,9 @@ -; RUN: llc < %s -march=ptx -ptx-version=2.0 | grep ".version 2.0" -; RUN: llc < %s -march=ptx -ptx-target=sm_20 | grep ".target sm_20" +; RUN: llc < %s -march=ptx32 -mattr=ptx20 | grep ".version 2.0" +; RUN: llc < %s -march=ptx32 
-mattr=ptx21 | grep ".version 2.1" +; RUN: llc < %s -march=ptx32 -mattr=ptx22 | grep ".version 2.2" +; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10" +; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13" +; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20" define ptx_device void @t1() { ret void diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll new file mode 100644 index 000000000000..95d4a328149c --- /dev/null +++ b/test/CodeGen/PTX/parameter-order.ll @@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +; CHECK: .func (.reg .u32 r0) test_parameter_order (.reg .f32 f1, .reg .u32 r1, .reg .u32 r2, .reg .f32 f2) +define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) { +; CHECK: sub.u32 r0, r1, r2 + %result = sub i32 %b, %c + ret i32 %result +} diff --git a/test/CodeGen/PTX/ret.ll b/test/CodeGen/PTX/ret.ll index d5037f25fd36..ba0523f6424a 100644 --- a/test/CodeGen/PTX/ret.ll +++ b/test/CodeGen/PTX/ret.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device void @t1() { ; CHECK: ret; diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll new file mode 100644 index 000000000000..5836122049e6 --- /dev/null +++ b/test/CodeGen/PTX/setp.ll @@ -0,0 +1,134 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.eq.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp eq i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.ne.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ne i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.lt.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; 
+ %p = icmp ult i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.le.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ule i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.gt.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ugt i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.ge.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp uge i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) { +; CHECK: setp.eq.u32 p0, r1, 1; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp eq i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) { +; CHECK: setp.ne.u32 p0, r1, 1; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ne i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) { +; CHECK: setp.eq.u32 p0, r1, 0; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ult i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_le_u32_ri(i32 %x) { +; CHECK: setp.lt.u32 p0, r1, 2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ule i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) { +; CHECK: setp.gt.u32 p0, r1, 1; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ugt i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) { +; CHECK: setp.ne.u32 p0, r1, 0; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp uge i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 
%z +} + +define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) { +; CHECK: setp.gt.u32 p0, r3, r4; +; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, p0; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %c = icmp eq i32 %x, %y + %d = icmp ugt i32 %u, %v + %e = and i1 %c, %d + %z = zext i1 %e to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) { +; CHECK: cvt.pred.u32 p0, r3; +; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, !p0; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %c = trunc i32 %w to i1 + %d = icmp eq i32 %x, %y + %e = xor i1 %c, 1 + %f = and i1 %d, %e + %z = zext i1 %f to i32 + ret i32 %z +} diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll index b564b43ab932..6e72c9221325 100644 --- a/test/CodeGen/PTX/shl.ll +++ b/test/CodeGen/PTX/shl.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @t1(i32 %x, i32 %y) { ; CHECK: shl.b32 r0, r1, r2 diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll index 3f8ade862b75..8693e0ecf49a 100644 --- a/test/CodeGen/PTX/shr.ll +++ b/test/CodeGen/PTX/shr.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @t1(i32 %x, i32 %y) { ; CHECK: shr.u32 r0, r1, r2 diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll index 2cbacb9ee59c..dee5c61abe66 100644 --- a/test/CodeGen/PTX/st.ll +++ b/test/CodeGen/PTX/st.ll @@ -1,71 +1,402 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s -;CHECK: .extern .global .s32 array[]; -@array = external global [10 x i32] +;CHECK: .extern .global .b8 array_i16[20]; +@array_i16 = external global [10 x i16] -;CHECK: .extern .const .s32 array_constant[]; -@array_constant = external addrspace(1) constant [10 x i32] +;CHECK: .extern .const .b8 array_constant_i16[20]; +@array_constant_i16 = external addrspace(1) 
constant [10 x i16] -;CHECK: .extern .local .s32 array_local[]; -@array_local = external addrspace(2) global [10 x i32] +;CHECK: .extern .local .b8 array_local_i16[20]; +@array_local_i16 = external addrspace(2) global [10 x i16] -;CHECK: .extern .shared .s32 array_shared[]; -@array_shared = external addrspace(4) global [10 x i32] +;CHECK: .extern .shared .b8 array_shared_i16[20]; +@array_shared_i16 = external addrspace(4) global [10 x i16] -define ptx_device void @t1(i32* %p, i32 %x) { +;CHECK: .extern .global .b8 array_i32[40]; +@array_i32 = external global [10 x i32] + +;CHECK: .extern .const .b8 array_constant_i32[40]; +@array_constant_i32 = external addrspace(1) constant [10 x i32] + +;CHECK: .extern .local .b8 array_local_i32[40]; +@array_local_i32 = external addrspace(2) global [10 x i32] + +;CHECK: .extern .shared .b8 array_shared_i32[40]; +@array_shared_i32 = external addrspace(4) global [10 x i32] + +;CHECK: .extern .global .b8 array_i64[80]; +@array_i64 = external global [10 x i64] + +;CHECK: .extern .const .b8 array_constant_i64[80]; +@array_constant_i64 = external addrspace(1) constant [10 x i64] + +;CHECK: .extern .local .b8 array_local_i64[80]; +@array_local_i64 = external addrspace(2) global [10 x i64] + +;CHECK: .extern .shared .b8 array_shared_i64[80]; +@array_shared_i64 = external addrspace(4) global [10 x i64] + +;CHECK: .extern .global .b8 array_float[40]; +@array_float = external global [10 x float] + +;CHECK: .extern .const .b8 array_constant_float[40]; +@array_constant_float = external addrspace(1) constant [10 x float] + +;CHECK: .extern .local .b8 array_local_float[40]; +@array_local_float = external addrspace(2) global [10 x float] + +;CHECK: .extern .shared .b8 array_shared_float[40]; +@array_shared_float = external addrspace(4) global [10 x float] + +;CHECK: .extern .global .b8 array_double[80]; +@array_double = external global [10 x double] + +;CHECK: .extern .const .b8 array_constant_double[80]; +@array_constant_double = external 
addrspace(1) constant [10 x double] + +;CHECK: .extern .local .b8 array_local_double[80]; +@array_local_double = external addrspace(2) global [10 x double] + +;CHECK: .extern .shared .b8 array_shared_double[80]; +@array_shared_double = external addrspace(4) global [10 x double] + + +define ptx_device void @t1_u16(i16* %p, i16 %x) { entry: -;CHECK: st.global.s32 [r1], r2; +;CHECK: st.global.u16 [r1], rh1; +;CHECK-NEXT: ret; + store i16 %x, i16* %p + ret void +} + +define ptx_device void @t1_u32(i32* %p, i32 %x) { +entry: +;CHECK: st.global.u32 [r1], r2; +;CHECK-NEXT: ret; store i32 %x, i32* %p ret void } -define ptx_device void @t2(i32* %p, i32 %x) { +define ptx_device void @t1_u64(i64* %p, i64 %x) { +entry: +;CHECK: st.global.u64 [r1], rd1; +;CHECK-NEXT: ret; + store i64 %x, i64* %p + ret void +} + +define ptx_device void @t1_f32(float* %p, float %x) { +entry: +;CHECK: st.global.f32 [r1], f1; +;CHECK-NEXT: ret; + store float %x, float* %p + ret void +} + +define ptx_device void @t1_f64(double* %p, double %x) { +entry: +;CHECK: st.global.f64 [r1], fd1; +;CHECK-NEXT: ret; + store double %x, double* %p + ret void +} + +define ptx_device void @t2_u16(i16* %p, i16 %x) { entry: -;CHECK: st.global.s32 [r1+4], r2; +;CHECK: st.global.u16 [r1+2], rh1; +;CHECK-NEXT: ret; + %i = getelementptr i16* %p, i32 1 + store i16 %x, i16* %i + ret void +} + +define ptx_device void @t2_u32(i32* %p, i32 %x) { +entry: +;CHECK: st.global.u32 [r1+4], r2; +;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 1 store i32 %x, i32* %i ret void } -define ptx_device void @t3(i32* %p, i32 %q, i32 %x) { -;CHECK: .reg .s32 r0; +define ptx_device void @t2_u64(i64* %p, i64 %x) { +entry: +;CHECK: st.global.u64 [r1+8], rd1; +;CHECK-NEXT: ret; + %i = getelementptr i64* %p, i32 1 + store i64 %x, i64* %i + ret void +} + +define ptx_device void @t2_f32(float* %p, float %x) { +entry: +;CHECK: st.global.f32 [r1+4], f1; +;CHECK-NEXT: ret; + %i = getelementptr float* %p, i32 1 + store float %x, float* %i + ret void 
+} + +define ptx_device void @t2_f64(double* %p, double %x) { +entry: +;CHECK: st.global.f64 [r1+8], fd1; +;CHECK-NEXT: ret; + %i = getelementptr double* %p, i32 1 + store double %x, double* %i + ret void +} + +define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) { +entry: +;CHECK: shl.b32 r0, r2, 1; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: st.global.u16 [r0], rh1; +;CHECK-NEXT: ret; + %i = getelementptr i16* %p, i32 %q + store i16 %x, i16* %i + ret void +} + +define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) { entry: ;CHECK: shl.b32 r0, r2, 2; -;CHECK: add.s32 r0, r1, r0; -;CHECK: st.global.s32 [r0], r3; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: st.global.u32 [r0], r3; +;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 %q store i32 %x, i32* %i ret void } -define ptx_device void @t4_global(i32 %x) { +define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) { +entry: +;CHECK: shl.b32 r0, r2, 3; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: st.global.u64 [r0], rd1; +;CHECK-NEXT: ret; + %i = getelementptr i64* %p, i32 %q + store i64 %x, i64* %i + ret void +} + +define ptx_device void @t3_f32(float* %p, i32 %q, float %x) { +entry: +;CHECK: shl.b32 r0, r2, 2; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: st.global.f32 [r0], f1; +;CHECK-NEXT: ret; + %i = getelementptr float* %p, i32 %q + store float %x, float* %i + ret void +} + +define ptx_device void @t3_f64(double* %p, i32 %q, double %x) { +entry: +;CHECK: shl.b32 r0, r2, 3; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: st.global.f64 [r0], fd1; +;CHECK-NEXT: ret; + %i = getelementptr double* %p, i32 %q + store double %x, double* %i + ret void +} + +define ptx_device void @t4_global_u16(i16 %x) { +entry: +;CHECK: mov.u32 r0, array_i16; +;CHECK-NEXT: st.global.u16 [r0], rh1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0 + store i16 %x, i16* %i + ret void +} + +define ptx_device void @t4_global_u32(i32 %x) { entry: -;CHECK: st.global.s32 [array], r1; - %i = 
getelementptr [10 x i32]* @array, i32 0, i32 0 +;CHECK: mov.u32 r0, array_i32; +;CHECK-NEXT: st.global.u32 [r0], r1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 store i32 %x, i32* %i ret void } -define ptx_device void @t4_local(i32 %x) { +define ptx_device void @t4_global_u64(i64 %x) { +entry: +;CHECK: mov.u32 r0, array_i64; +;CHECK-NEXT: st.global.u64 [r0], rd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 + store i64 %x, i64* %i + ret void +} + +define ptx_device void @t4_global_f32(float %x) { +entry: +;CHECK: mov.u32 r0, array_float; +;CHECK-NEXT: st.global.f32 [r0], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 + store float %x, float* %i + ret void +} + +define ptx_device void @t4_global_f64(double %x) { +entry: +;CHECK: mov.u32 r0, array_double; +;CHECK-NEXT: st.global.f64 [r0], fd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 + store double %x, double* %i + ret void +} + +define ptx_device void @t4_local_u16(i16 %x) { entry: -;CHECK: st.local.s32 [array_local], r1; - %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0 +;CHECK: mov.u32 r0, array_local_i16; +;CHECK-NEXT: st.local.u16 [r0], rh1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 + store i16 %x, i16 addrspace(2)* %i + ret void +} + +define ptx_device void @t4_local_u32(i32 %x) { +entry: +;CHECK: mov.u32 r0, array_local_i32; +;CHECK-NEXT: st.local.u32 [r0], r1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 store i32 %x, i32 addrspace(2)* %i ret void } -define ptx_device void @t4_shared(i32 %x) { +define ptx_device void @t4_local_u64(i64 %x) { +entry: +;CHECK: mov.u32 r0, array_local_i64; +;CHECK-NEXT: st.local.u64 [r0], rd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 + store i64 %x, i64 
addrspace(2)* %i + ret void +} + +define ptx_device void @t4_local_f32(float %x) { +entry: +;CHECK: mov.u32 r0, array_local_float; +;CHECK-NEXT: st.local.f32 [r0], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 + store float %x, float addrspace(2)* %i + ret void +} + +define ptx_device void @t4_local_f64(double %x) { +entry: +;CHECK: mov.u32 r0, array_local_double; +;CHECK-NEXT: st.local.f64 [r0], fd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 + store double %x, double addrspace(2)* %i + ret void +} + +define ptx_device void @t4_shared_u16(i16 %x) { entry: -;CHECK: st.shared.s32 [array_shared], r1; - %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0 +;CHECK: mov.u32 r0, array_shared_i16; +;CHECK-NEXT: st.shared.u16 [r0], rh1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 + store i16 %x, i16 addrspace(4)* %i + ret void +} + +define ptx_device void @t4_shared_u32(i32 %x) { +entry: +;CHECK: mov.u32 r0, array_shared_i32; +;CHECK-NEXT: st.shared.u32 [r0], r1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 store i32 %x, i32 addrspace(4)* %i ret void } -define ptx_device void @t5(i32 %x) { +define ptx_device void @t4_shared_u64(i64 %x) { +entry: +;CHECK: mov.u32 r0, array_shared_i64; +;CHECK-NEXT: st.shared.u64 [r0], rd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 + store i64 %x, i64 addrspace(4)* %i + ret void +} + +define ptx_device void @t4_shared_f32(float %x) { +entry: +;CHECK: mov.u32 r0, array_shared_float; +;CHECK-NEXT: st.shared.f32 [r0], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 + store float %x, float addrspace(4)* %i + ret void +} + +define ptx_device void @t4_shared_f64(double %x) { +entry: +;CHECK: mov.u32 
r0, array_shared_double; +;CHECK-NEXT: st.shared.f64 [r0], fd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 + store double %x, double addrspace(4)* %i + ret void +} + +define ptx_device void @t5_u16(i16 %x) { entry: -;CHECK: st.global.s32 [array+4], r1; - %i = getelementptr [10 x i32]* @array, i32 0, i32 1 +;CHECK: mov.u32 r0, array_i16; +;CHECK-NEXT: st.global.u16 [r0+2], rh1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 + store i16 %x, i16* %i + ret void +} + +define ptx_device void @t5_u32(i32 %x) { +entry: +;CHECK: mov.u32 r0, array_i32; +;CHECK-NEXT: st.global.u32 [r0+4], r1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 store i32 %x, i32* %i ret void } + +define ptx_device void @t5_u64(i64 %x) { +entry: +;CHECK: mov.u32 r0, array_i64; +;CHECK-NEXT: st.global.u64 [r0+8], rd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 + store i64 %x, i64* %i + ret void +} + +define ptx_device void @t5_f32(float %x) { +entry: +;CHECK: mov.u32 r0, array_float; +;CHECK-NEXT: st.global.f32 [r0+4], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 + store float %x, float* %i + ret void +} + +define ptx_device void @t5_f64(double %x) { +entry: +;CHECK: mov.u32 r0, array_double; +;CHECK-NEXT: st.global.f64 [r0+8], fd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 + store double %x, double* %i + ret void +} diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll index aab3fdadad13..7dd2c6f6ac79 100644 --- a/test/CodeGen/PTX/sub.ll +++ b/test/CodeGen/PTX/sub.ll @@ -1,15 +1,71 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s -define ptx_device i32 @t1(i32 %x, i32 %y) { -;CHECK: sub.s32 r0, r1, r2; +define ptx_device i16 @t1_u16(i16 %x, i16 %y) { +; CHECK: sub.u16 rh0, rh1, rh2; +; CHECK-NEXT: ret; + %z = sub i16 
%x, %y + ret i16 %z +} + +define ptx_device i32 @t1_u32(i32 %x, i32 %y) { +; CHECK: sub.u32 r0, r1, r2; +; CHECK-NEXT: ret; %z = sub i32 %x, %y -;CHECK: ret; ret i32 %z } -define ptx_device i32 @t2(i32 %x) { -;CHECK: add.s32 r0, r1, -1; +define ptx_device i64 @t1_u64(i64 %x, i64 %y) { +; CHECK: sub.u64 rd0, rd1, rd2; +; CHECK-NEXT: ret; + %z = sub i64 %x, %y + ret i64 %z +} + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: sub.f32 f0, f1, f2 +; CHECK-NEXT: ret; + %z = fsub float %x, %y + ret float %z +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: sub.f64 fd0, fd1, fd2 +; CHECK-NEXT: ret; + %z = fsub double %x, %y + ret double %z +} + +define ptx_device i16 @t2_u16(i16 %x) { +; CHECK: add.u16 rh0, rh1, -1; +; CHECK-NEXT: ret; + %z = sub i16 %x, 1 + ret i16 %z +} + +define ptx_device i32 @t2_u32(i32 %x) { +; CHECK: add.u32 r0, r1, -1; +; CHECK-NEXT: ret; %z = sub i32 %x, 1 -;CHECK: ret; ret i32 %z } + +define ptx_device i64 @t2_u64(i64 %x) { +; CHECK: add.u64 rd0, rd1, -1; +; CHECK-NEXT: ret; + %z = sub i64 %x, 1 + ret i64 %z +} + +define ptx_device float @t2_f32(float %x) { +; CHECK: add.f32 f0, f1, 0FBF800000; +; CHECK-NEXT: ret; + %z = fsub float %x, 1.0 + ret float %z +} + +define ptx_device double @t2_f64(double %x) { +; CHECK: add.f64 fd0, fd1, 0DBFF0000000000000; +; CHECK-NEXT: ret; + %z = fsub double %x, 1.0 + ret double %z +} diff --git a/test/CodeGen/PowerPC/2008-12-12-EH.ll b/test/CodeGen/PowerPC/2008-12-12-EH.ll index 2315e36ff465..a2a5e9e39641 100644 --- a/test/CodeGen/PowerPC/2008-12-12-EH.ll +++ b/test/CodeGen/PowerPC/2008-12-12-EH.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh +; RUN: llc < %s -disable-cfi -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh define void @_Z1fv() { entry: diff --git a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll index b10920a6c10d..72ae9d6c73b3 100644 --- 
a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll +++ b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 -regalloc=basic | FileCheck %s declare i8* @llvm.frameaddress(i32) nounwind readnone diff --git a/test/CodeGen/PowerPC/Atomics-64.ll b/test/CodeGen/PowerPC/Atomics-64.ll index 1dc4310761c3..cfc1eb98e064 100644 --- a/test/CodeGen/PowerPC/Atomics-64.ll +++ b/test/CodeGen/PowerPC/Atomics-64.ll @@ -1,5 +1,11 @@ -; RUN: llc < %s -march=ppc64 -; ModuleID = 'Atomics.c' +; RUN: llc < %s -march=ppc64 -verify-machineinstrs +; +; This test is disabled until PPCISelLowering learns to insert proper 64-bit +; code for ATOMIC_CMP_SWAP. Currently, it is inserting 32-bit instructions with +; 64-bit operands which causes the machine code verifier to throw a tantrum. +; +; XFAIL: * + target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" target triple = "powerpc64-apple-darwin9" @sc = common global i8 0 ; [#uses=52] diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll index 404fdd01966c..ecd5ecd2eca1 100644 --- a/test/CodeGen/PowerPC/Frames-small.ll +++ b/test/CodeGen/PowerPC/Frames-small.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1 -; RUN not grep {stw r31, -4(r1)} %t1 +; RUN: not grep {stw r31, -4(r1)} %t1 ; RUN: grep {stwu r1, -16448(r1)} %t1 ; RUN: grep {addi r1, r1, 16448} %t1 ; RUN: llc < %s -march=ppc32 | \ diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll index 5122ab39d232..ac5662534d0b 100644 --- a/test/CodeGen/PowerPC/indirectbr.ll +++ b/test/CodeGen/PowerPC/indirectbr.ll @@ -43,13 +43,13 @@ L2: ; preds = %L3, %bb2 L1: ; preds = %L2, %bb2 %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; [#uses=1] -; PIC: addis r4, r4, 
ha16(Ltmp0-L0$pb) -; PIC: li r6, lo16(Ltmp0-L0$pb) -; PIC: add r4, r4, r6 -; PIC: stw r4 -; STATIC: li r5, lo16(Ltmp0) -; STATIC: addis r5, r5, ha16(Ltmp0) -; STATIC: stw r5 +; PIC: addis r[[R0:[0-9]+]], r{{[0-9]+}}, ha16(Ltmp0-L0$pb) +; PIC: li r[[R1:[0-9]+]], lo16(Ltmp0-L0$pb) +; PIC: add r[[R2:[0-9]+]], r[[R0]], r[[R1]] +; PIC: stw r[[R2]] +; STATIC: li r[[R0:[0-9]+]], lo16(Ltmp0) +; STATIC: addis r[[R0]], r[[R0]], ha16(Ltmp0) +; STATIC: stw r[[R0]] store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 ret i32 %res.3 } diff --git a/test/CodeGen/PowerPC/mulhs.ll b/test/CodeGen/PowerPC/mulhs.ll index 9ab8d997c0d0..5b02e187ae88 100644 --- a/test/CodeGen/PowerPC/mulhs.ll +++ b/test/CodeGen/PowerPC/mulhs.ll @@ -5,7 +5,7 @@ ; RUN: not grep add %t ; RUN: grep mulhw %t | count 1 -define i32 @mulhs(i32 %a, i32 %b) { +define i32 @mulhs(i32 %a, i32 %b) nounwind { entry: %tmp.1 = sext i32 %a to i64 ; [#uses=1] %tmp.3 = sext i32 %b to i64 ; [#uses=1] diff --git a/test/CodeGen/PowerPC/ppc-prologue.ll b/test/CodeGen/PowerPC/ppc-prologue.ll index 2ebfd3c319fc..553837121a36 100644 --- a/test/CodeGen/PowerPC/ppc-prologue.ll +++ b/test/CodeGen/PowerPC/ppc-prologue.ll @@ -5,9 +5,7 @@ define i32 @_Z4funci(i32 %a) ssp { ; CHECK-NEXT: stw r31, -4(r1) ; CHECK-NEXT: stw r0, 8(r1) ; CHECK-NEXT: stwu r1, -80(r1) -; CHECK-NEXT: Ltmp0: -; CHECK-NEXT: mr r31, r1 -; CHECK-NEXT: Ltmp1: +; CHECK: mr r31, r1 entry: %a_addr = alloca i32 ; [#uses=2] %retval = alloca i32 ; [#uses=2] diff --git a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll index fbf724270566..9e6583ca2ce1 100644 --- a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll +++ b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll @@ -1,5 +1,7 @@ ;RUN: llc -march=sparc < %s | FileCheck %s -check-prefix=V8 ;RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9 +;RUN: llc -march=sparc -regalloc=basic < %s | FileCheck %s -check-prefix=V8 +;RUN: llc -march=sparc -regalloc=basic -mattr=v9 < %s | 
FileCheck %s -check-prefix=V9 define i8* @frameaddr() nounwind readnone { entry: diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll index 98feb83231dc..92f54675b72b 100644 --- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll +++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -regalloc=basic | FileCheck %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" target triple = "s390x-ibm-linux" @@ -8,7 +9,7 @@ declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) n declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind { -; CHECK: lg %r11, 328(%r15) +; CHECK: lg %r{{[0-9]+}}, 328(%r15) newFuncRoot: br label %bb3 diff --git a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll index 39612c00e4f6..d6ca0d793351 100644 --- a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll +++ b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6 -verify-machineinstrs | FileCheck %s ; rdar://7157006 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } diff --git a/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll b/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll deleted file mode 100644 index fad26693e768..000000000000 --- 
a/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -regalloc=fast -relocation-model=pic | FileCheck %s - -target triple = "thumbv6-apple-darwin10" - -@fred = internal global i32 0 ; [#uses=1] - -define void @foo() nounwind { -entry: -; CHECK: str r0, [sp - %0 = call i32 (...)* @bar() nounwind ; [#uses=1] -; CHECK: blx _bar -; CHECK: ldr r1, [sp - store i32 %0, i32* @fred, align 4 - br label %return - -return: ; preds = %entry - ret void -} - -declare i32 @bar(...) diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll index 06c0dfec5bab..9f5a677ed356 100644 --- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll +++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll @@ -10,7 +10,7 @@ define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind { ; CHECK: blx ___muldf3 ; CHECK: blx ___muldf3 -; CHECK: beq LBB0_7 +; CHECK: beq LBB0 ; CHECK: blx ___muldf3 ;