diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.h')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 1728 |
1 files changed, 856 insertions, 872 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 3a17099da38f..7f3dc90a2d73 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -14,8 +14,6 @@ #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" namespace llvm { @@ -24,680 +22,809 @@ namespace llvm { namespace X86ISD { // X86 Specific DAG Nodes - enum NodeType : unsigned { - // Start the numbering where the builtin ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - /// Bit scan forward. - BSF, - /// Bit scan reverse. - BSR, - - /// Double shift instructions. These correspond to - /// X86::SHLDxx and X86::SHRDxx instructions. - SHLD, - SHRD, - - /// Bitwise logical AND of floating point values. This corresponds - /// to X86::ANDPS or X86::ANDPD. - FAND, - - /// Bitwise logical OR of floating point values. This corresponds - /// to X86::ORPS or X86::ORPD. - FOR, - - /// Bitwise logical XOR of floating point values. This corresponds - /// to X86::XORPS or X86::XORPD. - FXOR, - - /// Bitwise logical ANDNOT of floating point values. This - /// corresponds to X86::ANDNPS or X86::ANDNPD. - FANDN, - - /// These operations represent an abstract X86 call - /// instruction, which includes a bunch of information. In particular the - /// operands of these node are: - /// - /// #0 - The incoming token chain - /// #1 - The callee - /// #2 - The number of arg bytes the caller pushes on the stack. - /// #3 - The number of arg bytes the callee pops off the stack. - /// #4 - The value to pass in AL/AX/EAX (optional) - /// #5 - The value to pass in DL/DX/EDX (optional) - /// - /// The result values of these nodes are: - /// - /// #0 - The outgoing token chain - /// #1 - The first register result value (optional) - /// #2 - The second register result value (optional) - /// - CALL, - - /// Same as call except it adds the NoTrack prefix. - NT_CALL, - - /// X86 compare and logical compare instructions. - CMP, COMI, UCOMI, - - /// X86 bit-test instructions. - BT, - - /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS - /// operand, usually produced by a CMP instruction. - SETCC, - - /// X86 Select - SELECTS, - - // Same as SETCC except it's materialized with a sbb and the value is all - // one's or all zero's. - SETCC_CARRY, // R = carry_bit ? ~0 : 0 - - /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. - /// Operands are two FP values to compare; result is a mask of - /// 0s or 1s. Generally DTRT for C/C++ with NaNs. - FSETCC, - - /// X86 FP SETCC, similar to above, but with output as an i1 mask and - /// and a version with SAE. - FSETCCM, FSETCCM_SAE, - - /// X86 conditional moves. Operand 0 and operand 1 are the two values - /// to select from. Operand 2 is the condition code, and operand 3 is the - /// flag operand produced by a CMP or TEST instruction. - CMOV, - - /// X86 conditional branches. Operand 0 is the chain operand, operand 1 - /// is the block to branch if condition is true, operand 2 is the - /// condition code, and operand 3 is the flag operand produced by a CMP - /// or TEST instruction. - BRCOND, - - /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and - /// operand 1 is the target address. - NT_BRIND, - - /// Return with a flag operand. Operand 0 is the chain operand, operand - /// 1 is the number of bytes of stack to pop. - RET_FLAG, - - /// Return from interrupt. Operand 0 is the number of bytes to pop. - IRET, - - /// Repeat fill, corresponds to X86::REP_STOSx. - REP_STOS, - - /// Repeat move, corresponds to X86::REP_MOVSx. - REP_MOVS, - - /// On Darwin, this node represents the result of the popl - /// at function entry, used for PIC code. - GlobalBaseReg, - - /// A wrapper node for TargetConstantPool, TargetJumpTable, - /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, - /// MCSymbol and TargetBlockAddress. - Wrapper, - - /// Special wrapper used under X86-64 PIC mode for RIP - /// relative displacements. - WrapperRIP, - - /// Copies a 64-bit value from an MMX vector to the low word - /// of an XMM vector, with the high word zero filled. - MOVQ2DQ, - - /// Copies a 64-bit value from the low word of an XMM vector - /// to an MMX vector. - MOVDQ2Q, - - /// Copies a 32-bit value from the low word of a MMX - /// vector to a GPR. - MMX_MOVD2W, - - /// Copies a GPR into the low 32-bit word of a MMX vector - /// and zero out the high word. - MMX_MOVW2D, - - /// Extract an 8-bit value from a vector and zero extend it to - /// i32, corresponds to X86::PEXTRB. - PEXTRB, - - /// Extract a 16-bit value from a vector and zero extend it to - /// i32, corresponds to X86::PEXTRW. - PEXTRW, - - /// Insert any element of a 4 x float vector into any element - /// of a destination 4 x floatvector. - INSERTPS, - - /// Insert the lower 8-bits of a 32-bit value to a vector, - /// corresponds to X86::PINSRB. - PINSRB, - - /// Insert the lower 16-bits of a 32-bit value to a vector, - /// corresponds to X86::PINSRW. - PINSRW, - - /// Shuffle 16 8-bit values within a vector. - PSHUFB, - - /// Compute Sum of Absolute Differences. - PSADBW, - /// Compute Double Block Packed Sum-Absolute-Differences - DBPSADBW, - - /// Bitwise Logical AND NOT of Packed FP values. - ANDNP, - - /// Blend where the selector is an immediate. - BLENDI, - - /// Dynamic (non-constant condition) vector blend where only the sign bits - /// of the condition elements are used. This is used to enforce that the - /// condition mask is not valid for generic VSELECT optimizations. This - /// is also used to implement the intrinsics. - /// Operands are in VSELECT order: MASK, TRUE, FALSE - BLENDV, - - /// Combined add and sub on an FP vector. - ADDSUB, - - // FP vector ops with rounding mode. - FADD_RND, FADDS, FADDS_RND, - FSUB_RND, FSUBS, FSUBS_RND, - FMUL_RND, FMULS, FMULS_RND, - FDIV_RND, FDIVS, FDIVS_RND, - FMAX_SAE, FMAXS_SAE, - FMIN_SAE, FMINS_SAE, - FSQRT_RND, FSQRTS, FSQRTS_RND, - - // FP vector get exponent. - FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE, - // Extract Normalized Mantissas. - VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE, - // FP Scale. - SCALEF, SCALEF_RND, - SCALEFS, SCALEFS_RND, - - // Unsigned Integer average. - AVG, - - /// Integer horizontal add/sub. - HADD, - HSUB, - - /// Floating point horizontal add/sub. - FHADD, - FHSUB, - - // Detect Conflicts Within a Vector - CONFLICT, - - /// Floating point max and min. - FMAX, FMIN, - - /// Commutative FMIN and FMAX. - FMAXC, FMINC, - - /// Scalar intrinsic floating point max and min. - FMAXS, FMINS, - - /// Floating point reciprocal-sqrt and reciprocal approximation. - /// Note that these typically require refinement - /// in order to obtain suitable precision. - FRSQRT, FRCP, - - // AVX-512 reciprocal approximations with a little more precision. - RSQRT14, RSQRT14S, RCP14, RCP14S, - - // Thread Local Storage. - TLSADDR, - - // Thread Local Storage. A call to get the start address - // of the TLS block for the current module. - TLSBASEADDR, - - // Thread Local Storage. When calling to an OS provided - // thunk at the address from an earlier relocation. - TLSCALL, + enum NodeType : unsigned { + // Start the numbering where the builtin ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + /// Bit scan forward. + BSF, + /// Bit scan reverse. + BSR, + + /// X86 funnel/double shift i16 instructions. These correspond to + /// X86::SHLDW and X86::SHRDW instructions which have different amt + /// modulo rules to generic funnel shifts. + /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. + FSHL, + FSHR, + + /// Bitwise logical AND of floating point values. This corresponds + /// to X86::ANDPS or X86::ANDPD. + FAND, + + /// Bitwise logical OR of floating point values. This corresponds + /// to X86::ORPS or X86::ORPD. + FOR, + + /// Bitwise logical XOR of floating point values. This corresponds + /// to X86::XORPS or X86::XORPD. + FXOR, + + /// Bitwise logical ANDNOT of floating point values. This + /// corresponds to X86::ANDNPS or X86::ANDNPD. + FANDN, + + /// These operations represent an abstract X86 call + /// instruction, which includes a bunch of information. In particular the + /// operands of these node are: + /// + /// #0 - The incoming token chain + /// #1 - The callee + /// #2 - The number of arg bytes the caller pushes on the stack. + /// #3 - The number of arg bytes the callee pops off the stack. + /// #4 - The value to pass in AL/AX/EAX (optional) + /// #5 - The value to pass in DL/DX/EDX (optional) + /// + /// The result values of these nodes are: + /// + /// #0 - The outgoing token chain + /// #1 - The first register result value (optional) + /// #2 - The second register result value (optional) + /// + CALL, - // Exception Handling helpers. - EH_RETURN, + /// Same as call except it adds the NoTrack prefix. + NT_CALL, - // SjLj exception handling setjmp. - EH_SJLJ_SETJMP, + /// X86 compare and logical compare instructions. + CMP, + FCMP, + COMI, + UCOMI, - // SjLj exception handling longjmp. - EH_SJLJ_LONGJMP, + /// X86 bit-test instructions. + BT, - // SjLj exception handling dispatch. - EH_SJLJ_SETUP_DISPATCH, + /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS + /// operand, usually produced by a CMP instruction. + SETCC, - /// Tail call return. See X86TargetLowering::LowerCall for - /// the list of operands. - TC_RETURN, + /// X86 Select + SELECTS, - // Vector move to low scalar and zero higher vector elements. - VZEXT_MOVL, + // Same as SETCC except it's materialized with a sbb and the value is all + // one's or all zero's. + SETCC_CARRY, // R = carry_bit ? ~0 : 0 - // Vector integer truncate. - VTRUNC, - // Vector integer truncate with unsigned/signed saturation. - VTRUNCUS, VTRUNCS, + /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. + /// Operands are two FP values to compare; result is a mask of + /// 0s or 1s. Generally DTRT for C/C++ with NaNs. + FSETCC, - // Masked version of the above. Used when less than a 128-bit result is - // produced since the mask only applies to the lower elements and can't - // be represented by a select. - // SRC, PASSTHRU, MASK - VMTRUNC, VMTRUNCUS, VMTRUNCS, - - // Vector FP extend. - VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE, - - // Vector FP round. - VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND, - - // Masked version of above. Used for v2f64->v4f32. - // SRC, PASSTHRU, MASK - VMFPROUND, - - // 128-bit vector logical left / right shift - VSHLDQ, VSRLDQ, - - // Vector shift elements - VSHL, VSRL, VSRA, - - // Vector variable shift - VSHLV, VSRLV, VSRAV, - - // Vector shift elements by immediate - VSHLI, VSRLI, VSRAI, - - // Shifts of mask registers. - KSHIFTL, KSHIFTR, - - // Bit rotate by immediate - VROTLI, VROTRI, - - // Vector packed double/float comparison. - CMPP, - - // Vector integer comparisons. - PCMPEQ, PCMPGT, - - // v8i16 Horizontal minimum and position. - PHMINPOS, - - MULTISHIFT, - - /// Vector comparison generating mask bits for fp and - /// integer signed and unsigned data types. - CMPM, - // Vector comparison with SAE for FP values - CMPM_SAE, - - // Arithmetic operations with FLAGS results. - ADD, SUB, ADC, SBB, SMUL, UMUL, - OR, XOR, AND, - - // Bit field extract. - BEXTR, - - // Zero High Bits Starting with Specified Bit Position. - BZHI, - - // X86-specific multiply by immediate. - MUL_IMM, - - // Vector sign bit extraction. - MOVMSK, - - // Vector bitwise comparisons. - PTEST, - - // Vector packed fp sign bitwise comparisons. - TESTP, - - // OR/AND test for masks. - KORTEST, - KTEST, - - // ADD for masks. - KADD, - - // Several flavors of instructions with vector shuffle behaviors. - // Saturated signed/unnsigned packing. - PACKSS, - PACKUS, - // Intra-lane alignr. - PALIGNR, - // AVX512 inter-lane alignr. - VALIGN, - PSHUFD, - PSHUFHW, - PSHUFLW, - SHUFP, - // VBMI2 Concat & Shift. - VSHLD, - VSHRD, - VSHLDV, - VSHRDV, - //Shuffle Packed Values at 128-bit granularity. - SHUF128, - MOVDDUP, - MOVSHDUP, - MOVSLDUP, - MOVLHPS, - MOVHLPS, - MOVSD, - MOVSS, - UNPCKL, - UNPCKH, - VPERMILPV, - VPERMILPI, - VPERMI, - VPERM2X128, - - // Variable Permute (VPERM). - // Res = VPERMV MaskV, V0 - VPERMV, - - // 3-op Variable Permute (VPERMT2). - // Res = VPERMV3 V0, MaskV, V1 - VPERMV3, - - // Bitwise ternary logic. - VPTERNLOG, - // Fix Up Special Packed Float32/64 values. - VFIXUPIMM, VFIXUPIMM_SAE, - VFIXUPIMMS, VFIXUPIMMS_SAE, - // Range Restriction Calculation For Packed Pairs of Float32/64 values. - VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE, - // Reduce - Perform Reduction Transformation on scalar\packed FP. - VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE, - // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. - // Also used by the legacy (V)ROUND intrinsics where we mask out the - // scaling part of the immediate. - VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE, - // Tests Types Of a FP Values for packed types. - VFPCLASS, - // Tests Types Of a FP Values for scalar types. - VFPCLASSS, - - // Broadcast (splat) scalar or element 0 of a vector. If the operand is - // a vector, this node may change the vector length as part of the splat. - VBROADCAST, - // Broadcast mask to vector. - VBROADCASTM, - // Broadcast subvector to vector. - SUBV_BROADCAST, - - /// SSE4A Extraction and Insertion. - EXTRQI, INSERTQI, - - // XOP arithmetic/logical shifts. - VPSHA, VPSHL, - // XOP signed/unsigned integer comparisons. - VPCOM, VPCOMU, - // XOP packed permute bytes. - VPPERM, - // XOP two source permutation. - VPERMIL2, - - // Vector multiply packed unsigned doubleword integers. - PMULUDQ, - // Vector multiply packed signed doubleword integers. - PMULDQ, - // Vector Multiply Packed UnsignedIntegers with Round and Scale. - MULHRS, - - // Multiply and Add Packed Integers. - VPMADDUBSW, VPMADDWD, - - // AVX512IFMA multiply and add. - // NOTE: These are different than the instruction and perform - // op0 x op1 + op2. - VPMADD52L, VPMADD52H, - - // VNNI - VPDPBUSD, - VPDPBUSDS, - VPDPWSSD, - VPDPWSSDS, - - // FMA nodes. - // We use the target independent ISD::FMA for the non-inverted case. - FNMADD, - FMSUB, - FNMSUB, - FMADDSUB, - FMSUBADD, - - // FMA with rounding mode. - FMADD_RND, - FNMADD_RND, - FMSUB_RND, - FNMSUB_RND, - FMADDSUB_RND, - FMSUBADD_RND, - - // Compress and expand. - COMPRESS, - EXPAND, - - // Bits shuffle - VPSHUFBITQMB, - - // Convert Unsigned/Integer to Floating-Point Value with rounding mode. - SINT_TO_FP_RND, UINT_TO_FP_RND, - SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP, - SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND, - - // Vector float/double to signed/unsigned integer. - CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND, - // Scalar float/double to signed/unsigned integer. - CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND, - - // Vector float/double to signed/unsigned integer with truncation. - CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE, - // Scalar float/double to signed/unsigned integer with truncation. - CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE, - - // Vector signed/unsigned integer to float/double. - CVTSI2P, CVTUI2P, - - // Masked versions of above. Used for v2f64->v4f32. - // SRC, PASSTHRU, MASK - MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI, - MCVTSI2P, MCVTUI2P, - - // Vector float to bfloat16. - // Convert TWO packed single data to one packed BF16 data - CVTNE2PS2BF16, - // Convert packed single data to packed BF16 data - CVTNEPS2BF16, - // Masked version of above. - // SRC, PASSTHRU, MASK - MCVTNEPS2BF16, - - // Dot product of BF16 pairs to accumulated into - // packed single precision. - DPBF16PS, - - // Save xmm argument registers to the stack, according to %al. An operator - // is needed so that this can be expanded with control flow. - VASTART_SAVE_XMM_REGS, - - // Windows's _chkstk call to do stack probing. - WIN_ALLOCA, - - // For allocating variable amounts of stack space when using - // segmented stacks. Check if the current stacklet has enough space, and - // falls back to heap allocation if not. - SEG_ALLOCA, - - // Memory barriers. - MEMBARRIER, - MFENCE, - - // Store FP status word into i16 register. - FNSTSW16r, - - // Store contents of %ah into %eflags. - SAHF, - - // Get a random integer and indicate whether it is valid in CF. - RDRAND, - - // Get a NIST SP800-90B & C compliant random integer and - // indicate whether it is valid in CF. - RDSEED, - - // Protection keys - // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. - // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is - // value for ECX. - RDPKRU, WRPKRU, - - // SSE42 string comparisons. - // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG - // will emit one or two instructions based on which results are used. If - // flags and index/mask this allows us to use a single instruction since - // we won't have to pick and opcode for flags. Instead we can rely on the - // DAG to CSE everything and decide at isel. - PCMPISTR, - PCMPESTR, - - // Test if in transactional execution. - XTEST, - - // ERI instructions. - RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE, - RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE, - - // Conversions between float and half-float. - CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE, - - // Masked version of above. - // SRC, RND, PASSTHRU, MASK - MCVTPS2PH, - - // Galois Field Arithmetic Instructions - GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB, - - // LWP insert record. - LWPINS, - - // User level wait - UMWAIT, TPAUSE, - - // Enqueue Stores Instructions - ENQCMD, ENQCMDS, - - // For avx512-vp2intersect - VP2INTERSECT, - - /// X86 strict FP compare instructions. - STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, - STRICT_FCMPS, - - // Vector packed double/float comparison. - STRICT_CMPP, - - /// Vector comparison generating mask bits for fp and - /// integer signed and unsigned data types. - STRICT_CMPM, - - // Vector float/double to signed/unsigned integer with truncation. - STRICT_CVTTP2SI, STRICT_CVTTP2UI, - - // Vector FP extend. - STRICT_VFPEXT, - - // Vector FP round. - STRICT_VFPROUND, - - // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. - // Also used by the legacy (V)ROUND intrinsics where we mask out the - // scaling part of the immediate. - STRICT_VRNDSCALE, - - // Vector signed/unsigned integer to float/double. - STRICT_CVTSI2P, STRICT_CVTUI2P, - - // Compare and swap. - LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, - LCMPXCHG8_DAG, - LCMPXCHG16_DAG, - LCMPXCHG8_SAVE_EBX_DAG, - LCMPXCHG16_SAVE_RBX_DAG, - - /// LOCK-prefixed arithmetic read-modify-write instructions. - /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) - LADD, LSUB, LOR, LXOR, LAND, - - // Load, scalar_to_vector, and zero extend. - VZEXT_LOAD, - - // extract_vector_elt, store. - VEXTRACT_STORE, - - // scalar broadcast from memory - VBROADCAST_LOAD, - - // Store FP control world into i16 memory. - FNSTCW16m, - - /// This instruction implements FP_TO_SINT with the - /// integer destination in memory and a FP reg source. This corresponds - /// to the X86::FIST*m instructions and the rounding mode change stuff. It - /// has two inputs (token chain and address) and two outputs (int value - /// and token chain). Memory VT specifies the type to store to. - FP_TO_INT_IN_MEM, - - /// This instruction implements SINT_TO_FP with the - /// integer source in memory and FP reg result. This corresponds to the - /// X86::FILD*m instructions. It has two inputs (token chain and address) - /// and two outputs (FP value and token chain). FILD_FLAG also produces a - /// flag). The integer source type is specified by the memory VT. - FILD, - FILD_FLAG, - - /// This instruction implements a fp->int store from FP stack - /// slots. This corresponds to the fist instruction. It takes a - /// chain operand, value to store, address, and glue. The memory VT - /// specifies the type to store as. - FIST, - - /// This instruction implements an extending load to FP stack slots. - /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain - /// operand, and ptr to load from. The memory VT specifies the type to - /// load from. - FLD, + /// X86 FP SETCC, similar to above, but with output as an i1 mask and + /// and a version with SAE. + FSETCCM, + FSETCCM_SAE, - /// This instruction implements a truncating store from FP stack - /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a - /// chain operand, value to store, address, and glue. The memory VT - /// specifies the type to store as. - FST, - - /// This instruction grabs the address of the next argument - /// from a va_list. (reads and modifies the va_list in memory) - VAARG_64, - - // Vector truncating store with unsigned/signed saturation - VTRUNCSTOREUS, VTRUNCSTORES, - // Vector truncating masked store with unsigned/signed saturation - VMTRUNCSTOREUS, VMTRUNCSTORES, - - // X86 specific gather and scatter - MGATHER, MSCATTER, - - // WARNING: Do not add anything in the end unless you want the node to - // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all - // opcodes will be thought as target memory ops! - }; + /// X86 conditional moves. Operand 0 and operand 1 are the two values + /// to select from. Operand 2 is the condition code, and operand 3 is the + /// flag operand produced by a CMP or TEST instruction. + CMOV, + + /// X86 conditional branches. Operand 0 is the chain operand, operand 1 + /// is the block to branch if condition is true, operand 2 is the + /// condition code, and operand 3 is the flag operand produced by a CMP + /// or TEST instruction. + BRCOND, + + /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and + /// operand 1 is the target address. + NT_BRIND, + + /// Return with a flag operand. Operand 0 is the chain operand, operand + /// 1 is the number of bytes of stack to pop. + RET_FLAG, + + /// Return from interrupt. Operand 0 is the number of bytes to pop. + IRET, + + /// Repeat fill, corresponds to X86::REP_STOSx. + REP_STOS, + + /// Repeat move, corresponds to X86::REP_MOVSx. + REP_MOVS, + + /// On Darwin, this node represents the result of the popl + /// at function entry, used for PIC code. + GlobalBaseReg, + + /// A wrapper node for TargetConstantPool, TargetJumpTable, + /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, + /// MCSymbol and TargetBlockAddress. + Wrapper, + + /// Special wrapper used under X86-64 PIC mode for RIP + /// relative displacements. + WrapperRIP, + + /// Copies a 64-bit value from an MMX vector to the low word + /// of an XMM vector, with the high word zero filled. + MOVQ2DQ, + + /// Copies a 64-bit value from the low word of an XMM vector + /// to an MMX vector. + MOVDQ2Q, + + /// Copies a 32-bit value from the low word of a MMX + /// vector to a GPR. + MMX_MOVD2W, + + /// Copies a GPR into the low 32-bit word of a MMX vector + /// and zero out the high word. + MMX_MOVW2D, + + /// Extract an 8-bit value from a vector and zero extend it to + /// i32, corresponds to X86::PEXTRB. + PEXTRB, + + /// Extract a 16-bit value from a vector and zero extend it to + /// i32, corresponds to X86::PEXTRW. + PEXTRW, + + /// Insert any element of a 4 x float vector into any element + /// of a destination 4 x floatvector. + INSERTPS, + + /// Insert the lower 8-bits of a 32-bit value to a vector, + /// corresponds to X86::PINSRB. + PINSRB, + + /// Insert the lower 16-bits of a 32-bit value to a vector, + /// corresponds to X86::PINSRW. + PINSRW, + + /// Shuffle 16 8-bit values within a vector. + PSHUFB, + + /// Compute Sum of Absolute Differences. + PSADBW, + /// Compute Double Block Packed Sum-Absolute-Differences + DBPSADBW, + + /// Bitwise Logical AND NOT of Packed FP values. + ANDNP, + + /// Blend where the selector is an immediate. + BLENDI, + + /// Dynamic (non-constant condition) vector blend where only the sign bits + /// of the condition elements are used. This is used to enforce that the + /// condition mask is not valid for generic VSELECT optimizations. This + /// is also used to implement the intrinsics. + /// Operands are in VSELECT order: MASK, TRUE, FALSE + BLENDV, + + /// Combined add and sub on an FP vector. + ADDSUB, + + // FP vector ops with rounding mode. + FADD_RND, + FADDS, + FADDS_RND, + FSUB_RND, + FSUBS, + FSUBS_RND, + FMUL_RND, + FMULS, + FMULS_RND, + FDIV_RND, + FDIVS, + FDIVS_RND, + FMAX_SAE, + FMAXS_SAE, + FMIN_SAE, + FMINS_SAE, + FSQRT_RND, + FSQRTS, + FSQRTS_RND, + + // FP vector get exponent. + FGETEXP, + FGETEXP_SAE, + FGETEXPS, + FGETEXPS_SAE, + // Extract Normalized Mantissas. + VGETMANT, + VGETMANT_SAE, + VGETMANTS, + VGETMANTS_SAE, + // FP Scale. + SCALEF, + SCALEF_RND, + SCALEFS, + SCALEFS_RND, + + // Unsigned Integer average. + AVG, + + /// Integer horizontal add/sub. + HADD, + HSUB, + + /// Floating point horizontal add/sub. + FHADD, + FHSUB, + + // Detect Conflicts Within a Vector + CONFLICT, + + /// Floating point max and min. + FMAX, + FMIN, + + /// Commutative FMIN and FMAX. + FMAXC, + FMINC, + + /// Scalar intrinsic floating point max and min. + FMAXS, + FMINS, + + /// Floating point reciprocal-sqrt and reciprocal approximation. + /// Note that these typically require refinement + /// in order to obtain suitable precision. + FRSQRT, + FRCP, + + // AVX-512 reciprocal approximations with a little more precision. + RSQRT14, + RSQRT14S, + RCP14, + RCP14S, + + // Thread Local Storage. + TLSADDR, + + // Thread Local Storage. A call to get the start address + // of the TLS block for the current module. + TLSBASEADDR, + + // Thread Local Storage. When calling to an OS provided + // thunk at the address from an earlier relocation. + TLSCALL, + + // Exception Handling helpers. + EH_RETURN, + + // SjLj exception handling setjmp. + EH_SJLJ_SETJMP, + + // SjLj exception handling longjmp. + EH_SJLJ_LONGJMP, + + // SjLj exception handling dispatch. + EH_SJLJ_SETUP_DISPATCH, + + /// Tail call return. See X86TargetLowering::LowerCall for + /// the list of operands. + TC_RETURN, + + // Vector move to low scalar and zero higher vector elements. + VZEXT_MOVL, + + // Vector integer truncate. + VTRUNC, + // Vector integer truncate with unsigned/signed saturation. + VTRUNCUS, + VTRUNCS, + + // Masked version of the above. Used when less than a 128-bit result is + // produced since the mask only applies to the lower elements and can't + // be represented by a select. + // SRC, PASSTHRU, MASK + VMTRUNC, + VMTRUNCUS, + VMTRUNCS, + + // Vector FP extend. + VFPEXT, + VFPEXT_SAE, + VFPEXTS, + VFPEXTS_SAE, + + // Vector FP round. + VFPROUND, + VFPROUND_RND, + VFPROUNDS, + VFPROUNDS_RND, + + // Masked version of above. Used for v2f64->v4f32. + // SRC, PASSTHRU, MASK + VMFPROUND, + + // 128-bit vector logical left / right shift + VSHLDQ, + VSRLDQ, + + // Vector shift elements + VSHL, + VSRL, + VSRA, + + // Vector variable shift + VSHLV, + VSRLV, + VSRAV, + + // Vector shift elements by immediate + VSHLI, + VSRLI, + VSRAI, + + // Shifts of mask registers. + KSHIFTL, + KSHIFTR, + + // Bit rotate by immediate + VROTLI, + VROTRI, + + // Vector packed double/float comparison. + CMPP, + + // Vector integer comparisons. + PCMPEQ, + PCMPGT, + + // v8i16 Horizontal minimum and position. + PHMINPOS, + + MULTISHIFT, + + /// Vector comparison generating mask bits for fp and + /// integer signed and unsigned data types. + CMPM, + // Vector comparison with SAE for FP values + CMPM_SAE, + + // Arithmetic operations with FLAGS results. + ADD, + SUB, + ADC, + SBB, + SMUL, + UMUL, + OR, + XOR, + AND, + + // Bit field extract. + BEXTR, + + // Zero High Bits Starting with Specified Bit Position. + BZHI, + + // Parallel extract and deposit. + PDEP, + PEXT, + + // X86-specific multiply by immediate. + MUL_IMM, + + // Vector sign bit extraction. + MOVMSK, + + // Vector bitwise comparisons. + PTEST, + + // Vector packed fp sign bitwise comparisons. + TESTP, + + // OR/AND test for masks. + KORTEST, + KTEST, + + // ADD for masks. + KADD, + + // Several flavors of instructions with vector shuffle behaviors. + // Saturated signed/unnsigned packing. + PACKSS, + PACKUS, + // Intra-lane alignr. + PALIGNR, + // AVX512 inter-lane alignr. + VALIGN, + PSHUFD, + PSHUFHW, + PSHUFLW, + SHUFP, + // VBMI2 Concat & Shift. + VSHLD, + VSHRD, + VSHLDV, + VSHRDV, + // Shuffle Packed Values at 128-bit granularity. + SHUF128, + MOVDDUP, + MOVSHDUP, + MOVSLDUP, + MOVLHPS, + MOVHLPS, + MOVSD, + MOVSS, + UNPCKL, + UNPCKH, + VPERMILPV, + VPERMILPI, + VPERMI, + VPERM2X128, + + // Variable Permute (VPERM). + // Res = VPERMV MaskV, V0 + VPERMV, + + // 3-op Variable Permute (VPERMT2). + // Res = VPERMV3 V0, MaskV, V1 + VPERMV3, + + // Bitwise ternary logic. + VPTERNLOG, + // Fix Up Special Packed Float32/64 values. + VFIXUPIMM, + VFIXUPIMM_SAE, + VFIXUPIMMS, + VFIXUPIMMS_SAE, + // Range Restriction Calculation For Packed Pairs of Float32/64 values. + VRANGE, + VRANGE_SAE, + VRANGES, + VRANGES_SAE, + // Reduce - Perform Reduction Transformation on scalar\packed FP. + VREDUCE, + VREDUCE_SAE, + VREDUCES, + VREDUCES_SAE, + // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. + // Also used by the legacy (V)ROUND intrinsics where we mask out the + // scaling part of the immediate. + VRNDSCALE, + VRNDSCALE_SAE, + VRNDSCALES, + VRNDSCALES_SAE, + // Tests Types Of a FP Values for packed types. + VFPCLASS, + // Tests Types Of a FP Values for scalar types. + VFPCLASSS, + + // Broadcast (splat) scalar or element 0 of a vector. If the operand is + // a vector, this node may change the vector length as part of the splat. + VBROADCAST, + // Broadcast mask to vector. + VBROADCASTM, + // Broadcast subvector to vector. + SUBV_BROADCAST, + + /// SSE4A Extraction and Insertion. + EXTRQI, + INSERTQI, + + // XOP arithmetic/logical shifts. + VPSHA, + VPSHL, + // XOP signed/unsigned integer comparisons. + VPCOM, + VPCOMU, + // XOP packed permute bytes. + VPPERM, + // XOP two source permutation. + VPERMIL2, + + // Vector multiply packed unsigned doubleword integers. + PMULUDQ, + // Vector multiply packed signed doubleword integers. + PMULDQ, + // Vector Multiply Packed UnsignedIntegers with Round and Scale. + MULHRS, + + // Multiply and Add Packed Integers. + VPMADDUBSW, + VPMADDWD, + + // AVX512IFMA multiply and add. + // NOTE: These are different than the instruction and perform + // op0 x op1 + op2. + VPMADD52L, + VPMADD52H, + + // VNNI + VPDPBUSD, + VPDPBUSDS, + VPDPWSSD, + VPDPWSSDS, + + // FMA nodes. + // We use the target independent ISD::FMA for the non-inverted case. + FNMADD, + FMSUB, + FNMSUB, + FMADDSUB, + FMSUBADD, + + // FMA with rounding mode. + FMADD_RND, + FNMADD_RND, + FMSUB_RND, + FNMSUB_RND, + FMADDSUB_RND, + FMSUBADD_RND, + + // Compress and expand. + COMPRESS, + EXPAND, + + // Bits shuffle + VPSHUFBITQMB, + + // Convert Unsigned/Integer to Floating-Point Value with rounding mode. + SINT_TO_FP_RND, + UINT_TO_FP_RND, + SCALAR_SINT_TO_FP, + SCALAR_UINT_TO_FP, + SCALAR_SINT_TO_FP_RND, + SCALAR_UINT_TO_FP_RND, + + // Vector float/double to signed/unsigned integer. + CVTP2SI, + CVTP2UI, + CVTP2SI_RND, + CVTP2UI_RND, + // Scalar float/double to signed/unsigned integer. + CVTS2SI, + CVTS2UI, + CVTS2SI_RND, + CVTS2UI_RND, + + // Vector float/double to signed/unsigned integer with truncation. + CVTTP2SI, + CVTTP2UI, + CVTTP2SI_SAE, + CVTTP2UI_SAE, + // Scalar float/double to signed/unsigned integer with truncation. + CVTTS2SI, + CVTTS2UI, + CVTTS2SI_SAE, + CVTTS2UI_SAE, + + // Vector signed/unsigned integer to float/double. + CVTSI2P, + CVTUI2P, + + // Masked versions of above. Used for v2f64->v4f32. + // SRC, PASSTHRU, MASK + MCVTP2SI, + MCVTP2UI, + MCVTTP2SI, + MCVTTP2UI, + MCVTSI2P, + MCVTUI2P, + + // Vector float to bfloat16. + // Convert TWO packed single data to one packed BF16 data + CVTNE2PS2BF16, + // Convert packed single data to packed BF16 data + CVTNEPS2BF16, + // Masked version of above. + // SRC, PASSTHRU, MASK + MCVTNEPS2BF16, + + // Dot product of BF16 pairs to accumulated into + // packed single precision. + DPBF16PS, + + // Save xmm argument registers to the stack, according to %al. An operator + // is needed so that this can be expanded with control flow. + VASTART_SAVE_XMM_REGS, + + // Windows's _chkstk call to do stack probing. + WIN_ALLOCA, + + // For allocating variable amounts of stack space when using + // segmented stacks. Check if the current stacklet has enough space, and + // falls back to heap allocation if not. + SEG_ALLOCA, + + // For allocating stack space when using stack clash protector. + // Allocation is performed by block, and each block is probed. + PROBED_ALLOCA, + + // Memory barriers. + MEMBARRIER, + MFENCE, + + // Get a random integer and indicate whether it is valid in CF. + RDRAND, + + // Get a NIST SP800-90B & C compliant random integer and + // indicate whether it is valid in CF. + RDSEED, + + // Protection keys + // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. + // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is + // value for ECX. + RDPKRU, + WRPKRU, + + // SSE42 string comparisons. + // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG + // will emit one or two instructions based on which results are used. If + // flags and index/mask this allows us to use a single instruction since + // we won't have to pick and opcode for flags. Instead we can rely on the + // DAG to CSE everything and decide at isel. + PCMPISTR, + PCMPESTR, + + // Test if in transactional execution. + XTEST, + + // ERI instructions. + RSQRT28, + RSQRT28_SAE, + RSQRT28S, + RSQRT28S_SAE, + RCP28, + RCP28_SAE, + RCP28S, + RCP28S_SAE, + EXP2, + EXP2_SAE, + + // Conversions between float and half-float. + CVTPS2PH, + CVTPH2PS, + CVTPH2PS_SAE, + + // Masked version of above. + // SRC, RND, PASSTHRU, MASK + MCVTPS2PH, + + // Galois Field Arithmetic Instructions + GF2P8AFFINEINVQB, + GF2P8AFFINEQB, + GF2P8MULB, + + // LWP insert record. + LWPINS, + + // User level wait + UMWAIT, + TPAUSE, + + // Enqueue Stores Instructions + ENQCMD, + ENQCMDS, + + // For avx512-vp2intersect + VP2INTERSECT, + + /// X86 strict FP compare instructions. + STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCMPS, + + // Vector packed double/float comparison. + STRICT_CMPP, + + /// Vector comparison generating mask bits for fp and + /// integer signed and unsigned data types. + STRICT_CMPM, + + // Vector float/double to signed/unsigned integer with truncation. + STRICT_CVTTP2SI, + STRICT_CVTTP2UI, + + // Vector FP extend. + STRICT_VFPEXT, + + // Vector FP round. + STRICT_VFPROUND, + + // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. + // Also used by the legacy (V)ROUND intrinsics where we mask out the + // scaling part of the immediate. + STRICT_VRNDSCALE, + + // Vector signed/unsigned integer to float/double. + STRICT_CVTSI2P, + STRICT_CVTUI2P, + + // Strict FMA nodes. + STRICT_FNMADD, + STRICT_FMSUB, + STRICT_FNMSUB, + + // Conversions between float and half-float. + STRICT_CVTPS2PH, + STRICT_CVTPH2PS, + + // Compare and swap. + LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, + LCMPXCHG8_DAG, + LCMPXCHG16_DAG, + LCMPXCHG8_SAVE_EBX_DAG, + LCMPXCHG16_SAVE_RBX_DAG, + + /// LOCK-prefixed arithmetic read-modify-write instructions. + /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) + LADD, + LSUB, + LOR, + LXOR, + LAND, + + // Load, scalar_to_vector, and zero extend. + VZEXT_LOAD, + + // extract_vector_elt, store. + VEXTRACT_STORE, + + // scalar broadcast from memory + VBROADCAST_LOAD, + + // Store FP control world into i16 memory. + FNSTCW16m, + + /// This instruction implements FP_TO_SINT with the + /// integer destination in memory and a FP reg source. This corresponds + /// to the X86::FIST*m instructions and the rounding mode change stuff. It + /// has two inputs (token chain and address) and two outputs (int value + /// and token chain). Memory VT specifies the type to store to. + FP_TO_INT_IN_MEM, + + /// This instruction implements SINT_TO_FP with the + /// integer source in memory and FP reg result. This corresponds to the + /// X86::FILD*m instructions. It has two inputs (token chain and address) + /// and two outputs (FP value and token chain). The integer source type is + /// specified by the memory VT. + FILD, + + /// This instruction implements a fp->int store from FP stack + /// slots. This corresponds to the fist instruction. It takes a + /// chain operand, value to store, address, and glue. The memory VT + /// specifies the type to store as. + FIST, + + /// This instruction implements an extending load to FP stack slots. + /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain + /// operand, and ptr to load from. The memory VT specifies the type to + /// load from. + FLD, + + /// This instruction implements a truncating store from FP stack + /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a + /// chain operand, value to store, address, and glue. The memory VT + /// specifies the type to store as. + FST, + + /// This instruction grabs the address of the next argument + /// from a va_list. (reads and modifies the va_list in memory) + VAARG_64, + + // Vector truncating store with unsigned/signed saturation + VTRUNCSTOREUS, + VTRUNCSTORES, + // Vector truncating masked store with unsigned/signed saturation + VMTRUNCSTOREUS, + VMTRUNCSTORES, + + // X86 specific gather and scatter + MGATHER, + MSCATTER, + + // WARNING: Do not add anything in the end unless you want the node to + // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all + // opcodes will be thought as target memory ops! + }; } // end namespace X86ISD /// Define some predicates that are used for node matching. @@ -717,7 +844,10 @@ namespace llvm { /// If Op is a constant whose elements are all the same constant or /// undefined, return true and return the constant value in \p SplatVal. - bool isConstantSplat(SDValue Op, APInt &SplatVal); + /// If we have undef bits that don't cover an entire element, we treat these + /// as zero if AllowPartialUndefs is set, else we fail and return false. + bool isConstantSplat(SDValue Op, APInt &SplatVal, + bool AllowPartialUndefs = true); } // end namespace X86 //===--------------------------------------------------------------------===// @@ -756,19 +886,7 @@ namespace llvm { unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override; - /// Returns the target specific optimal type for load - /// and store operations as a result of memset, memcpy, and memmove - /// lowering. If DstAlign is zero that means it's safe to destination - /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it - /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. If 'IsMemset' is - /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that - /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy - /// source is constant so it does not need to be loaded. - /// It returns EVT::Other if the type should be determined using generic - /// target-independent logic. - EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override; /// Returns true if it's safe to use load / store of the @@ -805,19 +923,6 @@ namespace llvm { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - // Return true if it is profitable to combine a BUILD_VECTOR with a - // stride-pattern to a shuffle and a truncate. - // Example of such a combine: - // v4i32 build_vector((extract_elt V, 1), - // (extract_elt V, 3), - // (extract_elt V, 5), - // (extract_elt V, 7)) - // --> - // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to - // v4i64) - bool isDesirableToCombineBuildVectorToShuffleTruncate( - ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override; - /// Return true if the target has native support for /// the specified value type and it is 'desirable' to use the type for the /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 @@ -830,15 +935,12 @@ namespace llvm { /// and some i16 instructions are slow. bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; - /// Return 1 if we can compute the negated form of the specified expression - /// for the same cost as the expression itself, or 2 if we can compute the - /// negated form more cheaply than the expression itself. Else return 0. - char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations, - bool ForCodeSize, unsigned Depth) const override; - - /// If isNegatibleForFree returns true, return the newly negated expression. + /// Return the newly negated expression if the cost is not expensive and + /// set the cost in \p Cost to indicate that if it is cheaper or neutral to + /// do the negation. SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, + NegatibleCost &Cost, unsigned Depth) const override; MachineBasicBlock * @@ -934,7 +1036,8 @@ namespace llvm { EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; - bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts, TargetLoweringOpt &TLO) const override; /// Determine which of the bits specified in Mask are known to be either @@ -958,6 +1061,12 @@ namespace llvm { TargetLoweringOpt &TLO, unsigned Depth) const override; + bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op, + const APInt &DemandedElts, + unsigned MaskIndex, + TargetLoweringOpt &TLO, + unsigned Depth) const; + bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, @@ -1047,6 +1156,8 @@ namespace llvm { int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + /// This is used to enable splatted operand transforms for vector shifts + /// and vector funnel shifts. bool isVectorShiftByScalarCheap(Type *Ty) const override; /// Add x86-specific opcodes to the default list. @@ -1075,6 +1186,10 @@ namespace llvm { bool isZExtFree(EVT VT1, EVT VT2) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; + bool shouldSinkOperands(Instruction *I, + SmallVectorImpl<Use *> &Ops) const override; + bool shouldConvertPhiType(Type *From, Type *To) const override; + /// Return true if folding a vector load into ExtVal (a sign, zero, or any /// extend node) is profitable. bool isVectorLoadExtDesirable(SDValue) const override; @@ -1171,7 +1286,8 @@ namespace llvm { /// Overflow nodes should get combined/lowered to optimal instructions /// (they should allow eliminating explicit compares by getting flags from /// math ops). - bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override; + bool shouldFormOverflowOp(unsigned Opcode, EVT VT, + bool MathUsed) const override; bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AddrSpace) const override { @@ -1194,12 +1310,12 @@ namespace llvm { /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. - unsigned + Register getExceptionPointerRegister(const Constant *PersonalityFn) const override; /// If a physical register, this returns the register that receives the /// exception typeid on entry to a landing pad. - unsigned + Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override; virtual bool needsFixedCatchObjects() const override; @@ -1227,8 +1343,10 @@ namespace llvm { /// offset as appropriate. Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override; - std::pair<SDValue, SDValue> BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, - SDValue StackSlot, + std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL, + SDValue Chain, SDValue Pointer, + MachinePointerInfo PtrInfo, + Align Alignment, SelectionDAG &DAG) const; bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; @@ -1236,6 +1354,8 @@ namespace llvm { /// Customize the preferred legalization strategy for certain types. LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; + bool softPromoteHalfType() const override { return true; } + MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override; @@ -1251,6 +1371,8 @@ namespace llvm { bool supportSwiftError() const override; + bool hasStackProbeSymbol(MachineFunction &MF) const override; + bool hasInlineStackProbe(MachineFunction &MF) const override; StringRef getStackProbeSymbolName(MachineFunction &MF) const override; unsigned getStackProbeSize(MachineFunction &MF) const; @@ -1314,7 +1436,7 @@ namespace llvm { SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags) const; + ISD::ArgFlagsTy Flags, bool isByval) const; // Call lowering helpers. @@ -1340,8 +1462,9 @@ namespace llvm { unsigned getAddressSpace(void) const; - SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned, + SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, SDValue &Chain) const; + SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; @@ -1365,8 +1488,8 @@ namespace llvm { SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; @@ -1431,7 +1554,7 @@ namespace llvm { const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; TargetLoweringBase::AtomicExpansionKind - shouldExpandAtomicLoadInIR(LoadInst *SI) const override; + shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; @@ -1464,26 +1587,23 @@ namespace llvm { MachineBasicBlock *EmitLoweredSelect(MachineInstr &I, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI, - MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI, + MachineBasicBlock *BB) const; MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; @@ -1497,32 +1617,25 @@ namespace llvm { MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI, MachineBasicBlock *MBB) const; - MachineBasicBlock *emitFMA3Instr(MachineInstr &MI, - MachineBasicBlock *MBB) const; - MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const; - /// Convert a comparison if required by the subtarget. - SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const; - /// Emit flags for the given setcc condition and operands. Also returns the /// corresponding X86 condition code constant in X86CC. SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG, - SDValue &X86CC, SDValue &Chain, - bool IsSignaling) const; + SDValue &X86CC) const; /// Check if replacement of SQRT with RSQRT should be disabled. - bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override; + bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override; /// Use rsqrt* to speed up sqrt calculations. - SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override; /// Use rcp* to speed up fdiv calculations. - SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override; /// Reassociate floating point divisions into multiply by reciprocal. @@ -1537,101 +1650,14 @@ namespace llvm { const TargetLibraryInfo *libInfo); } // end namespace X86 - // Base class for all X86 non-masked store operations. - class X86StoreSDNode : public MemSDNode { - public: - X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, - SDVTList VTs, EVT MemVT, - MachineMemOperand *MMO) - :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {} - const SDValue &getValue() const { return getOperand(1); } - const SDValue &getBasePtr() const { return getOperand(2); } - - static bool classof(const SDNode *N) { - return N->getOpcode() == X86ISD::VTRUNCSTORES || - N->getOpcode() == X86ISD::VTRUNCSTOREUS; - } - }; - - // Base class for all X86 masked store operations. - // The class has the same order of operands as MaskedStoreSDNode for - // convenience. - class X86MaskedStoreSDNode : public MemSDNode { - public: - X86MaskedStoreSDNode(unsigned Opcode, unsigned Order, - const DebugLoc &dl, SDVTList VTs, EVT MemVT, - MachineMemOperand *MMO) - : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {} - - const SDValue &getValue() const { return getOperand(1); } - const SDValue &getBasePtr() const { return getOperand(2); } - const SDValue &getMask() const { return getOperand(3); } - - static bool classof(const SDNode *N) { - return N->getOpcode() == X86ISD::VMTRUNCSTORES || - N->getOpcode() == X86ISD::VMTRUNCSTOREUS; - } - }; - - // X86 Truncating Store with Signed saturation. - class TruncSStoreSDNode : public X86StoreSDNode { - public: - TruncSStoreSDNode(unsigned Order, const DebugLoc &dl, - SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) - : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {} - - static bool classof(const SDNode *N) { - return N->getOpcode() == X86ISD::VTRUNCSTORES; - } - }; - - // X86 Truncating Store with Unsigned saturation. - class TruncUSStoreSDNode : public X86StoreSDNode { - public: - TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl, - SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) - : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {} - - static bool classof(const SDNode *N) { - return N->getOpcode() == X86ISD::VTRUNCSTOREUS; - } - }; - - // X86 Truncating Masked Store with Signed saturation. - class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode { - public: - MaskedTruncSStoreSDNode(unsigned Order, - const DebugLoc &dl, SDVTList VTs, EVT MemVT, - MachineMemOperand *MMO) - : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {} - - static bool classof(const SDNode *N) { - return N->getOpcode() == X86ISD::VMTRUNCSTORES; - } - }; - - // X86 Truncating Masked Store with Unsigned saturation. - class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode { - public: - MaskedTruncUSStoreSDNode(unsigned Order, - const DebugLoc &dl, SDVTList VTs, EVT MemVT, - MachineMemOperand *MMO) - : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {} - - static bool classof(const SDNode *N) { - return N->getOpcode() == X86ISD::VMTRUNCSTOREUS; - } - }; - // X86 specific Gather/Scatter nodes. // The class has the same order of operands as MaskedGatherScatterSDNode for // convenience. - class X86MaskedGatherScatterSDNode : public MemSDNode { + class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode { public: - X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order, - const DebugLoc &dl, SDVTList VTs, EVT MemVT, - MachineMemOperand *MMO) - : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {} + // This is a intended as a utility and should never be directly created. + X86MaskedGatherScatterSDNode() = delete; + ~X86MaskedGatherScatterSDNode() = delete; const SDValue &getBasePtr() const { return getOperand(3); } const SDValue &getIndex() const { return getOperand(4); } @@ -1646,11 +1672,6 @@ namespace llvm { class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode { public: - X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - EVT MemVT, MachineMemOperand *MMO) - : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT, - MMO) {} - const SDValue &getPassThru() const { return getOperand(1); } static bool classof(const SDNode *N) { @@ -1660,11 +1681,6 @@ namespace llvm { class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode { public: - X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - EVT MemVT, MachineMemOperand *MMO) - : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT, - MMO) {} - const SDValue &getValue() const { return getOperand(1); } static bool classof(const SDNode *N) { @@ -1673,47 +1689,15 @@ namespace llvm { }; /// Generate unpacklo/unpackhi shuffle mask. - template <typename T = int> - void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo, - bool Unary) { - assert(Mask.empty() && "Expected an empty shuffle mask vector"); - int NumElts = VT.getVectorNumElements(); - int NumEltsInLane = 128 / VT.getScalarSizeInBits(); - for (int i = 0; i < NumElts; ++i) { - unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane; - int Pos = (i % NumEltsInLane) / 2 + LaneStart; - Pos += (Unary ? 0 : NumElts * (i % 2)); - Pos += (Lo ? 0 : NumEltsInLane / 2); - Mask.push_back(Pos); - } - } - - /// Helper function to scale a shuffle or target shuffle mask, replacing each - /// mask index with the scaled sequential indices for an equivalent narrowed - /// mask. This is the reverse process to canWidenShuffleElements, but can - /// always succeed. - template <typename T> - void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask, - SmallVectorImpl<T> &ScaledMask) { - assert(0 < Scale && "Unexpected scaling factor"); - size_t NumElts = Mask.size(); - ScaledMask.assign(NumElts * Scale, -1); - - for (size_t i = 0; i != NumElts; ++i) { - int M = Mask[i]; - - // Repeat sentinel values in every mask element. - if (M < 0) { - for (size_t s = 0; s != Scale; ++s) - ScaledMask[(Scale * i) + s] = M; - continue; - } - - // Scale mask element and increment across each mask element. - for (size_t s = 0; s != Scale; ++s) - ScaledMask[(Scale * i) + s] = (Scale * M) + s; - } - } + void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo, + bool Unary); + + /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation + /// imposed by AVX and specific to the unary pattern. Example: + /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3> + /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7> + void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo); + } // end namespace llvm #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H |