diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2016-08-16 21:02:59 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-08-16 21:02:59 +0000 |
commit | 3ca95b020283db6244cab92ede73c969253b6a31 (patch) | |
tree | d16e791e58694facd8f68d3e2797a1eaa8018afc /contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | |
parent | 27067774dce3388702a4cf744d7096c6fb71b688 (diff) | |
parent | c3aee98e721333f265a88d6bf348e6e468f027d4 (diff) |
Update llvm to release_39 branch r276489, and resolve conflicts.
Notes:
svn path=/projects/clang390-import/; revision=304240
Diffstat (limited to 'contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 162 |
1 files changed, 146 insertions, 16 deletions
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 619f7c8d25df..18f71675437b 100644 --- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86ShuffleDecode.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/MachineValueType.h" //===----------------------------------------------------------------------===// @@ -44,6 +45,17 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; } +void DecodeInsertElementMask(MVT VT, unsigned Idx, unsigned Len, + SmallVectorImpl<int> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + assert((Idx + Len) <= NumElts && "Insertion out of range"); + + for (unsigned i = 0; i != NumElts; ++i) + ShuffleMask.push_back(i); + for (unsigned i = 0; i != Len; ++i) + ShuffleMask[Idx + i] = NumElts + i; +} + // <3,1> or <6,7,2,3> void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { for (unsigned i = NElts / 2; i != NElts; ++i) @@ -263,6 +275,25 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { } } +/// Decodes a broadcast of the first element of a vector. +void DecodeVectorBroadcast(MVT DstVT, SmallVectorImpl<int> &ShuffleMask) { + unsigned NumElts = DstVT.getVectorNumElements(); + ShuffleMask.append(NumElts, 0); +} + +/// Decodes a broadcast of a subvector to a larger vector type. 
+void DecodeSubVectorBroadcast(MVT DstVT, MVT SrcVT, + SmallVectorImpl<int> &ShuffleMask) { + assert(SrcVT.getScalarType() == DstVT.getScalarType() && + "Non matching vector element types"); + unsigned NumElts = SrcVT.getVectorNumElements(); + unsigned Scale = DstVT.getSizeInBits() / SrcVT.getSizeInBits(); + + for (unsigned i = 0; i != Scale; ++i) + for (unsigned j = 0; j != NumElts; ++j) + ShuffleMask.push_back(j); +} + /// \brief Decode a shuffle packed values at 128-bit granularity /// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) /// immediate mask into a shuffle mask. @@ -303,9 +334,9 @@ void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, ShuffleMask.push_back(M); continue; } - // For AVX vectors with 32 bytes the base of the shuffle is the half of - // the vector we're inside. - int Base = i < 16 ? 0 : 16; + // For 256/512-bit vectors the base of the shuffle is the 128-bit + // subvector we're inside. + int Base = (i / 16) * 16; // If the high bit (7) of the byte is set, the element is zeroed. if (M & (1 << 7)) ShuffleMask.push_back(SM_SentinelZero); @@ -331,23 +362,62 @@ void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { } } -/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. -/// No VT provided since it only works on 256-bit, 4 element vectors. -void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { - for (unsigned i = 0; i != 4; ++i) { - ShuffleMask.push_back((Imm >> (2 * i)) & 3); +void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, + SmallVectorImpl<int> &ShuffleMask) { + assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size"); + + // VPPERM Operation + // Bits[4:0] - Byte Index (0 - 31) + // Bits[7:5] - Permute Operation + // + // Permute Operation: + // 0 - Source byte (no logical operation). + // 1 - Invert source byte. + // 2 - Bit reverse of source byte. + // 3 - Bit reverse of inverted source byte. + // 4 - 00h (zero - fill). + // 5 - FFh (ones - fill). 
+ // 6 - Most significant bit of source byte replicated in all bit positions. + // 7 - Invert most significant bit of source byte and replicate in all bit positions. + for (int i = 0, e = RawMask.size(); i < e; ++i) { + uint64_t M = RawMask[i]; + if (M == (uint64_t)SM_SentinelUndef) { + ShuffleMask.push_back(M); + continue; + } + + uint64_t PermuteOp = (M >> 5) & 0x7; + if (PermuteOp == 4) { + ShuffleMask.push_back(SM_SentinelZero); + continue; + } + if (PermuteOp != 0) { + ShuffleMask.clear(); + return; + } + + uint64_t Index = M & 0x1F; + ShuffleMask.push_back((int)Index); } } -void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) { +/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. +void DecodeVPERMMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { + assert((VT.is256BitVector() || VT.is512BitVector()) && + (VT.getScalarSizeInBits() == 64) && "Unexpected vector value type"); + unsigned NumElts = VT.getVectorNumElements(); + for (unsigned l = 0; l != NumElts; l += 4) + for (unsigned i = 0; i != 4; ++i) + ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3)); +} + +void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT, SmallVectorImpl<int> &Mask) { unsigned NumDstElts = DstVT.getVectorNumElements(); - unsigned SrcScalarBits = SrcVT.getScalarSizeInBits(); + unsigned SrcScalarBits = SrcScalarVT.getSizeInBits(); unsigned DstScalarBits = DstVT.getScalarSizeInBits(); unsigned Scale = DstScalarBits / SrcScalarBits; assert(SrcScalarBits < DstScalarBits && "Expected zero extension mask to increase scalar size"); - assert(SrcVT.getVectorNumElements() >= NumDstElts && - "Too many zero extension lanes"); for (unsigned i = 0; i != NumDstElts; i++) { Mask.push_back(i); @@ -445,18 +515,78 @@ void DecodeINSERTQIMask(int Len, int Idx, ShuffleMask.push_back(SM_SentinelUndef); } +void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask, + SmallVectorImpl<int> &ShuffleMask) { + unsigned VecSize = VT.getSizeInBits(); + unsigned 
EltSize = VT.getScalarSizeInBits(); + unsigned NumLanes = VecSize / 128; + unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes; + assert((VecSize == 128 || VecSize == 256 || VecSize == 512) && + "Unexpected vector size"); + assert((EltSize == 32 || EltSize == 64) && "Unexpected element size"); + + for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { + uint64_t M = RawMask[i]; + M = (EltSize == 64 ? ((M >> 1) & 0x1) : (M & 0x3)); + unsigned LaneOffset = i & ~(NumEltsPerLane - 1); + ShuffleMask.push_back((int)(LaneOffset + M)); + } +} + +void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask, + SmallVectorImpl<int> &ShuffleMask) { + unsigned VecSize = VT.getSizeInBits(); + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned NumLanes = VecSize / 128; + unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes; + assert((VecSize == 128 || VecSize == 256) && + "Unexpected vector size"); + assert((EltSize == 32 || EltSize == 64) && "Unexpected element size"); + + for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { + // VPERMIL2 Operation. + // Bits[3] - Match Bit. + // Bits[2:1] - (Per Lane) PD Shuffle Mask. + // Bits[2:0] - (Per Lane) PS Shuffle Mask. + uint64_t Selector = RawMask[i]; + unsigned MatchBit = (Selector >> 3) & 0x1; + + // M2Z[0:1] MatchBit + // 0Xb X Source selected by Selector index. + // 10b 0 Source selected by Selector index. + // 10b 1 Zero. + // 11b 0 Zero. + // 11b 1 Source selected by Selector index. 
+ if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) { + ShuffleMask.push_back(SM_SentinelZero); + continue; + } + + unsigned Index = i & ~(NumEltsPerLane - 1); + if (EltSize == 64) + Index += (Selector >> 1) & 0x1; + else + Index += Selector & 0x3; + + unsigned SrcOffset = (Selector >> 2) & 1; + ShuffleMask.push_back((int)(SrcOffset + Index)); + } +} + void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, SmallVectorImpl<int> &ShuffleMask) { - for (int i = 0, e = RawMask.size(); i < e; ++i) { - uint64_t M = RawMask[i]; + uint64_t EltMaskSize = RawMask.size() - 1; + for (auto M : RawMask) { + M &= EltMaskSize; ShuffleMask.push_back((int)M); } } void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, SmallVectorImpl<int> &ShuffleMask) { - for (int i = 0, e = RawMask.size(); i < e; ++i) { - uint64_t M = RawMask[i]; + uint64_t EltMaskSize = (RawMask.size() * 2) - 1; + for (auto M : RawMask) { + M &= EltMaskSize; ShuffleMask.push_back((int)M); } } |