src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2017-01-09 21:23:09 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-01-09 21:23:09 +0000
commit	909545a822eef491158f831688066f0ec2866938 (patch)
tree	5b0bf0e81294007a9b462b21031b3df272c655c3 /lib/Target/X86/X86ISelLowering.cpp
parent	7e7b6700743285c0af506ac6299ddf82ebd434b9 (diff)
download	src-909545a822eef491158f831688066f0ec2866938.tar.gz src-909545a822eef491158f831688066f0ec2866938.zip

Vendor import of llvm trunk r291476:vendor/llvm/llvm-trunk-r291476

https://llvm.org/svn/llvm-project/llvm/trunk@291476

Notes

Notes: svn path=/vendor/llvm/dist/; revision=311818 svn path=/vendor/llvm/llvm-trunk-r291476/; revision=311819; tag=vendor/llvm/llvm-trunk-r291476

Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')

-rw-r--r--

lib/Target/X86/X86ISelLowering.cpp

264

1 files changed, 199 insertions, 65 deletions

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7f72ab17f619..db76ddf04c06 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp

@@ -6962,23 +6962,24 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,

return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);

}

-/// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB

-/// node.

-static SDValue LowerToAddSub(const BuildVectorSDNode *BV,

- const X86Subtarget &Subtarget, SelectionDAG &DAG) {

+/// Returns true iff \p BV builds a vector with the result equivalent to

+/// the result of ADDSUB operation.

+/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation

+/// are written to the parameters \p Opnd0 and \p Opnd1.

+static bool isAddSub(const BuildVectorSDNode *BV,

+ const X86Subtarget &Subtarget, SelectionDAG &DAG,

+ SDValue &Opnd0, SDValue &Opnd1) {

MVT VT = BV->getSimpleValueType(0);

if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&

- (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))

- return SDValue();

+ (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&

+ (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))

+ return false;

- SDLoc DL(BV);

unsigned NumElts = VT.getVectorNumElements();

SDValue InVec0 = DAG.getUNDEF(VT);

SDValue InVec1 = DAG.getUNDEF(VT);

- assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||

- VT == MVT::v2f64) && "build_vector with an invalid type found!");

// Odd-numbered elements in the input build vector are obtained from

// adding two integer/float elements.

// Even-numbered elements in the input build vector are obtained from

@@ -7000,7 +7001,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,

// Early exit if we found an unexpected opcode.

if (Opcode != ExpectedOpcode)

- return SDValue();

+ return false;

SDValue Op0 = Op.getOperand(0);

SDValue Op1 = Op.getOperand(1);

@@ -7013,11 +7014,11 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,

!isa<ConstantSDNode>(Op0.getOperand(1)) ||

!isa<ConstantSDNode>(Op1.getOperand(1)) ||

Op0.getOperand(1) != Op1.getOperand(1))

- return SDValue();

+ return false;

unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();

if (I0 != i)

- return SDValue();

+ return false;

// We found a valid add/sub node. Update the information accordingly.

if (i & 1)

@@ -7029,39 +7030,118 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,

if (InVec0.isUndef()) {

InVec0 = Op0.getOperand(0);

if (InVec0.getSimpleValueType() != VT)

- return SDValue();

+ return false;

}

if (InVec1.isUndef()) {

InVec1 = Op1.getOperand(0);

if (InVec1.getSimpleValueType() != VT)

- return SDValue();

+ return false;

}

// Make sure that operands in input to each add/sub node always

// come from a same pair of vectors.

if (InVec0 != Op0.getOperand(0)) {

if (ExpectedOpcode == ISD::FSUB)

- return SDValue();

+ return false;

// FADD is commutable. Try to commute the operands

// and then test again.

std::swap(Op0, Op1);

if (InVec0 != Op0.getOperand(0))

- return SDValue();

+ return false;

}

if (InVec1 != Op1.getOperand(0))

- return SDValue();

+ return false;

// Update the pair of expected opcodes.

std::swap(ExpectedOpcode, NextExpectedOpcode);

}

// Don't try to fold this build_vector into an ADDSUB if the inputs are undef.

- if (AddFound && SubFound && !InVec0.isUndef() && !InVec1.isUndef())

- return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);

+ if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())

+ return false;

- return SDValue();

+ Opnd0 = InVec0;

+ Opnd1 = InVec1;

+ return true;

+/// Returns true if it is possible to fold MUL and an idiom that has already been

+/// recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).

+/// If (and only if) true is returned, the operands of FMADDSUB are written to

+/// parameters \p Opnd0, \p Opnd1, \p Opnd2.

+///

+/// Prior to calling this function it should be known that there is some

+/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation

+/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called

+/// before replacement of such SDNode with ADDSUB operation. Thus the number

+/// of \p Opnd0 uses is expected to be equal to 2.

+/// For example, this function may be called for the following IR:

+/// %AB = fmul fast <2 x double> %A, %B

+/// %Sub = fsub fast <2 x double> %AB, %C

+/// %Add = fadd fast <2 x double> %AB, %C

+/// %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,

+/// <2 x i32> <i32 0, i32 3>

+/// There is a def for %Addsub here, which potentially can be replaced by

+/// X86ISD::ADDSUB operation:

+/// %Addsub = X86ISD::ADDSUB %AB, %C

+/// and such ADDSUB can further be replaced with FMADDSUB:

+/// %Addsub = FMADDSUB %A, %B, %C.

+///

+/// The main reason why this method is called before the replacement of the

+/// recognized ADDSUB idiom with ADDSUB operation is that such replacement

+/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit

+/// FMADDSUB is.

+static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,

+ SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) {

+ if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 ||

+ !Subtarget.hasAnyFMA())

+ return false;

+ // FIXME: These checks must match the similar ones in

+ // DAGCombiner::visitFADDForFMACombine. It would be good to have one

+ // function that would answer if it is Ok to fuse MUL + ADD to FMADD

+ // or MUL + ADDSUB to FMADDSUB.

+ const TargetOptions &Options = DAG.getTarget().Options;

+ bool AllowFusion =

+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);

+ if (!AllowFusion)

+ return false;

+ Opnd2 = Opnd1;

+ Opnd1 = Opnd0.getOperand(1);

+ Opnd0 = Opnd0.getOperand(0);

+ return true;

+/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' operation

+/// into an X86ISD::ADDSUB or X86ISD::FMADDSUB node, respectively.

+static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,

+ const X86Subtarget &Subtarget,

+ SelectionDAG &DAG) {

+ SDValue Opnd0, Opnd1;

+ if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1))

+ return SDValue();

+ MVT VT = BV->getSimpleValueType(0);

+ SDLoc DL(BV);

+ // Try to generate X86ISD::FMADDSUB node here.

+ SDValue Opnd2;

+ if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))

+ return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);

+ // Do not generate X86ISD::ADDSUB node for 512-bit types even though

+ // the ADDSUB idiom has been successfully recognized. There are no known

+ // X86 targets with 512-bit ADDSUB instructions!

+ // 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom

+ // recognition.

+ if (VT.is512BitVector())

+ return SDValue();

+ return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);

}

/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.

@@ -7290,7 +7370,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {

return VectorConstant;

BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());

- if (SDValue AddSub = LowerToAddSub(BV, Subtarget, DAG))

+ if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))

return AddSub;

if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))

return HorizontalOp;

@@ -12965,6 +13045,12 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,

if (Subtarget.hasVBMI())

return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);

+ // Try to create an in-lane repeating shuffle mask and then shuffle the

+ // results into the target lanes.

+ if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(

+ DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))

+ return V;

// FIXME: Implement direct support for this type!

return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);

}

@@ -16985,9 +17071,16 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {

return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);

}

- if (Cond.getOpcode() == ISD::SETCC)

- if (SDValue NewCond = LowerSETCC(Cond, DAG))

+ if (Cond.getOpcode() == ISD::SETCC) {

+ if (SDValue NewCond = LowerSETCC(Cond, DAG)) {

Cond = NewCond;

+ // If the condition was updated, it's possible that the operands of the

+ // select were also updated (for example, EmitTest has a RAUW). Refresh

+ // the local references to the select operands in case they got stale.

+ Op1 = Op.getOperand(1);

+ Op2 = Op.getOperand(2);

+ }

// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y

// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y

@@ -17193,22 +17286,26 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,

if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())

return SDValue();

- if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {

+ if (VT.is512BitVector() && InVTElt != MVT::i1) {

if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)

return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));

return DAG.getNode(X86ISD::VSEXT, dl, VT, In);

}

- assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");

+ assert (InVTElt == MVT::i1 && "Unexpected vector type");

MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);

- SDValue NegOne = DAG.getConstant(

- APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);

- SDValue Zero = DAG.getConstant(

- APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);

+ SDValue V;

+ if (Subtarget.hasDQI()) {

+ V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In);

+ assert(!VT.is512BitVector() && "Unexpected vector type");

+ } else {

+ SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);

+ SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);

+ V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);

+ if (VT.is512BitVector())

+ return V;

+ }

- SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);

- if (VT.is512BitVector())

- return V;

return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);

}

@@ -21528,6 +21625,23 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,

return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});

}

+ // It's worth extending once and using the vXi16/vXi32 shifts for smaller

+ // types, but without AVX512 the extra overheads to get from vXi8 to vXi32

+ // make the existing SSE solution better.

+ if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||

+ (Subtarget.hasAVX512() && VT == MVT::v16i16) ||

+ (Subtarget.hasAVX512() && VT == MVT::v16i8) ||

+ (Subtarget.hasBWI() && VT == MVT::v32i8)) {

+ MVT EvtSVT = (VT == MVT::v32i8 ? MVT::i16 : MVT::i32);

+ MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());

+ unsigned ExtOpc =

+ Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

+ R = DAG.getNode(ExtOpc, dl, ExtVT, R);

+ Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);

+ return DAG.getNode(ISD::TRUNCATE, dl, VT,

+ DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));

+ }

if (VT == MVT::v16i8 ||

(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) {

MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);

@@ -21636,19 +21750,6 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,

}

- // It's worth extending once and using the v8i32 shifts for 16-bit types, but

- // the extra overheads to get from v16i8 to v8i32 make the existing SSE

- // solution better.

- if (Subtarget.hasInt256() && VT == MVT::v8i16) {

- MVT ExtVT = MVT::v8i32;

- unsigned ExtOpc =

- Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

- R = DAG.getNode(ExtOpc, dl, ExtVT, R);

- Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);

- return DAG.getNode(ISD::TRUNCATE, dl, VT,

- DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));

- }

if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {

MVT ExtVT = MVT::v8i32;

SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);

@@ -27763,29 +27864,32 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,

return SDValue();

}

-/// \brief Try to combine a shuffle into a target-specific add-sub node.

+/// Returns true iff the shuffle node \p N can be replaced with ADDSUB

+/// operation. If true is returned then the operands of ADDSUB operation

+/// are written to the parameters \p Opnd0 and \p Opnd1.

///

-/// We combine this directly on the abstract vector shuffle nodes so it is

-/// easier to generically match. We also insert dummy vector shuffle nodes for

-/// the operands which explicitly discard the lanes which are unused by this

-/// operation to try to flow through the rest of the combiner the fact that

-/// they're unused.

-static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,

- SelectionDAG &DAG) {

- SDLoc DL(N);

+/// We combine shuffle to ADDSUB directly on the abstract vector shuffle nodes

+/// so it is easier to generically match. We also insert dummy vector shuffle

+/// nodes for the operands which explicitly discard the lanes which are unused

+/// by this operation to try to flow through the rest of the combiner

+/// the fact that they're unused.

+static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget,

+ SDValue &Opnd0, SDValue &Opnd1) {

EVT VT = N->getValueType(0);

if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&

- (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))

- return SDValue();

+ (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&

+ (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))

+ return false;

// We only handle target-independent shuffles.

// FIXME: It would be easy and harmless to use the target shuffle mask

// extraction tool to support more.

if (N->getOpcode() != ISD::VECTOR_SHUFFLE)

- return SDValue();

+ return false;

ArrayRef<int> OrigMask = cast<ShuffleVectorSDNode>(N)->getMask();

- SmallVector<int, 8> Mask(OrigMask.begin(), OrigMask.end());

+ SmallVector<int, 16> Mask(OrigMask.begin(), OrigMask.end());

SDValue V1 = N->getOperand(0);

SDValue V2 = N->getOperand(1);

@@ -27796,27 +27900,57 @@ static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,

ShuffleVectorSDNode::commuteMask(Mask);

std::swap(V1, V2);

} else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD)

- return SDValue();

+ return false;

// If there are other uses of these operations we can't fold them.

if (!V1->hasOneUse() || !V2->hasOneUse())

- return SDValue();

+ return false;

// Ensure that both operations have the same operands. Note that we can

// commute the FADD operands.

SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1);

if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&

(V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))

- return SDValue();

+ return false;

// We're looking for blends between FADD and FSUB nodes. We insist on these

// nodes being lined up in a specific expected pattern.

if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||

isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) ||

- isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))

+ isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}) ||

+ isShuffleEquivalent(V1, V2, Mask, {0, 17, 2, 19, 4, 21, 6, 23,

+ 8, 25, 10, 27, 12, 29, 14, 31})))

+ return false;

+ Opnd0 = LHS;

+ Opnd1 = RHS;

+ return true;

+/// \brief Try to combine a shuffle into a target-specific add-sub or

+/// mul-add-sub node.

+static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,

+ const X86Subtarget &Subtarget,

+ SelectionDAG &DAG) {

+ SDValue Opnd0, Opnd1;

+ if (!isAddSub(N, Subtarget, Opnd0, Opnd1))

+ return SDValue();

+ EVT VT = N->getValueType(0);

+ SDLoc DL(N);

+ // Try to generate X86ISD::FMADDSUB node here.

+ SDValue Opnd2;

+ if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))

+ return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);

+ // Do not generate X86ISD::ADDSUB node for 512-bit types even though

+ // the ADDSUB idiom has been successfully recognized. There are no known

+ // X86 targets with 512-bit ADDSUB instructions!

+ if (VT.is512BitVector())

return SDValue();

- return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);

+ return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);

}

// We are looking for a shuffle where both sources are concatenated with undef

@@ -27878,7 +28012,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,

// If we have legalized the vector types, look for blends of FADD and FSUB

// nodes that we can fuse into an ADDSUB node.

if (TLI.isTypeLegal(VT))

- if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG))

+ if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))

return AddSub;

// During Type Legalization, when promoting illegal vector types,