src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2019-01-20 11:41:25 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2019-01-20 11:41:25 +0000
commit	d9484dd61cc151c4f34c31e07f693fefa66316b5 (patch)
tree	ab0560b3da293f1fafd3269c59692e929418f5c2 /contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
parent	79e0962d4c3cf1f0acf359a9d69cb3ac68c414c4 (diff)
parent	d8e91e46262bc44006913e6796843909f1ac7bcd (diff)
download	src-d9484dd61cc151c4f34c31e07f693fefa66316b5.tar.gz src-d9484dd61cc151c4f34c31e07f693fefa66316b5.zip

Merge llvm trunk r351319, resolve conflicts, and update FREEBSD-Xlist.

Notes

Notes: svn path=/projects/clang800-import/; revision=343210

Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp')

-rw-r--r--

contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

491

1 files changed, 338 insertions, 153 deletions

diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6cec664d1e66..31acd0ff870f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

@@ -81,6 +81,8 @@ STATISTIC(NumLogicOpsOnComparison,

"Number of logical ops on i1 values calculated in GPR.");

STATISTIC(OmittedForNonExtendUses,

"Number of compares not eliminated as they have non-extending uses.");

+STATISTIC(NumP9Setb,

+ "Number of compares lowered to setb.");

// FIXME: Remove this once the bug has been fixed!

cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",

@@ -327,7 +329,6 @@ private:

bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;

void transferMemOperands(SDNode *N, SDNode *Result);

- MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBit = nullptr);

};

} // end anonymous namespace

@@ -490,7 +491,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,

if (!FuncInfo->BPI) return PPC::BR_NO_HINT;

const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();

- const TerminatorInst *BBTerm = BB->getTerminator();

+ const Instruction *BBTerm = BB->getTerminator();

if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;

@@ -687,9 +688,8 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {

SDValue Op1 = N->getOperand(1);

SDLoc dl(N);

- KnownBits LKnown, RKnown;

- CurDAG->computeKnownBits(Op0, LKnown);

- CurDAG->computeKnownBits(Op1, RKnown);

+ KnownBits LKnown = CurDAG->computeKnownBits(Op0);

+ KnownBits RKnown = CurDAG->computeKnownBits(Op1);

unsigned TargetMask = LKnown.Zero.getZExtValue();

unsigned InsertMask = RKnown.Zero.getZExtValue();

@@ -733,8 +733,7 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {

// The AND mask might not be a constant, and we need to make sure that

// if we're going to fold the masking with the insert, all bits not

// know to be zero in the mask are known to be one.

- KnownBits MKnown;

- CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);

+ KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));

bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();

unsigned SHOpc = Op1.getOperand(0).getOpcode();

@@ -1083,9 +1082,14 @@ class BitPermutationSelector {

// lowest-order bit.

unsigned Idx;

+ // ConstZero means a bit we need to mask off.

+ // Variable is a bit that comes from an input variable.

+ // VariableKnownToBeZero is also a bit that comes from an input variable,

+ // but it is already known to be zero, so we do not need to mask it.

enum Kind {

ConstZero,

- Variable

+ Variable,

+ VariableKnownToBeZero

} K;

ValueBit(SDValue V, unsigned I, Kind K = Variable)

@@ -1094,11 +1098,11 @@ class BitPermutationSelector {

: V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}

bool isZero() const {

- return K == ConstZero;

+ return K == ConstZero || K == VariableKnownToBeZero;

}

bool hasValue() const {

- return K == Variable;

+ return K == Variable || K == VariableKnownToBeZero;

}

SDValue getValue() const {

@@ -1248,8 +1252,14 @@ class BitPermutationSelector {

for (unsigned i = 0; i < NumBits; ++i)

if (((Mask >> i) & 1) == 1)

Bits[i] = (*LHSBits)[i];

- else

- Bits[i] = ValueBit(ValueBit::ConstZero);

+ else {

+ // AND instruction masks this bit. If the input is already zero,

+ // we have nothing to do here. Otherwise, make the bit ConstZero.

+ if ((*LHSBits)[i].isZero())

+ Bits[i] = (*LHSBits)[i];

+ else

+ Bits[i] = ValueBit(ValueBit::ConstZero);

+ }

return std::make_pair(Interesting, &Bits);

}

@@ -1259,8 +1269,26 @@ class BitPermutationSelector {

const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;

bool AllDisjoint = true;

- for (unsigned i = 0; i < NumBits; ++i)

- if (LHSBits[i].isZero())

+ SDValue LastVal = SDValue();

+ unsigned LastIdx = 0;

+ for (unsigned i = 0; i < NumBits; ++i) {

+ if (LHSBits[i].isZero() && RHSBits[i].isZero()) {

+ // If both inputs are known to be zero and one is ConstZero and

+ // another is VariableKnownToBeZero, we can select whichever

+ // we like. To minimize the number of bit groups, we select

+ // VariableKnownToBeZero if this bit is the next bit of the same

+ // input variable from the previous bit. Otherwise, we select

+ // ConstZero.

+ if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&

+ LHSBits[i].getValueBitIndex() == LastIdx + 1)

+ Bits[i] = LHSBits[i];

+ else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&

+ RHSBits[i].getValueBitIndex() == LastIdx + 1)

+ Bits[i] = RHSBits[i];

+ else

+ Bits[i] = ValueBit(ValueBit::ConstZero);

+ }

+ else if (LHSBits[i].isZero())

Bits[i] = RHSBits[i];

else if (RHSBits[i].isZero())

Bits[i] = LHSBits[i];

@@ -1268,6 +1296,16 @@ class BitPermutationSelector {

AllDisjoint = false;

break;

}

+ // We remember the value and bit index of this bit.

+ if (Bits[i].hasValue()) {

+ LastVal = Bits[i].getValue();

+ LastIdx = Bits[i].getValueBitIndex();

+ }

+ else {

+ if (LastVal) LastVal = SDValue();

+ LastIdx = 0;

+ }

if (!AllDisjoint)

break;

@@ -1293,6 +1331,72 @@ class BitPermutationSelector {

return std::make_pair(Interesting, &Bits);

}

+ case ISD::TRUNCATE: {

+ EVT FromType = V.getOperand(0).getValueType();

+ EVT ToType = V.getValueType();

+ // We support only the case with truncate from i64 to i32.

+ if (FromType != MVT::i64 || ToType != MVT::i32)

+ break;

+ const unsigned NumAllBits = FromType.getSizeInBits();

+ SmallVector<ValueBit, 64> *InBits;

+ std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),

+ NumAllBits);

+ const unsigned NumValidBits = ToType.getSizeInBits();

+ // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.

+ // So, we cannot include this truncate.

+ bool UseUpper32bit = false;

+ for (unsigned i = 0; i < NumValidBits; ++i)

+ if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {

+ UseUpper32bit = true;

+ break;

+ }

+ if (UseUpper32bit)

+ break;

+ for (unsigned i = 0; i < NumValidBits; ++i)

+ Bits[i] = (*InBits)[i];

+ return std::make_pair(Interesting, &Bits);

+ }

+ case ISD::AssertZext: {

+ // For AssertZext, we look through the operand and

+ // mark the bits known to be zero.

+ const SmallVector<ValueBit, 64> *LHSBits;

+ std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),

+ NumBits);

+ EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();

+ const unsigned NumValidBits = FromType.getSizeInBits();

+ for (unsigned i = 0; i < NumValidBits; ++i)

+ Bits[i] = (*LHSBits)[i];

+ // These bits are known to be zero.

+ for (unsigned i = NumValidBits; i < NumBits; ++i)

+ Bits[i] = ValueBit((*LHSBits)[i].getValue(),

+ (*LHSBits)[i].getValueBitIndex(),

+ ValueBit::VariableKnownToBeZero);

+ return std::make_pair(Interesting, &Bits);

+ }

+ case ISD::LOAD:

+ LoadSDNode *LD = cast<LoadSDNode>(V);

+ if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {

+ EVT VT = LD->getMemoryVT();

+ const unsigned NumValidBits = VT.getSizeInBits();

+ for (unsigned i = 0; i < NumValidBits; ++i)

+ Bits[i] = ValueBit(V, i);

+ // These bits are known to be zero.

+ for (unsigned i = NumValidBits; i < NumBits; ++i)

+ Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);

+ // Zero-extending load itself cannot be optimized. So, it is not

+ // interesting by itself though it gives useful information.

+ return std::make_pair(Interesting = false, &Bits);

+ }

+ break;

}

for (unsigned i = 0; i < NumBits; ++i)

@@ -1304,7 +1408,7 @@ class BitPermutationSelector {

// For each value (except the constant ones), compute the left-rotate amount

// to get it from its original to final position.

void computeRotationAmounts() {

- HasZeros = false;

+ NeedMask = false;

RLAmt.resize(Bits.size());

for (unsigned i = 0; i < Bits.size(); ++i)

if (Bits[i].hasValue()) {

@@ -1314,7 +1418,7 @@ class BitPermutationSelector {

else

RLAmt[i] = Bits.size() - (VBI - i);

} else if (Bits[i].isZero()) {

- HasZeros = true;

+ NeedMask = true;

RLAmt[i] = UINT32_MAX;

} else {

llvm_unreachable("Unknown value bit type");

@@ -1330,6 +1434,7 @@ class BitPermutationSelector {

unsigned LastRLAmt = RLAmt[0];

SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();

unsigned LastGroupStartIdx = 0;

+ bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();

for (unsigned i = 1; i < Bits.size(); ++i) {

unsigned ThisRLAmt = RLAmt[i];

SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();

@@ -1342,10 +1447,20 @@ class BitPermutationSelector {

LastGroupStartIdx = 0;

}

+ // If this bit is known to be zero and the current group is a bit group

+ // of zeros, we do not need to terminate the current bit group even if the

+ // Value or RLAmt does not match here. Instead, we terminate this group

+ // when the first non-zero bit appears later.

+ if (IsGroupOfZeros && Bits[i].isZero())

+ continue;

// If this bit has the same underlying value and the same rotate factor as

// the last one, then they're part of the same group.

if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)

- continue;

+ // We cannot continue the current group if this bit is not known to

+ // be zero in a bit group of zeros.

+ if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))

+ continue;

if (LastValue.getNode())

BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,

@@ -1353,6 +1468,7 @@ class BitPermutationSelector {

LastRLAmt = ThisRLAmt;

LastValue = ThisValue;

LastGroupStartIdx = i;

+ IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();

}

if (LastValue.getNode())

BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,

@@ -1401,7 +1517,7 @@ class BitPermutationSelector {

for (auto &I : ValueRots) {

ValueRotsVec.push_back(I.second);

}

- llvm::sort(ValueRotsVec.begin(), ValueRotsVec.end());

+ llvm::sort(ValueRotsVec);

}

// In 64-bit mode, rlwinm and friends have a rotation operator that

@@ -1588,6 +1704,17 @@ class BitPermutationSelector {

return ExtVal;

}

+ SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {

+ if (V.getValueSizeInBits() == 32)

+ return V;

+ assert(V.getValueSizeInBits() == 64);

+ SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);

+ SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,

+ MVT::i32, V, SubRegIdx), 0);

+ return SubVal;

+ }

// Depending on the number of groups for a particular value, it might be

// better to rotate, mask explicitly (using andi/andis), and then or the

// result. Select this part of the result first.

@@ -1646,12 +1773,12 @@ class BitPermutationSelector {

SDValue VRot;

if (VRI.RLAmt) {

SDValue Ops[] =

- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),

- getI32Imm(31, dl) };

+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),

+ getI32Imm(0, dl), getI32Imm(31, dl) };

VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,

Ops), 0);

} else {

- VRot = VRI.V;

+ VRot = TruncateToInt32(VRI.V, dl);

}

SDValue ANDIVal, ANDISVal;

@@ -1698,17 +1825,17 @@ class BitPermutationSelector {

// If we've not yet selected a 'starting' instruction, and we have no zeros

// to fill in, select the (Value, RLAmt) with the highest priority (largest

// number of groups), and start with this rotated value.

- if ((!HasZeros || LateMask) && !Res) {

+ if ((!NeedMask || LateMask) && !Res) {

ValueRotInfo &VRI = ValueRotsVec[0];

if (VRI.RLAmt) {

if (InstCnt) *InstCnt += 1;

SDValue Ops[] =

- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),

- getI32Imm(31, dl) };

+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),

+ getI32Imm(0, dl), getI32Imm(31, dl) };

Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),

0);

} else {

- Res = VRI.V;

+ Res = TruncateToInt32(VRI.V, dl);

}

// Now, remove all groups with this underlying value and rotation factor.

@@ -1723,13 +1850,13 @@ class BitPermutationSelector {

for (auto &BG : BitGroups) {

if (!Res) {

SDValue Ops[] =

- { BG.V, getI32Imm(BG.RLAmt, dl),

+ { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),

getI32Imm(Bits.size() - BG.EndIdx - 1, dl),

getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };

Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);

} else {

SDValue Ops[] =

- { Res, BG.V, getI32Imm(BG.RLAmt, dl),

+ { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),

getI32Imm(Bits.size() - BG.EndIdx - 1, dl),

getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };

Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);

@@ -2077,7 +2204,7 @@ class BitPermutationSelector {

// If we've not yet selected a 'starting' instruction, and we have no zeros

// to fill in, select the (Value, RLAmt) with the highest priority (largest

// number of groups), and start with this rotated value.

- if ((!HasZeros || LateMask) && !Res) {

+ if ((!NeedMask || LateMask) && !Res) {

// If we have both Repl32 groups and non-Repl32 groups, the non-Repl32

// groups will come first, and so the VRI representing the largest number

// of groups might not be first (it might be the first Repl32 groups).

@@ -2230,7 +2357,7 @@ class BitPermutationSelector {

SmallVector<ValueBit, 64> Bits;

- bool HasZeros;

+ bool NeedMask;

SmallVector<unsigned, 64> RLAmt;

SmallVector<BitGroup, 16> BitGroups;

@@ -2259,10 +2386,10 @@ public:

" selection for: ");

LLVM_DEBUG(N->dump(CurDAG));

- // Fill it RLAmt and set HasZeros.

+ // Fill in RLAmt and set NeedMask.

computeRotationAmounts();

- if (!HasZeros)

+ if (!NeedMask)

return Select(N, false);

// We currently have two techniques for handling results with zeros: early

@@ -4045,54 +4172,148 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {

void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {

// Transfer memoperands.

- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);

- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();

- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);

+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();

+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});

}

-/// This method returns a node after flipping the MSB of each element

-/// of vector integer type. Additionally, if SignBitVec is non-null,

-/// this method sets a node with one at MSB of all elements

-/// and zero at other bits in SignBitVec.

-MachineSDNode *

-PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) {

- SDLoc dl(N);

- EVT VecVT = N.getValueType();

- if (VecVT == MVT::v4i32) {

- if (SignBitVec) {

- SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32);

- *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT,

- SDValue(ZV, 0));

- }

- return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N);

- }

- else if (VecVT == MVT::v8i16) {

- SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32,

- getI32Imm(0x8000, dl));

- SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32,

- SDValue(Hi, 0),

- getI32Imm(0x8000, dl));

- SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT,

- SDValue(ScaImm, 0));

- /*

- Alternatively, we can do this as follow to use VRF instead of GPR.

- vspltish 5, 1

- vspltish 6, 15

- vslh 5, 6, 5

- */

- if (SignBitVec) *SignBitVec = VecImm;

- return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N,

- SDValue(VecImm, 0));

- }

- else if (VecVT == MVT::v16i8) {

- SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32,

- getI32Imm(0x80, dl));

- if (SignBitVec) *SignBitVec = VecImm;

- return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N,

- SDValue(VecImm, 0));

+static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,

+ bool &NeedSwapOps, bool &IsUnCmp) {

+ assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");

+ SDValue LHS = N->getOperand(0);

+ SDValue RHS = N->getOperand(1);

+ SDValue TrueRes = N->getOperand(2);

+ SDValue FalseRes = N->getOperand(3);

+ ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);

+ if (!TrueConst)

+ return false;

+ assert((N->getSimpleValueType(0) == MVT::i64 ||

+ N->getSimpleValueType(0) == MVT::i32) &&

+ "Expecting either i64 or i32 here.");

+ // We are looking for any of:

+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)

+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)

+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)

+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)

+ int64_t TrueResVal = TrueConst->getSExtValue();

+ if ((TrueResVal < -1 || TrueResVal > 1) ||

+ (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||

+ (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||

+ (TrueResVal == 0 &&

+ (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))

+ return false;

+ bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;

+ SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);

+ if (SetOrSelCC.getOpcode() != ISD::SETCC &&

+ SetOrSelCC.getOpcode() != ISD::SELECT_CC)

+ return false;

+ // Without this setb optimization, the outer SELECT_CC will be manually

+ // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass

+ // transforms pseudo instruction to isel instruction. When there are more than

+ // one use for result like zext/sext, with current optimization we only see

+ // isel is replaced by setb but can't see any significant gain. Since

+ // setb has longer latency than original isel, we should avoid this. Another

+ // point is that setb requires comparison always kept, it can break the

+ // opportunity to optimize the comparison away if we have one in the future.

+ if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))

+ return false;

+ SDValue InnerLHS = SetOrSelCC.getOperand(0);

+ SDValue InnerRHS = SetOrSelCC.getOperand(1);

+ ISD::CondCode InnerCC =

+ cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();

+ // If the inner comparison is a select_cc, make sure the true/false values are

+ // 1/-1 and canonicalize it if needed.

+ if (InnerIsSel) {

+ ConstantSDNode *SelCCTrueConst =

+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));

+ ConstantSDNode *SelCCFalseConst =

+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));

+ if (!SelCCTrueConst || !SelCCFalseConst)

+ return false;

+ int64_t SelCCTVal = SelCCTrueConst->getSExtValue();

+ int64_t SelCCFVal = SelCCFalseConst->getSExtValue();

+ // The values must be -1/1 (requiring a swap) or 1/-1.

+ if (SelCCTVal == -1 && SelCCFVal == 1) {

+ std::swap(InnerLHS, InnerRHS);

+ } else if (SelCCTVal != 1 || SelCCFVal != -1)

+ return false;

}

- else

- llvm_unreachable("Unsupported vector data type for flipSignBit");

+ // Canonicalize unsigned case

+ if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {

+ IsUnCmp = true;

+ InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;

+ }

+ bool InnerSwapped = false;

+ if (LHS == InnerRHS && RHS == InnerLHS)

+ InnerSwapped = true;

+ else if (LHS != InnerLHS || RHS != InnerRHS)

+ return false;

+ switch (CC) {

+ // (select_cc lhs, rhs, 0, \

+ // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)

+ case ISD::SETEQ:

+ if (!InnerIsSel)

+ return false;

+ if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)

+ return false;

+ NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;

+ break;

+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)

+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)

+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)

+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)

+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)

+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)

+ case ISD::SETULT:

+ if (!IsUnCmp && InnerCC != ISD::SETNE)

+ return false;

+ IsUnCmp = true;

+ LLVM_FALLTHROUGH;

+ case ISD::SETLT:

+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||

+ (InnerCC == ISD::SETLT && InnerSwapped))

+ NeedSwapOps = (TrueResVal == 1);

+ else

+ return false;

+ break;

+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)

+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)

+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)

+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)

+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)

+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)

+ case ISD::SETUGT:

+ if (!IsUnCmp && InnerCC != ISD::SETNE)

+ return false;

+ IsUnCmp = true;

+ LLVM_FALLTHROUGH;

+ case ISD::SETGT:

+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||

+ (InnerCC == ISD::SETGT && InnerSwapped))

+ NeedSwapOps = (TrueResVal == -1);

+ else

+ return false;

+ break;

+ default:

+ return false;

+ }

+ LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");

+ LLVM_DEBUG(N->dump());

+ return true;

}

// Select - Convert the specified operand from a target-independent to a

@@ -4429,8 +4650,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {

int16_t Imm;

if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&

isIntS16Immediate(N->getOperand(1), Imm)) {

- KnownBits LHSKnown;

- CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);

+ KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));

// If this is equivalent to an add, then we can fold it with the

// FrameIndex calculation.

@@ -4557,6 +4777,31 @@ void PPCDAGToDAGISel::Select(SDNode *N) {

N->getOperand(0).getValueType() == MVT::i1)

break;

+ if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {

+ bool NeedSwapOps = false;

+ bool IsUnCmp = false;

+ if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {

+ SDValue LHS = N->getOperand(0);

+ SDValue RHS = N->getOperand(1);

+ if (NeedSwapOps)

+ std::swap(LHS, RHS);

+ // Make use of SelectCC to generate the comparison to set CR bits, for

+ // equality comparisons having one literal operand, SelectCC probably

+ // doesn't need to materialize the whole literal and just use xoris to

+ // check it first, it leads the following comparison result can't

+ // exactly represent GT/LT relationship. So to avoid this we specify

+ // SETGT/SETUGT here instead of SETEQ.

+ SDValue GenCC =

+ SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);

+ CurDAG->SelectNodeTo(

+ N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,

+ N->getValueType(0), GenCC);

+ NumP9Setb++;

+ return;

+ }

// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc

if (!isPPC64)

if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))

@@ -4648,14 +4893,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {

CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);

return;

}

- case ISD::VSELECT:

- if (PPCSubTarget->hasVSX()) {

- SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };

- CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);

- return;

- }

- break;

case ISD::VECTOR_SHUFFLE:

if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||

N->getValueType(0) == MVT::v2i64)) {

@@ -4683,11 +4920,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {

SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {

SDValue Chain = LD->getChain();

SDValue Ops[] = { Base, Offset, Chain };

- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);

- MemOp[0] = LD->getMemOperand();

+ MachineMemOperand *MemOp = LD->getMemOperand();

SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,

N->getValueType(0), Ops);

- cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);

+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});

return;

}

@@ -4753,6 +4989,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {

case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;

}

+ // A signed comparison of i1 values produces the opposite result to an

+ // unsigned one if the condition code includes less-than or greater-than.

+ // This is because 1 is the most negative signed i1 number and the most

+ // positive unsigned i1 number. The CR-logical operations used for such

+ // comparisons are non-commutative so for signed comparisons vs. unsigned

+ // ones, the input operands just need to be swapped.

+ if (ISD::isSignedIntSetCC(CC))

+ Swap = !Swap;

SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,

N->getOperand(Swap ? 3 : 2),

N->getOperand(Swap ? 2 : 3)), 0);

@@ -4809,9 +5054,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {

SDValue TOCbase = N->getOperand(1);

SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,

TOCbase, GA);

- if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||

- CModel == CodeModel::Large) {

+ if (PPCLowering->isAccessedAsGotIndirect(GA)) {

+ // If it is accessed as got-indirect, we need an extra LD to load

+ // the address.

SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,

SDValue(Tmp, 0));

transferMemOperands(N, MN);

@@ -4819,18 +5064,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {

return;

}

- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {

- const GlobalValue *GV = G->getGlobal();

- unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);

- if (GVFlags & PPCII::MO_NLP_FLAG) {

- SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,

- SDValue(Tmp, 0));

- transferMemOperands(N, MN);

- ReplaceNode(N, MN);

- return;

- }

+ // Build the address relative to the TOC-pointer.

ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,

SDValue(Tmp, 0), GA));

return;

@@ -4916,55 +5150,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {

return;

}

- case ISD::ABS: {

- assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector");

- // For vector absolute difference, we use VABSDUW instruction of POWER9.

- // Since VABSDU instructions are for unsigned integers, we need adjustment

- // for signed integers.

- // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000).

- // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.

- // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000).

- EVT VecVT = N->getOperand(0).getValueType();

- SDNode *AbsOp = nullptr;

- unsigned AbsOpcode;

- if (VecVT == MVT::v4i32)

- AbsOpcode = PPC::VABSDUW;

- else if (VecVT == MVT::v8i16)

- AbsOpcode = PPC::VABSDUH;

- else if (VecVT == MVT::v16i8)

- AbsOpcode = PPC::VABSDUB;

- else

- llvm_unreachable("Unsupported vector data type for ISD::ABS");

- // Even for signed integers, we can skip adjustment if all values are

- // known to be positive (as signed integer) due to zero-extended inputs.

- if (N->getOperand(0).getOpcode() == ISD::SUB &&

- N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&

- N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND) {

- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,

- SDValue(N->getOperand(0)->getOperand(0)),

- SDValue(N->getOperand(0)->getOperand(1)));

- ReplaceNode(N, AbsOp);

- return;

- }

- if (N->getOperand(0).getOpcode() == ISD::SUB) {

- SDValue SubVal = N->getOperand(0);

- SDNode *Op0 = flipSignBit(SubVal->getOperand(0));

- SDNode *Op1 = flipSignBit(SubVal->getOperand(1));

- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,

- SDValue(Op0, 0), SDValue(Op1, 0));

- }

- else {

- SDNode *Op1 = nullptr;

- SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1);

- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0),

- SDValue(Op1, 0));

- }

- ReplaceNode(N, AbsOp);

- return;

- }

}

SelectCode(N);