Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp')
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp  491
1 file changed, 338 insertions, 153 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6cec664d1e66..31acd0ff870f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -81,6 +81,8 @@ STATISTIC(NumLogicOpsOnComparison,
"Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
"Number of compares not eliminated as they have non-extending uses.");
+STATISTIC(NumP9Setb,
+ "Number of compares lowered to setb.");
// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
@@ -327,7 +329,6 @@ private:
bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
void transferMemOperands(SDNode *N, SDNode *Result);
- MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBit = nullptr);
};
} // end anonymous namespace
@@ -490,7 +491,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
- const TerminatorInst *BBTerm = BB->getTerminator();
+ const Instruction *BBTerm = BB->getTerminator();
if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
@@ -687,9 +688,8 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
- KnownBits LKnown, RKnown;
- CurDAG->computeKnownBits(Op0, LKnown);
- CurDAG->computeKnownBits(Op1, RKnown);
+ KnownBits LKnown = CurDAG->computeKnownBits(Op0);
+ KnownBits RKnown = CurDAG->computeKnownBits(Op1);
unsigned TargetMask = LKnown.Zero.getZExtValue();
unsigned InsertMask = RKnown.Zero.getZExtValue();
@@ -733,8 +733,7 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
// The AND mask might not be a constant, and we need to make sure that
// if we're going to fold the masking with the insert, all bits not
// know to be zero in the mask are known to be one.
- KnownBits MKnown;
- CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
+ KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
unsigned SHOpc = Op1.getOperand(0).getOpcode();
@@ -1083,9 +1082,14 @@ class BitPermutationSelector {
// lowest-order bit.
unsigned Idx;
+ // ConstZero means a bit we need to mask off.
+ // Variable is a bit that comes from an input variable.
+ // VariableKnownToBeZero is also a bit that comes from an input variable,
+ // but it is already known to be zero, so we do not need to mask it.
enum Kind {
ConstZero,
- Variable
+ Variable,
+ VariableKnownToBeZero
} K;
ValueBit(SDValue V, unsigned I, Kind K = Variable)
@@ -1094,11 +1098,11 @@ class BitPermutationSelector {
: V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
bool isZero() const {
- return K == ConstZero;
+ return K == ConstZero || K == VariableKnownToBeZero;
}
bool hasValue() const {
- return K == Variable;
+ return K == Variable || K == VariableKnownToBeZero;
}
SDValue getValue() const {
@@ -1248,8 +1252,14 @@ class BitPermutationSelector {
for (unsigned i = 0; i < NumBits; ++i)
if (((Mask >> i) & 1) == 1)
Bits[i] = (*LHSBits)[i];
- else
- Bits[i] = ValueBit(ValueBit::ConstZero);
+ else {
+ // The AND instruction masks this bit. If the input is already zero,
+ // we have nothing to do here. Otherwise, make the bit ConstZero.
+ if ((*LHSBits)[i].isZero())
+ Bits[i] = (*LHSBits)[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+ }
return std::make_pair(Interesting, &Bits);
}
@@ -1259,8 +1269,26 @@ class BitPermutationSelector {
const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
bool AllDisjoint = true;
- for (unsigned i = 0; i < NumBits; ++i)
- if (LHSBits[i].isZero())
+ SDValue LastVal = SDValue();
+ unsigned LastIdx = 0;
+ for (unsigned i = 0; i < NumBits; ++i) {
+ if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
+ // If both inputs are known to be zero, one being ConstZero and
+ // the other VariableKnownToBeZero, we can select whichever we like.
+ // To minimize the number of bit groups, we select
+ // VariableKnownToBeZero if this bit is the next bit of the same
+ // input variable as the previous bit. Otherwise, we select
+ // ConstZero.
+ if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
+ LHSBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = LHSBits[i];
+ else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
+ RHSBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = RHSBits[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+ }
+ else if (LHSBits[i].isZero())
Bits[i] = RHSBits[i];
else if (RHSBits[i].isZero())
Bits[i] = LHSBits[i];
@@ -1268,6 +1296,16 @@ class BitPermutationSelector {
AllDisjoint = false;
break;
}
+ // We remember the value and bit index of this bit.
+ if (Bits[i].hasValue()) {
+ LastVal = Bits[i].getValue();
+ LastIdx = Bits[i].getValueBitIndex();
+ }
+ else {
+ if (LastVal) LastVal = SDValue();
+ LastIdx = 0;
+ }
+ }
if (!AllDisjoint)
break;
@@ -1293,6 +1331,72 @@ class BitPermutationSelector {
return std::make_pair(Interesting, &Bits);
}
+ case ISD::TRUNCATE: {
+ EVT FromType = V.getOperand(0).getValueType();
+ EVT ToType = V.getValueType();
+ // We only support truncating from i64 to i32.
+ if (FromType != MVT::i64 || ToType != MVT::i32)
+ break;
+ const unsigned NumAllBits = FromType.getSizeInBits();
+ SmallVector<ValueBit, 64> *InBits;
+ std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
+ NumAllBits);
+ const unsigned NumValidBits = ToType.getSizeInBits();
+
+ // A 32-bit instruction cannot touch the upper 32 bits of a 64-bit value,
+ // so we cannot include this truncate.
+ bool UseUpper32bit = false;
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
+ UseUpper32bit = true;
+ break;
+ }
+ if (UseUpper32bit)
+ break;
+
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = (*InBits)[i];
+
+ return std::make_pair(Interesting, &Bits);
+ }
+ case ISD::AssertZext: {
+ // For AssertZext, we look through the operand and
+ // mark the bits known to be zero.
+ const SmallVector<ValueBit, 64> *LHSBits;
+ std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
+ NumBits);
+
+ EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
+ const unsigned NumValidBits = FromType.getSizeInBits();
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = (*LHSBits)[i];
+
+ // These bits are known to be zero.
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = ValueBit((*LHSBits)[i].getValue(),
+ (*LHSBits)[i].getValueBitIndex(),
+ ValueBit::VariableKnownToBeZero);
+
+ return std::make_pair(Interesting, &Bits);
+ }
+ case ISD::LOAD:
+ LoadSDNode *LD = cast<LoadSDNode>(V);
+ if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
+ EVT VT = LD->getMemoryVT();
+ const unsigned NumValidBits = VT.getSizeInBits();
+
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = ValueBit(V, i);
+
+ // These bits are known to be zero.
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
+
+ // The zero-extending load itself cannot be optimized, so it is not
+ // interesting by itself even though it provides useful information.
+ return std::make_pair(Interesting = false, &Bits);
+ }
+ break;
}
for (unsigned i = 0; i < NumBits; ++i)
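A hedged example (hypothetical, not from the patch or its tests) of code that exercises the new ISD::TRUNCATE case above: the truncate from i64 to i32 can be looked through because the permutation only uses bits below index 32 of the 64-bit source.

    unsigned low_rotate(unsigned long long x) {
      unsigned lo = static_cast<unsigned>(x);   // ISD::TRUNCATE i64 -> i32
      return (lo << 4) | (lo >> 28);            // 32-bit rotate of the low word;
                                                // no bit with index >= 32 is
                                                // used, so the truncate can be
                                                // folded into the permutation
    }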
@@ -1304,7 +1408,7 @@ class BitPermutationSelector {
// For each value (except the constant ones), compute the left-rotate amount
// to get it from its original to final position.
void computeRotationAmounts() {
- HasZeros = false;
+ NeedMask = false;
RLAmt.resize(Bits.size());
for (unsigned i = 0; i < Bits.size(); ++i)
if (Bits[i].hasValue()) {
@@ -1314,7 +1418,7 @@ class BitPermutationSelector {
else
RLAmt[i] = Bits.size() - (VBI - i);
} else if (Bits[i].isZero()) {
- HasZeros = true;
+ NeedMask = true;
RLAmt[i] = UINT32_MAX;
} else {
llvm_unreachable("Unknown value bit type");
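For reference, the rotation amounts computed in the loop above follow from the fact that a left rotate by r moves bit VBI to bit VBI + r (mod size); a small hedged check:

    // Assuming 32-bit values: a bit at source index VBI that must land at
    // destination index i needs RLAmt = (i - VBI) mod 32.
    //   i = 3, VBI = 28  ->  RLAmt = 32 - (28 - 3) = 7
    //   ROTL32(1u << 28, 7) == 1u << 3   // bit 28 rotates into bit 3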
@@ -1330,6 +1434,7 @@ class BitPermutationSelector {
unsigned LastRLAmt = RLAmt[0];
SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
unsigned LastGroupStartIdx = 0;
+ bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
for (unsigned i = 1; i < Bits.size(); ++i) {
unsigned ThisRLAmt = RLAmt[i];
SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
@@ -1342,10 +1447,20 @@ class BitPermutationSelector {
LastGroupStartIdx = 0;
}
+ // If this bit is known to be zero and the current group is a bit group
+ // of zeros, we do not need to terminate the current bit group even if the
+ // Value or RLAmt does not match here. Instead, we terminate this group
+ // when the first non-zero bit appears later.
+ if (IsGroupOfZeros && Bits[i].isZero())
+ continue;
+
// If this bit has the same underlying value and the same rotate factor as
// the last one, then they're part of the same group.
if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
- continue;
+ // We cannot continue the current group if this bit is not known to
+ // be zero in a bit group of zeros.
+ if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
+ continue;
if (LastValue.getNode())
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1353,6 +1468,7 @@ class BitPermutationSelector {
LastRLAmt = ThisRLAmt;
LastValue = ThisValue;
LastGroupStartIdx = i;
+ IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
}
if (LastValue.getNode())
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1401,7 +1517,7 @@ class BitPermutationSelector {
for (auto &I : ValueRots) {
ValueRotsVec.push_back(I.second);
}
- llvm::sort(ValueRotsVec.begin(), ValueRotsVec.end());
+ llvm::sort(ValueRotsVec);
}
// In 64-bit mode, rlwinm and friends have a rotation operator that
@@ -1588,6 +1704,17 @@ class BitPermutationSelector {
return ExtVal;
}
+ SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
+ if (V.getValueSizeInBits() == 32)
+ return V;
+
+ assert(V.getValueSizeInBits() == 64);
+ SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+ SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
+ MVT::i32, V, SubRegIdx), 0);
+ return SubVal;
+ }
+
// Depending on the number of groups for a particular value, it might be
// better to rotate, mask explicitly (using andi/andis), and then or the
// result. Select this part of the result first.
@@ -1646,12 +1773,12 @@ class BitPermutationSelector {
SDValue VRot;
if (VRI.RLAmt) {
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
- getI32Imm(31, dl) };
+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+ getI32Imm(0, dl), getI32Imm(31, dl) };
VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
Ops), 0);
} else {
- VRot = VRI.V;
+ VRot = TruncateToInt32(VRI.V, dl);
}
SDValue ANDIVal, ANDISVal;
@@ -1698,17 +1825,17 @@ class BitPermutationSelector {
// If we've not yet selected a 'starting' instruction, and we have no zeros
// to fill in, select the (Value, RLAmt) with the highest priority (largest
// number of groups), and start with this rotated value.
- if ((!HasZeros || LateMask) && !Res) {
+ if ((!NeedMask || LateMask) && !Res) {
ValueRotInfo &VRI = ValueRotsVec[0];
if (VRI.RLAmt) {
if (InstCnt) *InstCnt += 1;
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
- getI32Imm(31, dl) };
+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+ getI32Imm(0, dl), getI32Imm(31, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
0);
} else {
- Res = VRI.V;
+ Res = TruncateToInt32(VRI.V, dl);
}
// Now, remove all groups with this underlying value and rotation factor.
@@ -1723,13 +1850,13 @@ class BitPermutationSelector {
for (auto &BG : BitGroups) {
if (!Res) {
SDValue Ops[] =
- { BG.V, getI32Imm(BG.RLAmt, dl),
+ { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
} else {
SDValue Ops[] =
- { Res, BG.V, getI32Imm(BG.RLAmt, dl),
+ { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
@@ -2077,7 +2204,7 @@ class BitPermutationSelector {
// If we've not yet selected a 'starting' instruction, and we have no zeros
// to fill in, select the (Value, RLAmt) with the highest priority (largest
// number of groups), and start with this rotated value.
- if ((!HasZeros || LateMask) && !Res) {
+ if ((!NeedMask || LateMask) && !Res) {
// If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
// groups will come first, and so the VRI representing the largest number
// of groups might not be first (it might be the first Repl32 groups).
@@ -2230,7 +2357,7 @@ class BitPermutationSelector {
SmallVector<ValueBit, 64> Bits;
- bool HasZeros;
+ bool NeedMask;
SmallVector<unsigned, 64> RLAmt;
SmallVector<BitGroup, 16> BitGroups;
@@ -2259,10 +2386,10 @@ public:
" selection for: ");
LLVM_DEBUG(N->dump(CurDAG));
- // Fill it RLAmt and set HasZeros.
+ // Fill in RLAmt and set NeedMask.
computeRotationAmounts();
- if (!HasZeros)
+ if (!NeedMask)
return Select(N, false);
// We currently have two techniques for handling results with zeros: early
@@ -4045,54 +4172,148 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
-/// This method returns a node after flipping the MSB of each element
-/// of vector integer type. Additionally, if SignBitVec is non-null,
-/// this method sets a node with one at MSB of all elements
-/// and zero at other bits in SignBitVec.
-MachineSDNode *
-PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) {
- SDLoc dl(N);
- EVT VecVT = N.getValueType();
- if (VecVT == MVT::v4i32) {
- if (SignBitVec) {
- SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32);
- *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT,
- SDValue(ZV, 0));
- }
- return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N);
- }
- else if (VecVT == MVT::v8i16) {
- SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32,
- getI32Imm(0x8000, dl));
- SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32,
- SDValue(Hi, 0),
- getI32Imm(0x8000, dl));
- SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT,
- SDValue(ScaImm, 0));
- /*
- Alternatively, we can do this as follow to use VRF instead of GPR.
- vspltish 5, 1
- vspltish 6, 15
- vslh 5, 6, 5
- */
- if (SignBitVec) *SignBitVec = VecImm;
- return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N,
- SDValue(VecImm, 0));
- }
- else if (VecVT == MVT::v16i8) {
- SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32,
- getI32Imm(0x80, dl));
- if (SignBitVec) *SignBitVec = VecImm;
- return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N,
- SDValue(VecImm, 0));
+static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
+ bool &NeedSwapOps, bool &IsUnCmp) {
+
+ assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue TrueRes = N->getOperand(2);
+ SDValue FalseRes = N->getOperand(3);
+ ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
+ if (!TrueConst)
+ return false;
+
+ assert((N->getSimpleValueType(0) == MVT::i64 ||
+ N->getSimpleValueType(0) == MVT::i32) &&
+ "Expecting either i64 or i32 here.");
+
+ // We are looking for any of:
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
+ int64_t TrueResVal = TrueConst->getSExtValue();
+ if ((TrueResVal < -1 || TrueResVal > 1) ||
+ (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
+ (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
+ (TrueResVal == 0 &&
+ (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
+ return false;
+
+ bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
+ SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
+ if (SetOrSelCC.getOpcode() != ISD::SETCC &&
+ SetOrSelCC.getOpcode() != ISD::SELECT_CC)
+ return false;
+
+ // Without this setb optimization, the outer SELECT_CC will be manually
+ // selected to SELECT_CC_I4/SELECT_CC_I8 pseudos, which the
+ // expand-isel-pseudos pass then turns into isel instructions. When the
+ // result has more than one use (e.g. zext/sext), this optimization only
+ // replaces that isel with a setb without any significant gain. Since
+ // setb has a longer latency than the original isel, we should avoid this.
+ // Another point is that setb requires the comparison to always be kept,
+ // which can break the opportunity to eliminate the comparison in the future.
+ if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
+ return false;
+
+ SDValue InnerLHS = SetOrSelCC.getOperand(0);
+ SDValue InnerRHS = SetOrSelCC.getOperand(1);
+ ISD::CondCode InnerCC =
+ cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
+ // If the inner comparison is a select_cc, make sure the true/false values are
+ // 1/-1 and canonicalize it if needed.
+ if (InnerIsSel) {
+ ConstantSDNode *SelCCTrueConst =
+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
+ ConstantSDNode *SelCCFalseConst =
+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
+ if (!SelCCTrueConst || !SelCCFalseConst)
+ return false;
+ int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
+ int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
+ // The values must be -1/1 (requiring a swap) or 1/-1.
+ if (SelCCTVal == -1 && SelCCFVal == 1) {
+ std::swap(InnerLHS, InnerRHS);
+ } else if (SelCCTVal != 1 || SelCCFVal != -1)
+ return false;
}
- else
- llvm_unreachable("Unsupported vector data type for flipSignBit");
+
+ // Canonicalize unsigned case
+ if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
+ IsUnCmp = true;
+ InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
+ }
+
+ bool InnerSwapped = false;
+ if (LHS == InnerRHS && RHS == InnerLHS)
+ InnerSwapped = true;
+ else if (LHS != InnerLHS || RHS != InnerRHS)
+ return false;
+
+ switch (CC) {
+ // (select_cc lhs, rhs, 0, \
+ // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
+ case ISD::SETEQ:
+ if (!InnerIsSel)
+ return false;
+ if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
+ return false;
+ NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
+ break;
+
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
+ case ISD::SETULT:
+ if (!IsUnCmp && InnerCC != ISD::SETNE)
+ return false;
+ IsUnCmp = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETLT:
+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
+ (InnerCC == ISD::SETLT && InnerSwapped))
+ NeedSwapOps = (TrueResVal == 1);
+ else
+ return false;
+ break;
+
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
+ case ISD::SETUGT:
+ if (!IsUnCmp && InnerCC != ISD::SETNE)
+ return false;
+ IsUnCmp = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETGT:
+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
+ (InnerCC == ISD::SETGT && InnerSwapped))
+ NeedSwapOps = (TrueResVal == -1);
+ else
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
+ LLVM_DEBUG(N->dump());
+
+ return true;
}
// Select - Convert the specified operand from a target-independent to a
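For context on the new mayUseP9Setb matcher: the ISA 3.0 setb instruction materializes -1 if the LT bit of the chosen CR field is set, 1 if the GT bit is set, and 0 otherwise, which matches the -1/0/1 shape of the select_cc patterns listed above. A hedged example of source code that commonly produces such a pattern (hypothetical, not from the patch or its tests):

    // A three-way compare; on POWER9 this is the kind of code the new path
    // aims to select as a single compare feeding setb instead of a chain of
    // isel instructions.
    int cmp3(long a, long b) {
      return (a > b) - (a < b);   // -1 if a < b, 0 if equal, 1 if a > b
    }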
@@ -4429,8 +4650,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
- KnownBits LHSKnown;
- CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);
+ KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
// If this is equivalent to an add, then we can fold it with the
// FrameIndex calculation.
@@ -4557,6 +4777,31 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
N->getOperand(0).getValueType() == MVT::i1)
break;
+ if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
+ bool NeedSwapOps = false;
+ bool IsUnCmp = false;
+ if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (NeedSwapOps)
+ std::swap(LHS, RHS);
+
+ // Make use of SelectCC to generate the comparison that sets the CR bits.
+ // For equality comparisons with one literal operand, SelectCC may not
+ // materialize the whole literal, instead using xoris to check it first,
+ // in which case the resulting comparison cannot exactly represent the
+ // GT/LT relationship. To avoid this we specify SETGT/SETUGT here
+ // instead of SETEQ.
+ SDValue GenCC =
+ SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
+ CurDAG->SelectNodeTo(
+ N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
+ N->getValueType(0), GenCC);
+ NumP9Setb++;
+ return;
+ }
+ }
+
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
if (!isPPC64)
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
@@ -4648,14 +4893,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
return;
}
- case ISD::VSELECT:
- if (PPCSubTarget->hasVSX()) {
- SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
- CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
- return;
- }
- break;
-
case ISD::VECTOR_SHUFFLE:
if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
@@ -4683,11 +4920,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
+ MachineMemOperand *MemOp = LD->getMemOperand();
SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
N->getValueType(0), Ops);
- cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
return;
}
}
@@ -4753,6 +4989,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
}
+ // A signed comparison of i1 values produces the opposite result to an
+ // unsigned one if the condition code includes less-than or greater-than.
+ // This is because 1 is the most negative signed i1 number and the most
+ // positive unsigned i1 number. The CR-logical operations used for such
+ // comparisons are non-commutative so for signed comparisons vs. unsigned
+ // ones, the input operands just need to be swapped.
+ if (ISD::isSignedIntSetCC(CC))
+ Swap = !Swap;
+
SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
N->getOperand(Swap ? 3 : 2),
N->getOperand(Swap ? 2 : 3)), 0);
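A minimal worked check of the i1 comment above (assuming the single set bit means 1 as an unsigned i1 and -1 as a signed i1):

    //   a = 1 (i1), b = 0 (i1)
    //   unsigned: a <u b  is false  (1 < 0)
    //   signed:   a <s b  is true   (-1 < 0)
    // A signed less-than/greater-than compare of i1 values therefore gives
    // the opposite answer to the unsigned one, hence the extra operand swap.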
@@ -4809,9 +5054,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDValue TOCbase = N->getOperand(1);
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
-
- if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
- CModel == CodeModel::Large) {
+ if (PPCLowering->isAccessedAsGotIndirect(GA)) {
+ // If it is accessed as got-indirect, we need an extra LD to load
+ // the address.
SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
transferMemOperands(N, MN);
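For reference, a hedged sketch (assumed sequences; exact relocations depend on the code model) of the two TOC addressing forms this hunk chooses between:

    // GOT-indirect (isAccessedAsGotIndirect): the TOC holds the address of
    // the symbol, so an extra load is needed.
    //   addis rX, r2, sym@toc@ha      ; ADDIStocHA
    //   ld    rX, sym@toc@l(rX)       ; LDtocL
    // Direct TOC-relative (the ADDItocL path below):
    //   addis rX, r2, sym@toc@ha      ; ADDIStocHA
    //   addi  rX, rX, sym@toc@l       ; ADDItocL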
@@ -4819,18 +5064,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
- const GlobalValue *GV = G->getGlobal();
- unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
- if (GVFlags & PPCII::MO_NLP_FLAG) {
- SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
- transferMemOperands(N, MN);
- ReplaceNode(N, MN);
- return;
- }
- }
-
+ // Build the address relative to the TOC pointer.
ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA));
return;
@@ -4916,55 +5150,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
}
- case ISD::ABS: {
- assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector");
-
- // For vector absolute difference, we use VABSDUW instruction of POWER9.
- // Since VABSDU instructions are for unsigned integers, we need adjustment
- // for signed integers.
- // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000).
- // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.
- // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000).
- EVT VecVT = N->getOperand(0).getValueType();
- SDNode *AbsOp = nullptr;
- unsigned AbsOpcode;
-
- if (VecVT == MVT::v4i32)
- AbsOpcode = PPC::VABSDUW;
- else if (VecVT == MVT::v8i16)
- AbsOpcode = PPC::VABSDUH;
- else if (VecVT == MVT::v16i8)
- AbsOpcode = PPC::VABSDUB;
- else
- llvm_unreachable("Unsupported vector data type for ISD::ABS");
-
- // Even for signed integers, we can skip adjustment if all values are
- // known to be positive (as signed integer) due to zero-extended inputs.
- if (N->getOperand(0).getOpcode() == ISD::SUB &&
- N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
- N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND) {
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
- SDValue(N->getOperand(0)->getOperand(0)),
- SDValue(N->getOperand(0)->getOperand(1)));
- ReplaceNode(N, AbsOp);
- return;
- }
- if (N->getOperand(0).getOpcode() == ISD::SUB) {
- SDValue SubVal = N->getOperand(0);
- SDNode *Op0 = flipSignBit(SubVal->getOperand(0));
- SDNode *Op1 = flipSignBit(SubVal->getOperand(1));
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
- SDValue(Op0, 0), SDValue(Op1, 0));
- }
- else {
- SDNode *Op1 = nullptr;
- SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1);
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0),
- SDValue(Op1, 0));
- }
- ReplaceNode(N, AbsOp);
- return;
- }
}
SelectCode(N);