aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/CodeGen/SelectionDAG
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-12-30 13:13:10 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-12-30 13:13:10 +0000
commit7d523365ff1a3cc95bc058b33102500f61e8166d (patch)
treeb466a4817f79516eb1df8eae92bccf62ecc84003 /contrib/llvm/lib/CodeGen/SelectionDAG
parente3b65fde506060bec5cd110fcf03b440bd0eea1d (diff)
parentdd58ef019b700900793a1eb48b52123db01b654e (diff)
Update llvm to trunk r256633.
Notes
Notes: svn path=/projects/clang380-import/; revision=292941
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG')
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp2644
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp142
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp196
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp1283
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp272
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp304
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp124
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h110
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp58
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp275
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp34
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h6
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp530
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp1048
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h109
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp133
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp261
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp13
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp137
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp267
24 files changed, 5051 insertions, 2921 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3b29306bb54a..0872d7a9a228 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -156,13 +156,16 @@ namespace {
void deleteAndRecombine(SDNode *N);
bool recursivelyDeleteUnusedNodes(SDNode *N);
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo = true);
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
return CombineTo(N, &Res, 1, AddTo);
}
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
bool AddTo = true) {
SDValue To[] = { Res0, Res1 };
@@ -233,18 +236,17 @@ namespace {
SDValue visitADDE(SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
+ SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
- SDValue visitSREM(SDNode *N);
- SDValue visitUREM(SDNode *N);
+ SDValue visitREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitSMULO(SDNode *N);
SDValue visitUMULO(SDNode *N);
- SDValue visitSDIVREM(SDNode *N);
- SDValue visitUDIVREM(SDNode *N);
+ SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitOR(SDNode *N);
@@ -265,6 +267,7 @@ namespace {
SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
+ SDValue visitSETCCE(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
@@ -298,6 +301,10 @@ namespace {
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
+
+ SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
+ SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
+
SDValue visitSTORE(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
@@ -312,9 +319,11 @@ namespace {
SDValue visitMGATHER(SDNode *N);
SDValue visitMSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
+ SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
+ SDValue visitFMULForFMACombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -338,14 +347,17 @@ namespace {
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
+ SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
- SDValue BuildReciprocalEstimate(SDValue Op);
- SDValue BuildRsqrtEstimate(SDValue Op);
- SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
- SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
+ SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags);
+ SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -374,6 +386,10 @@ namespace {
/// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
+ /// Do FindBetterChain for a store and any possibly adjacent stores on
+ /// consecutive chains.
+ bool findBetterNeighborChains(StoreSDNode *St);
+
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@@ -388,19 +404,37 @@ namespace {
unsigned SequenceNum;
};
+ /// This is a helper function for visitMUL to check the profitability
+ /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+ /// MulNode is the original multiply, AddNode is (add x, c1),
+ /// and ConstNode is c2.
+ bool isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode);
+
/// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
/// constant build_vector of the stored constant values in Stores.
SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
SDLoc SL,
ArrayRef<MemOpLink> Stores,
+ SmallVectorImpl<SDValue> &Chains,
EVT Ty) const;
+ /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
+ /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
+ /// the type of the loaded value to be extended. LoadedVT returns the type
+ /// of the original loaded value. NarrowLoad returns whether the load would
+ /// need to be narrowed in order to match.
+ bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
+ bool &NarrowLoad);
+
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
/// \return True if a merged store was created.
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
- EVT MemVT, unsigned NumElem,
+ EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector);
/// This is a helper function for MergeConsecutiveStores.
@@ -409,7 +443,7 @@ namespace {
void getStoreMergeAndAliasCandidates(
StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
-
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
@@ -427,9 +461,7 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
- auto *F = DAG.getMachineFunction().getFunction();
- ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
- F->hasFnAttribute(Attribute::MinSize);
+ ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
}
/// Runs the dag combiner on all nodes in the work list
@@ -606,6 +638,9 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
assert(Op.hasOneUse() && "Unknown reuse!");
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+
+ const SDNodeFlags *Flags = Op.getNode()->getFlags();
+
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {
@@ -623,12 +658,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
- Op.getOperand(1));
+ Op.getOperand(1), Flags);
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, Depth+1),
- Op.getOperand(0));
+ Op.getOperand(0), Flags);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
assert(Options.UnsafeFPMath);
@@ -640,7 +675,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
// fold (fneg (fsub A, B)) -> (fsub B, A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- Op.getOperand(1), Op.getOperand(0));
+ Op.getOperand(1), Op.getOperand(0), Flags);
case ISD::FMUL:
case ISD::FDIV:
@@ -652,13 +687,13 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
- Op.getOperand(1));
+ Op.getOperand(1), Flags);
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
Op.getOperand(0),
GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, Depth+1));
+ LegalOperations, Depth+1), Flags);
case ISD::FP_EXTEND:
case ISD::FSIN:
@@ -1216,9 +1251,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I)
- AddToWorklist(I);
+ for (SDNode &Node : DAG.allnodes())
+ AddToWorklist(&Node);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
@@ -1333,16 +1367,18 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
- case ISD::SREM: return visitSREM(N);
- case ISD::UREM: return visitUREM(N);
+ case ISD::SREM:
+ case ISD::UREM: return visitREM(N);
case ISD::MULHU: return visitMULHU(N);
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO: return visitSMULO(N);
case ISD::UMULO: return visitUMULO(N);
- case ISD::SDIVREM: return visitSDIVREM(N);
- case ISD::UDIVREM: return visitUDIVREM(N);
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: return visitIMINMAX(N);
case ISD::AND: return visitAND(N);
case ISD::OR: return visitOR(N);
case ISD::XOR: return visitXOR(N);
@@ -1361,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
+ case ISD::SETCCE: return visitSETCCE(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
@@ -1408,6 +1445,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
+ case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
}
return SDValue();
}
@@ -1470,13 +1508,8 @@ SDValue DAGCombiner::combine(SDNode *N) {
// Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
SDValue Ops[] = {N1, N0};
- SDNode *CSENode;
- if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) {
- CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
- &BinNode->Flags);
- } else {
- CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
- }
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+ N->getFlags());
if (CSENode)
return SDValue(CSENode, 0);
}
@@ -1595,26 +1628,6 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-static bool isNullConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isNullValue();
-}
-
-static bool isNullFPConstant(SDValue V) {
- ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
- return Const != nullptr && Const->isZero() && !Const->isNegative();
-}
-
-static bool isAllOnesConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isAllOnesValue();
-}
-
-static bool isOneConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isOne();
-}
-
/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a
/// ContantSDNode pointer else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
@@ -1721,22 +1734,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
- if (VT.isInteger() && !VT.isVector()) {
- APInt LHSZero, LHSOne;
- APInt RHSZero, RHSOne;
- DAG.computeKnownBits(N0, LHSZero, LHSOne);
-
- if (LHSZero.getBoolValue()) {
- DAG.computeKnownBits(N1, RHSZero, RHSOne);
-
- // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
- // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
- if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
- }
- }
- }
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+ VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
@@ -1971,31 +1971,26 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// If the flag result is dead, turn this into an SUB.
if (!N->hasAnyUseOfValue(1))
- return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
- if (N0 == N1) {
- SDLoc DL(N);
+ if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, DL, VT),
- DAG.getNode(ISD::CARRY_FALSE, DL,
- MVT::Glue));
- }
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, 0) -> x + no borrow
if (isNullConstant(N1))
- return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (isAllOnesConstant(N0))
- return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
return SDValue();
}
@@ -2130,14 +2125,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
- if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
- (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1))))
- return DAG.getNode(ISD::ADD, SDLoc(N), VT,
- DAG.getNode(ISD::MUL, SDLoc(N0), VT,
- N0.getOperand(0), N1),
- DAG.getNode(ISD::MUL, SDLoc(N1), VT,
- N0.getOperand(1), N1));
+ if (isConstantIntBuildVectorOrConstantInt(N1) &&
+ N0.getOpcode() == ISD::ADD &&
+ isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+ isMulAddWithConstProfitable(N, N0, N1))
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT,
+ DAG.getNode(ISD::MUL, SDLoc(N0), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, SDLoc(N1), VT,
+ N0.getOperand(1), N1));
// reassociate mul
if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
@@ -2146,6 +2142,88 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return SDValue();
}
+/// Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: return false; // No libcall for vector types.
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != nullptr;
+}
+
+/// Issue divrem if both quotient and remainder are needed.
+SDValue DAGCombiner::useDivRem(SDNode *Node) {
+ if (Node->use_empty())
+ return SDValue(); // This is a dead node, leave it alone.
+
+ EVT VT = Node->getValueType(0);
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
+
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ // If DIVREM is going to get expanded into a libcall,
+ // but there is no libcall available, then don't combine.
+ if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
+ !isDivRemLibcallAvailable(Node, isSigned, TLI))
+ return SDValue();
+
+ // If div is legal, it's better to do the normal expansion
+ unsigned OtherOpcode = 0;
+ if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
+ OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
+ if (TLI.isOperationLegalOrCustom(Opcode, VT))
+ return SDValue();
+ } else {
+ OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
+ return SDValue();
+ }
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue combined;
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node || User->use_empty())
+ continue;
+ // Convert the other matching node(s), too;
+ // otherwise, the DIVREM may get target-legalized into something
+ // target-specific that we won't be able to recognize.
+ unsigned UserOpc = User->getOpcode();
+ if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1) {
+ if (!combined) {
+ if (UserOpc == OtherOpcode) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
+ } else if (UserOpc == DivRemOpc) {
+ combined = SDValue(User, 0);
+ } else {
+ assert(UserOpc == Opcode);
+ continue;
+ }
+ }
+ if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
+ CombineTo(User, combined);
+ else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
+ CombineTo(User, combined.getValue(1));
+ }
+ }
+ return combined;
+}
+
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2156,26 +2234,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
+ SDLoc DL(N);
+
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
if (N1C && N1C->isOne())
return N0;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue()) {
- SDLoc DL(N);
+ if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
- }
+
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (!VT.isVector()) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
- N0, N1);
+ return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
@@ -2186,18 +2264,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
!cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
(N1C->getAPIntValue().isPowerOf2() ||
(-N1C->getAPIntValue()).isPowerOf2())) {
- // If dividing by powers of two is cheap, then don't perform the following
- // fold.
- if (TLI.isPow2SDivCheap())
- return SDValue();
-
// Target-specific implementation of sdiv x, pow2.
- SDValue Res = BuildSDIVPow2(N);
- if (Res.getNode())
+ if (SDValue Res = BuildSDIVPow2(N))
return Res;
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
- SDLoc DL(N);
// Splat the sign bit into the register
SDValue SGN =
@@ -2228,15 +2299,23 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
}
// If integer divide is expensive and we satisfy the requirements, emit an
- // alternate sequence.
- if (N1C && !TLI.isIntDivCheap()) {
- SDValue Op = BuildSDIV(N);
- if (Op.getNode()) return Op;
- }
+ // alternate sequence. Targets may check function attributes for size/speed
+ // trade-offs.
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildSDIV(N))
+ return Op;
+
+ // sdiv, srem -> sdivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
+ // Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2254,26 +2333,26 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
+ SDLoc DL(N);
+
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT,
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
// fold (udiv x, (1 << c)) -> x >>u c
- if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
+ if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::SRL, DL, VT, N0,
DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
- }
+
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
EVT ADDVT = N1.getOperand(1).getValueType();
- SDLoc DL(N);
SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
@@ -2284,15 +2363,23 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
}
}
+
// fold (udiv x, c) -> alternate
- if (N1C && !TLI.isIntDivCheap()) {
- SDValue Op = BuildUDIV(N);
- if (Op.getNode()) return Op;
- }
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildUDIV(N))
+ return Op;
+
+ // sdiv, srem -> sdivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
+ // Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2300,102 +2387,83 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSREM(SDNode *N) {
+// handles ISD::SREM and ISD::UREM
+SDValue DAGCombiner::visitREM(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ bool isSigned = (Opcode == ISD::SREM);
+ SDLoc DL(N);
- // fold (srem c1, c2) -> c1%c2
+ // fold (rem c1, c2) -> c1%c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT,
- N0C, N1C))
+ if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
return Folded;
- // If we know the sign bits of both operands are zero, strength reduce to a
- // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
- if (!VT.isVector()) {
- if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
- }
- // If X/C can be simplified by the division-by-constant logic, lower
- // X%C to the equivalent of X-X/C*C.
- if (N1C && !N1C->isNullValue()) {
- SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
- AddToWorklist(Div.getNode());
- SDValue OptimizedDiv = combine(Div.getNode());
- if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
- OptimizedDiv, N1);
- SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
- AddToWorklist(Mul.getNode());
- return Sub;
+ if (isSigned) {
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
}
- }
-
- // undef % X -> 0
- if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
- // X % undef -> undef
- if (N1.getOpcode() == ISD::UNDEF)
- return N1;
-
- return SDValue();
-}
-
-SDValue DAGCombiner::visitUREM(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- EVT VT = N->getValueType(0);
-
- // fold (urem c1, c2) -> c1%c2
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT,
- N0C, N1C))
- return Folded;
- // fold (urem x, pow2) -> (and x, pow2-1)
- if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
- N1C->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
- return DAG.getNode(ISD::AND, DL, VT, N0,
- DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
- }
- // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
- if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
- if (SHC->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1,
+ } else {
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ N1C->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::AND, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ }
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1,
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
VT));
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ }
}
}
}
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
- if (N1C && !N1C->isNullValue()) {
- SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
+ // To avoid mangling nodes, this simplification requires that the combine()
+ // call for the speculative DIV must not cause a DIVREM conversion. We guard
+ // against this by skipping the simplification if isIntDivCheap(). When
+ // div is not cheap, combine will not return a DIVREM. Regardless,
+ // checking cheapness here makes sense since the simplification results in
+ // fatter code.
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
AddToWorklist(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
- OptimizedDiv, N1);
- SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
+ assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
+ (OptimizedDiv.getOpcode() != ISD::SDIVREM));
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(Mul.getNode());
return Sub;
}
}
+ // sdiv, srem -> sdivrem
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem.getValue(1);
+
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2532,8 +2600,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
}
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
- if (Res.getNode()) return Res;
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
+ return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
@@ -2563,8 +2631,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
}
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
- if (Res.getNode()) return Res;
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
+ return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
@@ -2613,16 +2681,26 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
- if (Res.getNode()) return Res;
+SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
- return SDValue();
-}
+ // fold (add c1, c2) -> c1+c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
-SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
- if (Res.getNode()) return Res;
+ // canonicalize constant to RHS
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
return SDValue();
}
@@ -2848,10 +2926,13 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
if (Result != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getSimpleValueType())))))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
+ TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
+ EVT CCVT = getSetCCResultType(LL.getValueType());
+ if (N0.getValueType() == CCVT ||
+ (!LegalOperations && N0.getValueType() == MVT::i1))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
}
}
@@ -2887,6 +2968,46 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
return SDValue();
}
+bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
+ bool &NarrowLoad) {
+ uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
+
+ if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
+ return false;
+
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ LoadedVT = LoadN->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
+ // ZEXTLOAD will match without needing to change the size of the value being
+ // loaded.
+ NarrowLoad = false;
+ return true;
+ }
+
+ // Do not change the width of a volatile load.
+ if (LoadN->isVolatile())
+ return false;
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
+ return false;
+
+ NarrowLoad = true;
+ return true;
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3079,16 +3200,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
: cast<LoadSDNode>(N0);
if (LN0->getExtensionType() != ISD::SEXTLOAD &&
LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
- uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
- if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
- EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- EVT LoadedVT = LN0->getMemoryVT();
- EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
-
- if (ExtVT == LoadedVT &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
- ExtVT))) {
-
+ auto NarrowLoad = false;
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ EVT ExtVT, LoadedVT;
+ if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
+ NarrowLoad)) {
+ if (!NarrowLoad) {
SDValue NewLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(), ExtVT,
@@ -3096,14 +3213,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
AddToWorklist(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
-
- // Do not change the width of a volatile load.
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
- ExtVT))) {
+ } else {
EVT PtrType = LN0->getOperand(1).getValueType();
unsigned Alignment = LN0->getAlignment();
@@ -3142,10 +3252,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return Combined;
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
@@ -3507,10 +3616,13 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
if (Result != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getValueType())))))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
+ TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
+ EVT CCVT = getSetCCResultType(LL.getValueType());
+ if (N0.getValueType() == CCVT ||
+ (!LegalOperations && N0.getValueType() == MVT::i1))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
}
}
@@ -3665,11 +3777,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return Combined;
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
- SDValue BSwap = MatchBSwapHWord(N, N0, N1);
- if (BSwap.getNode())
+ if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
return BSwap;
- BSwap = MatchBSwapHWordLow(N, N0, N1);
- if (BSwap.getNode())
+ if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
return BSwap;
// reassociate or
@@ -3690,10 +3800,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
@@ -3710,7 +3819,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
if (Op.getOpcode() == ISD::AND) {
- if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
Mask = Op.getOperand(1);
Op = Op.getOperand(0);
} else {
@@ -3727,105 +3836,106 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
}
// Return true if we can prove that, whenever Neg and Pos are both in the
-// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that
+// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
// (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
-// in direction shift1 by Neg. The range [0, OpSize) means that we only need
+// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
-static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
- // If OpSize is a power of 2 then:
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
+ // If EltSize is a power of 2 then:
//
- // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1)
- // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize).
+ // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
+ // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
//
- // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check
+ // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
// for the stronger condition:
//
- // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A]
+ // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
//
- // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1)
+ // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
// we can just replace Neg with Neg' for the rest of the function.
//
// In other cases we check for the even stronger condition:
//
- // Neg == OpSize - Pos [B]
+ // Neg == EltSize - Pos [B]
//
// for all Neg and Pos. Note that the (or ...) then invokes undefined
- // behavior if Pos == 0 (and consequently Neg == OpSize).
+ // behavior if Pos == 0 (and consequently Neg == EltSize).
//
- // We could actually use [A] whenever OpSize is a power of 2, but the
+ // We could actually use [A] whenever EltSize is a power of 2, but the
// only extra cases that it would match are those uninteresting ones
// where Neg and Pos are never in range at the same time. E.g. for
- // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
+ // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
// as well as (sub 32, Pos), but:
//
// (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
//
// always invokes undefined behavior for 32-bit X.
//
- // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
+ // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
unsigned MaskLoBits = 0;
- if (Neg.getOpcode() == ISD::AND &&
- isPowerOf2_64(OpSize) &&
- Neg.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
- Neg = Neg.getOperand(0);
- MaskLoBits = Log2_64(OpSize);
+ if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+ if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
+ if (NegC->getAPIntValue() == EltSize - 1) {
+ Neg = Neg.getOperand(0);
+ MaskLoBits = Log2_64(EltSize);
+ }
+ }
}
// Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
if (Neg.getOpcode() != ISD::SUB)
- return 0;
- ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0));
+ return false;
+ ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
if (!NegC)
- return 0;
+ return false;
SDValue NegOp1 = Neg.getOperand(1);
- // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
+ // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
// Pos'. The truncation is redundant for the purpose of the equality.
- if (MaskLoBits &&
- Pos.getOpcode() == ISD::AND &&
- Pos.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
- Pos = Pos.getOperand(0);
+ if (MaskLoBits && Pos.getOpcode() == ISD::AND)
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ if (PosC->getAPIntValue() == EltSize - 1)
+ Pos = Pos.getOperand(0);
// The condition we need is now:
//
- // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
+ // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
//
// If NegOp1 == Pos then we need:
//
- // OpSize & Mask == NegC & Mask
+ // EltSize & Mask == NegC & Mask
//
// (because "x & Mask" is a truncation and distributes through subtraction).
APInt Width;
if (Pos == NegOp1)
Width = NegC->getAPIntValue();
+
// Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
// Then the condition we want to prove becomes:
//
- // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask
+ // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
//
// which, again because "x & Mask" is a truncation, becomes:
//
- // NegC & Mask == (OpSize - PosC) & Mask
- // OpSize & Mask == (NegC + PosC) & Mask
- else if (Pos.getOpcode() == ISD::ADD &&
- Pos.getOperand(0) == NegOp1 &&
- Pos.getOperand(1).getOpcode() == ISD::Constant)
- Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() +
- NegC->getAPIntValue());
- else
+ // NegC & Mask == (EltSize - PosC) & Mask
+ // EltSize & Mask == (NegC + PosC) & Mask
+ else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ Width = PosC->getAPIntValue() + NegC->getAPIntValue();
+ else
+ return false;
+ } else
return false;
- // Now we just need to check that OpSize & Mask == Width & Mask.
+ // Now we just need to check that EltSize & Mask == Width & Mask.
if (MaskLoBits)
- // Opsize & Mask is 0 since Mask is Opsize - 1.
+ // EltSize & Mask is 0 since Mask is EltSize - 1.
return Width.getLoBits(MaskLoBits) == 0;
- return Width == OpSize;
+ return Width == EltSize;
}
// A subroutine of MatchRotate used once we have found an OR of two opposite
@@ -3845,7 +3955,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
- if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) {
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg).getNode();
@@ -3888,10 +3998,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
std::swap(LHSShift, RHSShift);
- std::swap(LHSMask , RHSMask );
+ std::swap(LHSMask, RHSMask);
}
- unsigned OpSizeInBits = VT.getSizeInBits();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
SDValue RHSShiftArg = RHSShift.getOperand(0);
@@ -3899,11 +4009,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
- if (LHSShiftAmt.getOpcode() == ISD::Constant &&
- RHSShiftAmt.getOpcode() == ISD::Constant) {
- uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
- uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
- if ((LShVal + RShVal) != OpSizeInBits)
+ if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
+ uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != EltSizeInBits)
return nullptr;
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
@@ -3911,18 +4020,23 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
- APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
+ APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
+ SDValue Mask = DAG.getConstant(AllBits, DL, VT);
if (LHSMask.getNode()) {
- APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
- Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+ APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, LHSMask,
+ DAG.getConstant(RHSBits, DL, VT)));
}
if (RHSMask.getNode()) {
- APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
- Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+ APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, RHSMask,
+ DAG.getConstant(LHSBits, DL, VT)));
}
- Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
}
return Rot.getNode();
@@ -4112,10 +4226,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// Simplify the expression using non-local knowledge.
if (!VT.isVector() &&
@@ -4434,12 +4547,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
}
- if (N1C && !N1C->isOpaque()) {
- SDValue NewSHL = visitShiftByConstant(N, N1C);
- if (NewSHL.getNode())
- return NewSHL;
+ // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
+ if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ if (SDValue Folded =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
+ }
}
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSHL = visitShiftByConstant(N, N1C))
+ return NewSHL;
+
return SDValue();
}
@@ -4583,11 +4703,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
- if (N1C && !N1C->isOpaque()) {
- SDValue NewSRA = visitShiftByConstant(N, N1C);
- if (NewSRA.getNode())
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRA = visitShiftByConstant(N, N1C))
return NewSRA;
- }
return SDValue();
}
@@ -4744,8 +4862,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
- SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
- if (NewOp1.getNode())
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
}
@@ -4754,15 +4871,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- if (N1C && !N1C->isOpaque()) {
- SDValue NewSRL = visitShiftByConstant(N, N1C);
- if (NewSRL.getNode())
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRL = visitShiftByConstant(N, N1C))
return NewSRL;
- }
// Attempt to convert a srl of a load into a narrower zero-extending load.
- SDValue NarrowLoad = ReduceLoadWidth(N);
- if (NarrowLoad.getNode())
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
return NarrowLoad;
// Here is a common situation. We want to optimize:
@@ -4973,70 +5087,47 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
- // fold selects based on a setcc into other things, such as min/max/abs
- if (N0.getOpcode() == ISD::SETCC) {
- // select x, y (fcmp lt x, y) -> fminnum x, y
- // select x, y (fcmp gt x, y) -> fmaxnum x, y
- //
- // This is OK if we don't care about what happens if either operand is a
- // NaN.
- //
-
- // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
- // no signed zeros as well as no nans.
- const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.UnsafeFPMath &&
- VT.isFloatingPoint() && N0.hasOneUse() &&
- DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
-
- SDValue FMinMax =
- combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
- N1, N2, CC, TLI, DAG);
- if (FMinMax)
- return FMinMax;
- }
-
- if ((!LegalOperations &&
- TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
- TLI.isOperationLegal(ISD::SELECT_CC, VT))
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- N1, N2, N0.getOperand(2));
- return SimplifySelect(SDLoc(N), N0, N1, N2);
- }
-
if (VT0 == MVT::i1) {
- if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
- // select (and Cond0, Cond1), X, Y
- // -> select Cond0, (select Cond1, X, Y), Y
- if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
- SDValue Cond0 = N0->getOperand(0);
- SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
- N1.getValueType(), Cond1, N1, N2);
+ // The code in this block deals with the following 2 equivalences:
+ // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
+ // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
+ // The target can specify its prefered form with the
+ // shouldNormalizeToSelectSequence() callback. However we always transform
+ // to the right anyway if we find the inner select exists in the DAG anyway
+ // and we always transform to the left side if we know that we can further
+ // optimize the combination of the conditions.
+ bool normalizeToSequence
+ = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
InnerSelect, N2);
- }
- // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
- if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
- SDValue Cond0 = N0->getOperand(0);
- SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
- N1.getValueType(), Cond1, N1, N2);
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
InnerSelect);
- }
}
// select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
- if (N1->getOpcode() == ISD::SELECT) {
+ if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
SDValue N1_0 = N1->getOperand(0);
SDValue N1_1 = N1->getOperand(1);
SDValue N1_2 = N1->getOperand(2);
if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
// Create the actual and node if we can generate good code for it.
- if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ if (!normalizeToSequence) {
SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
N0, N1_0);
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
@@ -5049,13 +5140,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
- if (N2->getOpcode() == ISD::SELECT) {
+ if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
SDValue N2_0 = N2->getOperand(0);
SDValue N2_1 = N2->getOperand(1);
SDValue N2_2 = N2->getOperand(2);
if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
// Create the actual or node if we can generate good code for it.
- if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ if (!normalizeToSequence) {
SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
N0, N2_0);
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
@@ -5069,6 +5160,38 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // select x, y (fcmp lt x, y) -> fminnum x, y
+ // select x, y (fcmp gt x, y) -> fmaxnum x, y
+ //
+ // This is OK if we don't care about what happens if either operand is a
+ // NaN.
+ //
+
+ // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
+ // no signed zeros as well as no nans.
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.UnsafeFPMath &&
+ VT.isFloatingPoint() && N0.hasOneUse() &&
+ DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+
+ if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1), N1, N2, CC,
+ TLI, DAG))
+ return FMinMax;
+ }
+
+ if ((!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
+ TLI.isOperationLegal(ISD::SELECT_CC, VT))
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(SDLoc(N), N0, N1, N2);
+ }
+
return SDValue();
}
@@ -5523,8 +5646,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
N2.getOpcode() == ISD::CONCAT_VECTORS &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
- SDValue CV = ConvertSelectToConcatVector(N, DAG);
- if (CV.getNode())
+ if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
return CV;
}
@@ -5580,7 +5702,20 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
SDLoc(N));
}
-/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
+SDValue DAGCombiner::visitSETCCE(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+
+ // If Carry is false, fold to a regular SETCC.
+ if (Carry.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+
+ return SDValue();
+}
+
+/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
@@ -5837,8 +5972,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -6024,7 +6158,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (!VT.isVector()) {
EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
- if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
SDLoc DL(N);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
@@ -6120,8 +6255,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -6133,32 +6267,45 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
// fold (zext (truncate x)) -> (and x, mask)
- if (N0.getOpcode() == ISD::TRUNCATE &&
- (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
-
+ if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ SDNode *oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorklist(oye);
}
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
- SDValue Op = N0.getOperand(0);
- if (Op.getValueType().bitsLT(VT)) {
- Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
- } else if (Op.getValueType().bitsGT(VT)) {
- Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
+ EVT SrcVT = N0.getOperand(0).getValueType();
+ EVT MinVT = N0.getValueType();
+
+ // Try to mask before the extension to avoid having to generate a larger mask,
+ // possibly over several sub-vectors.
+ if (SrcVT.bitsLT(VT)) {
+ if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
+ SDValue Op = N0.getOperand(0);
+ Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ AddToWorklist(Op.getNode());
+ return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ }
+ }
+
+ if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
+ SDValue Op = N0.getOperand(0);
+ if (SrcVT.bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
+ AddToWorklist(Op.getNode());
+ } else if (SrcVT.bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
+ AddToWorklist(Op.getNode());
+ }
+ return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
}
- return DAG.getZeroExtendInReg(Op, SDLoc(N),
- N0.getValueType().getScalarType());
}
// Fold (zext (and (trunc x), cst)) -> (and x, cst),
@@ -6219,6 +6366,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
+ // Unless (and (load x) cst) will match as a zextload already and has
+ // additional users.
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
@@ -6229,9 +6378,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
- if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
- SetCCs, TLI);
+ if (!N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::AND) {
+ auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
+ auto NarrowLoad = false;
+ EVT LoadResultTy = AndC->getValueType(0);
+ EVT ExtVT, LoadedVT;
+ if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
+ NarrowLoad))
+ DoXform = false;
+ }
+ if (DoXform)
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI);
+ }
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
LN0->getChain(), LN0->getBasePtr(),
@@ -6378,8 +6538,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
@@ -6546,8 +6705,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
// Watch out for shift count overflow though.
if (Amt >= Mask.getBitWidth()) break;
APInt NewMask = Mask << Amt;
- SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
- if (SimplifyLHS.getNode())
+ if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
SimplifyLHS, V.getOperand(1));
}
@@ -6736,8 +6894,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
unsigned VTBits = VT.getScalarType().getSizeInBits();
unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// fold (sext_in_reg c1) -> c1
- if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
@@ -6771,8 +6932,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (load x)) -> (smaller sextload x)
// fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
- SDValue NarrowLoad = ReduceLoadWidth(N);
- if (NarrowLoad.getNode())
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
return NarrowLoad;
// fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
@@ -6831,29 +6991,6 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
BSwap, N1);
}
- // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
- // into a build_vector.
- if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
- SmallVector<SDValue, 8> Elts;
- unsigned NumElts = N0->getNumOperands();
- unsigned ShAmt = VTBits - EVTBits;
-
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue Op = N0->getOperand(i);
- if (Op->getOpcode() == ISD::UNDEF) {
- Elts.push_back(Op);
- continue;
- }
-
- ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
- const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
- Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- SDLoc(Op), Op.getValueType()));
- }
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
- }
-
return SDValue();
}
@@ -6999,9 +7136,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
- SDValue Reduced = ReduceLoadWidth(N);
- if (Reduced.getNode())
+ if (SDValue Reduced = ReduceLoadWidth(N))
return Reduced;
+
// Handle the case where the load remains an extending load even
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
@@ -7107,6 +7244,12 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
return SDValue();
}
+static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
+ // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
+ // and Lo parts; on big-endian machines it doesn't.
+ return DAG.getDataLayout().isBigEndian() ? 1 : 0;
+}
+
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7173,6 +7316,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fneg x)) ->
+ // flipbit = signbit
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ // fold (bitcast (fabs x)) ->
+ // flipbit = (and (extract_element (bitcast x), 0), signbit)
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
@@ -7183,6 +7334,29 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(NewConv.getNode());
SDLoc DL(N);
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ assert(VT.getSizeInBits() == 128);
+ SDValue SignBit = DAG.getConstant(
+ APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
+ SDValue FlipBit;
+ if (N0.getOpcode() == ISD::FNEG) {
+ FlipBit = SignBit;
+ AddToWorklist(FlipBit.getNode());
+ } else {
+ assert(N0.getOpcode() == ISD::FABS);
+ SDValue Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(NewConv)));
+ AddToWorklist(Hi.getNode());
+ FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
+ AddToWorklist(FlipBit.getNode());
+ }
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
+ }
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT,
@@ -7196,6 +7370,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// (or (and (bitconvert x), sign), (and cst, (not sign)))
// Note that we don't handle (copysign x, cst) because this can always be
// folded to an fneg or fabs.
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fcopysign cst, x)) ->
+ // flipbit = (and (extract_element
+ // (xor (bitcast cst), (bitcast x)), 0),
+ // signbit)
+ // (xor (bitcast cst) (build_pair flipbit, flipbit))
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
@@ -7224,6 +7405,30 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(X.getNode());
}
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
+ SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT,
+ N0.getOperand(0));
+ AddToWorklist(Cst.getNode());
+ SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT,
+ N0.getOperand(1));
+ AddToWorklist(X.getNode());
+ SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
+ AddToWorklist(XorResult.getNode());
+ SDValue XorResult64 = DAG.getNode(
+ ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(XorResult)));
+ AddToWorklist(XorResult64.getNode());
+ SDValue FlipBit =
+ DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
+ DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
+ AddToWorklist(FlipBit.getNode());
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
+ }
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, SDLoc(X), VT));
@@ -7240,11 +7445,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
}
// bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
- if (N0.getOpcode() == ISD::BUILD_PAIR) {
- SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
- if (CombineLD.getNode())
+ if (N0.getOpcode() == ISD::BUILD_PAIR)
+ if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
return CombineLD;
- }
// Remove double bitcasts from shuffles - this is often a legacy of
// XformToShuffleWithZero being used to combine bitmaskings (of
@@ -7257,10 +7460,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
// If operands are a bitcast, peek through if it casts the original VT.
- // If operands are a UNDEF or constant, just bitcast back to original VT.
+ // If operands are a constant, just bitcast back to original VT.
auto PeekThroughBitcast = [&](SDValue Op) {
if (Op.getOpcode() == ISD::BITCAST &&
- Op.getOperand(0)->getValueType(0) == VT)
+ Op.getOperand(0).getValueType() == VT)
return SDValue(Op.getOperand(0));
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
@@ -7431,28 +7634,34 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath);
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations &&
- TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
- bool HasFMA = ((!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- UnsafeFPMath);
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
// Always prefer FMAD to FMA for precision.
- unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && N0.getOpcode() == ISD::FMUL &&
+ N1.getOpcode() == ISD::FMUL) {
+ if (N0.getNode()->use_size() > N1.getNode()->use_size())
+ std::swap(N0, N1);
+ }
+
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL &&
(Aggressive || N0->hasOneUse())) {
@@ -7469,7 +7678,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
@@ -7495,7 +7704,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((UnsafeFPMath || HasFMAD) && Aggressive) {
+ if ((AllowFusion || HasFMAD) && Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {
@@ -7518,7 +7727,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N0));
}
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
@@ -7608,25 +7817,23 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath);
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations &&
- TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
- bool HasFMA = ((!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- UnsafeFPMath);
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
// Always prefer FMAD to FMA for precision.
- unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
@@ -7659,7 +7866,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
@@ -7735,7 +7942,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((UnsafeFPMath || HasFMAD) && Aggressive) {
+ if ((AllowFusion || HasFMAD) && Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode &&
@@ -7765,7 +7972,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N21, N0));
}
- if (UnsafeFPMath && LookThroughFPExt) {
+ if (AllowFusion && LookThroughFPExt) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode) {
@@ -7866,14 +8073,97 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return SDValue();
}
+/// Try to perform FMA combining on a given FMUL node.
+SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+
+ // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
+ // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
+ auto FuseFADD = [&](SDValue X, SDValue Y) {
+ if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
+ auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
+ if (XC1 && XC1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ if (XC1 && XC1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFADD(N0, N1))
+ return FMA;
+ if (SDValue FMA = FuseFADD(N1, N0))
+ return FMA;
+
+ // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
+ // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
+ // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
+ // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
+ auto FuseFSUB = [&](SDValue X, SDValue Y) {
+ if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
+ auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
+ if (XC0 && XC0->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ Y);
+ if (XC0 && XC0->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+
+ auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
+ if (XC1 && XC1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+ if (XC1 && XC1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFSUB(N0, N1))
+ return FMA;
+ if (SDValue FMA = FuseFSUB(N1, N0))
+ return FMA;
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -7882,23 +8172,23 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N1,
- GetNegatedExpression(N0, DAG, LegalOperations));
+ GetNegatedExpression(N0, DAG, LegalOperations), Flags);
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
@@ -7907,14 +8197,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool AllowNewConst = (Level < AfterLegalizeDAG);
// fold (fadd A, 0) -> A
- if (N1CFP && N1CFP->isZero())
- return N0;
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
+ if (N1C->isZero())
+ return N0;
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
- isa<ConstantFPSDNode>(N0.getOperand(1)))
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1));
+ DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
+ Flags),
+ Flags);
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
@@ -7929,64 +8222,64 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
- ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
- DAG.getConstantFP(1.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
- DAG.getConstantFP(2.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
}
}
if (N1.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
- ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
- DAG.getConstantFP(1.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
- DAG.getConstantFP(2.0, DL, VT));
- return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
}
}
if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
- if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+ if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
return DAG.getNode(ISD::FMUL, DL, VT,
- N1, DAG.getConstantFP(3.0, DL, VT));
+ N1, DAG.getConstantFP(3.0, DL, VT), Flags);
}
}
if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
return DAG.getNode(ISD::FMUL, DL, VT,
- N0, DAG.getConstantFP(3.0, DL, VT));
+ N0, DAG.getConstantFP(3.0, DL, VT), Flags);
}
}
@@ -7996,15 +8289,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- return DAG.getNode(ISD::FMUL, DL, VT,
- N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
+ DAG.getConstantFP(4.0, DL, VT), Flags);
}
}
} // enable-unsafe-fp-math
// FADD -> FMA combines:
- SDValue Fused = visitFADDForFMACombine(N);
- if (Fused) {
+ if (SDValue Fused = visitFADDForFMACombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -8020,6 +8312,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -8028,12 +8321,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, dl, VT, N0, N1);
+ return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, dl, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
@@ -8068,8 +8361,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// FSUB -> FMA combines:
- SDValue Fused = visitFSUBForFMACombine(N);
- if (Fused) {
+ if (SDValue Fused = visitFSUBForFMACombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -8085,6 +8377,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector()) {
@@ -8095,12 +8388,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
// canonicalize constant to RHS
if (isConstantFPBuildVectorOrConstantFP(N0) &&
!isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
// fold (fmul A, 1.0) -> A
if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -8129,8 +8422,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// the second operand of the outer multiply are constants.
if ((N1CFP && isConstOrConstSplatFP(N01)) ||
(BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
- return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
}
}
}
@@ -8139,16 +8432,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
// during an early run of DAGCombiner can prevent folding with fmuls
// inserted during lowering.
- if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
+ if (N0.getOpcode() == ISD::FADD &&
+ (N0.getOperand(0) == N0.getOperand(1)) &&
+ N0.hasOneUse()) {
const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
- return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
@@ -8163,10 +8458,17 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FMUL, DL, VT,
GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations),
+ Flags);
}
}
+ // FMUL -> FMA combines:
+ if (SDValue Fused = visitFMULForFMACombine(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+
return SDValue();
}
@@ -8193,66 +8495,145 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP && N1CFP->isZero())
return N2;
}
+ // TODO: The FMA node should have flags that propagate to these nodes.
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
- if (N0CFP && !N1CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
- // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
- if (Options.UnsafeFPMath && N1CFP &&
- N2.getOpcode() == ISD::FMUL &&
- N0 == N2.getOperand(0) &&
- N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
- }
+ // TODO: FMA nodes should have flags that propagate to the created nodes.
+ // For now, create a Flags object for use with all unsafe math transforms.
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+ if (Options.UnsafeFPMath) {
+ // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+ if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
+ &Flags), &Flags);
+ }
- // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
- if (Options.UnsafeFPMath &&
- N0.getOpcode() == ISD::FMUL && N1CFP &&
- N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
- return DAG.getNode(ISD::FMA, dl, VT,
- N0.getOperand(0),
- DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
- N2);
+ // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+ if (N0.getOpcode() == ISD::FMUL &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
+ &Flags),
+ N2);
+ }
}
// (fma x, 1, y) -> (fadd x, y)
// (fma x, -1, y) -> (fadd (fneg x), y)
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
+ // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
AddToWorklist(RHSNeg.getNode());
+ // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
}
}
- // (fma x, c, x) -> (fmul x, (c+1))
- if (Options.UnsafeFPMath && N1CFP && N0 == N2)
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, dl, VT)));
-
- // (fma x, c, (fneg x)) -> (fmul x, (c-1))
- if (Options.UnsafeFPMath && N1CFP &&
- N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
+ if (Options.UnsafeFPMath) {
+ // (fma x, c, x) -> (fmul x, (c+1))
+ if (N1CFP && N0 == N2) {
return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, dl, VT)));
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(1.0, dl, VT),
+ &Flags), &Flags);
+ }
+ // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+ if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(-1.0, dl, VT),
+ &Flags), &Flags);
+ }
+ }
return SDValue();
}
+// Combine multiple FDIVs with the same divisor into multiple FMULs by the
+// reciprocal.
+// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+// Notice that this is not always beneficial. One reason is different target
+// may have different costs for FDIV and FMUL, so sometimes the cost of two
+// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
+// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
+SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+ bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
+ const SDNodeFlags *Flags = N->getFlags();
+ if (!UnsafeMath && !Flags->hasAllowReciprocal())
+ return SDValue();
+
+ // Skip if current node is a reciprocal.
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return SDValue();
+
+ // Exit early if the target does not want this transform or if there can't
+ // possibly be enough uses of the divisor to make the transform worthwhile.
+ SDValue N1 = N->getOperand(1);
+ unsigned MinUses = TLI.combineRepeatedFPDivisors();
+ if (!MinUses || N1->use_size() < MinUses)
+ return SDValue();
+
+ // Find all FDIV users of the same divisor.
+ // Use a set because duplicates may be present in the user list.
+ SetVector<SDNode *> Users;
+ for (auto *U : N1->uses()) {
+ if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+ // This division is eligible for optimization only if global unsafe math
+ // is enabled or if this division allows reciprocal formation.
+ if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
+ Users.insert(U);
+ }
+ }
+
+ // Now that we have the actual number of divisor uses, make sure it meets
+ // the minimum threshold specified by the target.
+ if (Users.size() < MinUses)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
+
+ // Dividend / Divisor -> Dividend * Reciprocal
+ for (auto *U : Users) {
+ SDValue Dividend = U->getOperand(0);
+ if (Dividend != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+ Reciprocal, Flags);
+ CombineTo(U, NewNode);
+ } else if (U != Reciprocal.getNode()) {
+ // In the absence of fast-math-flags, this user node is always the
+ // same node as Reciprocal, but with FMF they may be different nodes.
+ CombineTo(U, Reciprocal);
+ }
+ }
+ return SDValue(N, 0); // N was replaced.
+}
+
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8261,6 +8642,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
// fold vector ops
if (VT.isVector())
@@ -8269,7 +8651,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
if (Options.UnsafeFPMath) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
@@ -8288,28 +8670,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getConstantFP(Recip, DL, VT));
+ DAG.getConstantFP(Recip, DL, VT), Flags);
}
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
@@ -8326,18 +8710,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SqrtOp.getNode()) {
// We found a FSQRT, so try to make this fold:
// x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
- if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
- RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
+ if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
+ RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
}
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
- if (SDValue RV = BuildReciprocalEstimate(N1)) {
+ if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
}
@@ -8349,52 +8733,13 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations));
+ GetNegatedExpression(N1, DAG, LegalOperations),
+ Flags);
}
}
- // Combine multiple FDIVs with the same divisor into multiple FMULs by the
- // reciprocal.
- // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
- // Notice that this is not always beneficial. One reason is different target
- // may have different costs for FDIV and FMUL, so sometimes the cost of two
- // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
- // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
- if (Options.UnsafeFPMath) {
- // Skip if current node is a reciprocal.
- if (N0CFP && N0CFP->isExactlyValue(1.0))
- return SDValue();
-
- // Find all FDIV users of the same divisor.
- // Use a set because duplicates may be present in the user list.
- SetVector<SDNode *> Users;
- for (auto *U : N1->uses())
- if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
- Users.insert(U);
-
- if (TLI.combineRepeatedFPDivisors(Users.size())) {
- SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- // FIXME: This optimization requires some level of fast-math, so the
- // created reciprocal node should at least have the 'allowReciprocal'
- // fast-math-flag set.
- SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
-
- // Dividend / Divisor -> Dividend * Reciprocal
- for (auto *U : Users) {
- SDValue Dividend = U->getOperand(0);
- if (Dividend != FPOne) {
- SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
- Reciprocal);
- CombineTo(U, NewNode);
- } else if (U != Reciprocal.getNode()) {
- // In the absence of fast-math-flags, this user node is always the
- // same node as Reciprocal, but with FMF they may be different nodes.
- CombineTo(U, Reciprocal);
- }
- }
- return SDValue(N, 0); // N was replaced.
- }
- }
+ if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
+ return CombineRepeatedDivisors;
return SDValue();
}
@@ -8408,7 +8753,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
+ &cast<BinaryWithFlagsSDNode>(N)->Flags);
return SDValue();
}
@@ -8417,20 +8763,25 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
return SDValue();
+ // TODO: FSQRT nodes should have flags that propagate to the created nodes.
+ // For now, create a Flags object for use with all unsafe math transforms.
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
- SDValue RV = BuildRsqrtEstimate(N->getOperand(0));
+ SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags);
if (!RV)
return SDValue();
-
+
EVT VT = RV.getValueType();
SDLoc DL(N);
- RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
+ RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags);
AddToWorklist(RV.getNode());
// Unfortunately, RV is now NaN if the input was exactly 0.
// Select out this case and force the answer to 0.
SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
- EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ EVT CCVT = getSetCCResultType(VT);
SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ);
AddToWorklist(ZeroCmp.getNode());
AddToWorklist(RV.getNode());
@@ -8439,6 +8790,22 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
ZeroCmp, Zero, RV);
}
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ // Do not optimize out type conversion of f128 type yet.
+ // For some target like x86_64, configuration is changed
+ // to keep one f128 value in one SSE register, but
+ // instruction selection cannot handle FCOPYSIGN on
+ // SSE registers yet.
+ SDValue N1 = N->getOperand(1);
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
+ return (N1.getOpcode() == ISD::FP_EXTEND ||
+ N1.getOpcode() == ISD::FP_ROUND) &&
+ (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+}
+
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8482,7 +8849,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
// copysign(x, fp_extend(y)) -> copysign(x, y)
// copysign(x, fp_round(y)) -> copysign(x, y)
- if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
N0, N1.getOperand(0));
@@ -8837,11 +9204,12 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
APFloat CVal = CFP1->getValueAPF();
CVal.changeSign();
if (Level >= AfterLegalizeDAG &&
- (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
- TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
- return DAG.getNode(
- ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
+ (TLI.isFPImmLegal(CVal, VT) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N0.getOperand(1)),
+ &cast<BinaryWithFlagsSDNode>(N0)->Flags);
}
}
@@ -8851,20 +9219,20 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
+ return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
}
- if (N0CFP) {
- EVT VT = N->getValueType(0);
- // Canonicalize to constant on RHS.
+ // Canonicalize to constant on RHS.
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
- }
return SDValue();
}
@@ -8872,20 +9240,20 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
+ return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
}
- if (N0CFP) {
- EVT VT = N->getValueType(0);
- // Canonicalize to constant on RHS.
+ // Canonicalize to constant on RHS.
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
- }
return SDValue();
}
@@ -9034,8 +9402,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue Op1 = TheXor->getOperand(1);
if (Op0.getOpcode() == Op1.getOpcode()) {
// Avoid missing important xor optimizations.
- SDValue Tmp = visitXOR(TheXor);
- if (Tmp.getNode()) {
+ if (SDValue Tmp = visitXOR(TheXor)) {
if (Tmp.getNode() != TheXor) {
DEBUG(dbgs() << "\nReplacing.8 ";
TheXor->dump(&DAG);
@@ -9722,8 +10089,8 @@ struct LoadedSlice {
void addSliceGain(const LoadedSlice &LS) {
// Each slice saves a truncate.
const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
- if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
- LS.Inst->getOperand(0).getValueType()))
+ if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
+ LS.Inst->getValueType(0)))
++Truncates;
// If there is a shift amount, this slice gets rid of it.
if (LS.Shift)
@@ -10625,30 +10992,109 @@ struct BaseIndexOffset {
};
} // namespace
+// This is a helper function for visitMUL to check the profitability
+// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+// MulNode is the original multiply, AddNode is (add x, c1),
+// and ConstNode is c2.
+//
+// If the (add x, c1) has multiple uses, we could increase
+// the number of adds if we make this transformation.
+// It would only be worth doing this if we can remove a
+// multiply in the process. Check for that here.
+// To illustrate:
+// (A + c1) * c3
+// (A + c2) * c3
+// We're checking for cases where we have common "c3 * A" expressions.
+bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode) {
+ APInt Val;
+
+ // If the add only has one use, this would be OK to do.
+ if (AddNode.getNode()->hasOneUse())
+ return true;
+
+ // Walk all the users of the constant with which we're multiplying.
+ for (SDNode *Use : ConstNode->uses()) {
+
+ if (Use == MulNode) // This use is the one we're on right now. Skip it.
+ continue;
+
+ if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
+ SDNode *OtherOp;
+ SDNode *MulVar = AddNode.getOperand(0).getNode();
+
+ // OtherOp is what we're multiplying against the constant.
+ if (Use->getOperand(0) == ConstNode)
+ OtherOp = Use->getOperand(1).getNode();
+ else
+ OtherOp = Use->getOperand(0).getNode();
+
+ // Check to see if multiply is with the same operand of our "add".
+ //
+ // ConstNode = CONST
+ // Use = ConstNode * A <-- visiting Use. OtherOp is A.
+ // ...
+ // AddNode = (A + c1) <-- MulVar is A.
+ // = AddNode * ConstNode <-- current visiting instruction.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (ConstNode * A) that we can save.
+ if (OtherOp == MulVar)
+ return true;
+
+ // Now check to see if a future expansion will give us a common
+ // multiply.
+ //
+ // ConstNode = CONST
+ // AddNode = (A + c1)
+ // ... = AddNode * ConstNode <-- current visiting instruction.
+ // ...
+ // OtherOp = (A + c2)
+ // Use = OtherOp * ConstNode <-- visiting Use.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (CONST * A) after we also do the same transformation
+ // to the "t2" instruction.
+ if (OtherOp->getOpcode() == ISD::ADD &&
+ isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
+ OtherOp->getOperand(0).getNode() == MulVar)
+ return true;
+ }
+ }
+
+ // Didn't find a case where this would be profitable.
+ return false;
+}
+
SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
SDLoc SL,
ArrayRef<MemOpLink> Stores,
+ SmallVectorImpl<SDValue> &Chains,
EVT Ty) const {
SmallVector<SDValue, 8> BuildVector;
- for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I)
- BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue());
+ for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
+ Chains.push_back(St->getChain());
+ BuildVector.push_back(St->getValue());
+ }
return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
- unsigned NumElem, bool IsConstantSrc, bool UseVector) {
+ unsigned NumStores, bool IsConstantSrc, bool UseVector) {
// Make sure we have something to merge.
- if (NumElem < 2)
+ if (NumStores < 2)
return false;
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned LatestNodeUsed = 0;
- for (unsigned i=0; i < NumElem; ++i) {
+ for (unsigned i=0; i < NumStores; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
@@ -10657,45 +11103,57 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
LatestNodeUsed = i;
}
+ SmallVector<SDValue, 8> Chains;
+
// The latest Node in the DAG.
LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
if (UseVector) {
- // Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ bool IsVec = MemVT.isVector();
+ unsigned Elts = NumStores;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ // Get the type for the merged vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+
if (IsConstantSrc) {
- StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
+ StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
} else {
SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < NumElem ; ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue Val = St->getValue();
- // All of the operands of a BUILD_VECTOR must have the same type.
+ // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
if (Val.getValueType() != MemVT)
return false;
Ops.push_back(Val);
+ Chains.push_back(St->getChain());
}
// Build the extracted vector elements back into a vector.
- StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
- }
+ StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
+ DL, Ty, Ops); }
} else {
// We should always use a vector store when merging extracted vector
// elements, so this path implies a store of constants.
assert(IsConstantSrc && "Merged vector elements should use vector store");
- unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
APInt StoreInt(SizeInBits, 0);
// Construct a single integer constant which is made of the smaller
// constant inputs.
bool IsLE = DAG.getDataLayout().isLittleEndian();
- for (unsigned i = 0; i < NumElem ; ++i) {
- unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+ for (unsigned i = 0; i < NumStores; ++i) {
+ unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ Chains.push_back(St->getChain());
+
SDValue Val = St->getValue();
StoreInt <<= ElementSizeBytes * 8;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
@@ -10712,7 +11170,10 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
- SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal,
+ assert(!Chains.empty());
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
@@ -10721,7 +11182,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// Replace the last store with the new store
CombineTo(LatestOp, NewStore);
// Erase all other stores.
- for (unsigned i = 0; i < NumElem ; ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
@@ -10743,17 +11204,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
return true;
}
-static bool allowableAlignment(const SelectionDAG &DAG,
- const TargetLowering &TLI, EVT EVTTy,
- unsigned AS, unsigned Align) {
- if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align))
- return true;
-
- Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty);
- return (Align >= ABIAlignment);
-}
-
void DAGCombiner::getStoreMergeAndAliasCandidates(
StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
@@ -10775,6 +11225,38 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
EVT MemVT = St->getMemoryVT();
unsigned Seq = 0;
StoreSDNode *Index = St;
+
+
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+
+ if (UseAA) {
+ // Look at other users of the same chain. Stores on the same chain do not
+ // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
+ // to be on the same chain, so don't bother looking at adjacent chains.
+
+ SDValue Chain = St->getChain();
+ for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
+ if (I.getOperandNo() != 0)
+ continue;
+
+ if (OtherST->isVolatile() || OtherST->isIndexed())
+ continue;
+
+ if (OtherST->getMemoryVT() != MemVT)
+ continue;
+
+ BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr());
+
+ if (Ptr.equalBaseIndex(BasePtr))
+ StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
+ }
+ }
+
+ return;
+ }
+
while (Index) {
// If the chain has more than one use, then we can't reorder the mem ops.
if (Index != St && !SDValue(Index, 0)->hasOneUse())
@@ -10800,6 +11282,13 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
if (Index->getMemoryVT() != MemVT)
break;
+ // We do not allow under-aligned stores in order to prevent
+ // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
+ // be irrelevant here; what MATTERS is that we not move memory
+ // operations that potentially overlap past each-other.
+ if (Index->getAlignment() < MemVT.getStoreSize())
+ break;
+
// We found a potential memory operand to merge.
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
@@ -10844,8 +11333,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
return false;
- // Don't merge vectors into wider inputs.
- if (MemVT.isVector() || !MemVT.isSimple())
+ if (!MemVT.isSimple())
return false;
// Perform an early exit check. Do not bother looking at stored values that
@@ -10854,9 +11342,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
- bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+ bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
- if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
+ return false;
+
+ // Don't merge vectors into wider vectors if the source data comes from loads.
+ // TODO: This restriction can be lifted by using logic similar to the
+ // ExtractVecSrc case.
+ if (MemVT.isVector() && IsLoadSrc)
return false;
// Only look at ends of store sequences.
@@ -10868,22 +11363,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// We need to make sure that these nodes do not interfere with
// any of the store nodes.
SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
-
+
// Save the StoreSDNodes that we find in the chain.
SmallVector<MemOpLink, 8> StoreNodes;
getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
-
+
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
- // Sort the memory operands according to their distance from the base pointer.
+ // Sort the memory operands according to their distance from the
+ // base pointer. As a secondary criteria: make sure stores coming
+ // later in the code come first in the list. This is important for
+ // the non-UseAA case, because we're merging stores into the FINAL
+ // store along a chain which potentially contains aliasing stores.
+ // Thus, if there are multiple stores to the same address, the last
+ // one can be considered for merging but not the others.
std::sort(StoreNodes.begin(), StoreNodes.end(),
[](MemOpLink LHS, MemOpLink RHS) {
return LHS.OffsetFromBase < RHS.OffsetFromBase ||
(LHS.OffsetFromBase == RHS.OffsetFromBase &&
- LHS.SequenceNum > RHS.SequenceNum);
+ LHS.SequenceNum < RHS.SequenceNum);
});
// Scan the memory operations on the chain and find the first non-consecutive
@@ -10900,15 +11401,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
break;
}
- bool Alias = false;
// Check if this store interferes with any of the loads that we found.
- for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
- if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
- Alias = true;
- break;
- }
- // We found a load that alias with this store. Stop the sequence.
- if (Alias)
+ // If we find a load that alias with this store. Stop the sequence.
+ if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(),
+ [&](LSBaseSDNode* Ldn) {
+ return isAlias(Ldn, StoreNodes[i].MemNode);
+ }))
break;
// Mark this node as useful.
@@ -10919,6 +11417,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
// Store the constants into memory as one consecutive store.
if (IsConstantSrc) {
@@ -10940,43 +11440,40 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Find a legal type for the constant store.
unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast;
if (TLI.isTypeLegal(StoreTy) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
- FirstStoreAlign)) {
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast) {
LastLegalType = i+1;
// Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
- FirstStoreAlign)) {
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ IsFast) {
LastLegalType = i + 1;
}
}
- // Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
- if (TLI.isTypeLegal(Ty) &&
- allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) {
- LastLegalVectorType = i + 1;
+ // We only use vectors if the constant is known to be zero or the target
+ // allows it and the function is not marked with the noimplicitfloat
+ // attribute.
+ if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
+ FirstStoreAS)) &&
+ !NoVectors) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast)
+ LastLegalVectorType = i + 1;
}
}
-
- // We only use vectors if the constant is known to be zero or the target
- // allows it and the function is not marked with the noimplicitfloat
- // attribute.
- if (NoVectors) {
- LastLegalVectorType = 0;
- } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT,
- LastLegalVectorType,
- FirstStoreAS)) {
- LastLegalVectorType = 0;
- }
-
// Check if we found a legal integer type to store.
if (LastLegalType == 0 && LastLegalVectorType == 0)
return false;
@@ -10990,27 +11487,36 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
- if (IsExtractVecEltSrc) {
- unsigned NumElem = 0;
+ if (IsExtractVecSrc) {
+ unsigned NumStoresToMerge = 0;
+ bool IsVec = MemVT.isVector();
for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = St->getValue();
+ unsigned StoreValOpcode = St->getValue().getOpcode();
// This restriction could be loosened.
// Bail out if any stored values are not elements extracted from a vector.
// It should be possible to handle mixed sources, but load sources need
// more careful handling (see the block of code below that handles
// consecutive loads).
- if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
+ StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
return false;
// Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ unsigned Elts = i + 1;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ bool IsFast;
if (TLI.isTypeLegal(Ty) &&
- allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign))
- NumElem = i + 1;
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast)
+ NumStoresToMerge = i + 1;
}
- return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
false, true);
}
@@ -11084,7 +11590,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
- // All loads much share the same chain.
+ // All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
break;
@@ -11092,35 +11598,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
LastConsecutiveLoad = i;
-
// Find a legal type for the vector store.
- EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
+ bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) {
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) && IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) && IsFastLd) {
LastLegalVectorType = i + 1;
}
// Find a legal type for the integer store.
unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
- allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) && IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) && IsFastLd)
LastLegalIntegerType = i + 1;
// Or check whether a truncstore and extload is legal.
- else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
+ TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
- FirstStoreAlign) &&
- allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS,
- FirstLoadAlign))
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
+ IsFastLd)
LastLegalIntegerType = i+1;
}
}
@@ -11138,6 +11650,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (NumElem < 2)
return false;
+ // Collect the chains from all merged stores.
+ SmallVector<SDValue, 8> MergeStoreChains;
+ MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
+
// The latest Node in the DAG.
unsigned LatestNodeUsed = 0;
for (unsigned i=1; i<NumElem; ++i) {
@@ -11147,6 +11663,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// latest store node which is *used* and replaced by the wide store.
if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
LatestNodeUsed = i;
+
+ MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
}
LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
@@ -11155,34 +11673,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// to memory.
EVT JointMemOpVT;
if (UseVectorTy) {
- JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
} else {
unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
- JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
}
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
SDValue NewLoad = DAG.getLoad(
JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
+ SDValue NewStoreChain =
+ DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
+
SDValue NewStore = DAG.getStore(
- LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
- // Replace one of the loads with the new load.
- LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
- SDValue(NewLoad.getNode(), 1));
-
- // Remove the rest of the load chains.
- for (unsigned i = 1; i < NumElem ; ++i) {
- // Replace all chain users of the old load nodes with the chain of the new
- // load node.
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
}
// Replace the last store with the new store.
@@ -11200,6 +11717,114 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
return true;
}
+SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
+ SDLoc SL(ST);
+ SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
+ ST->getMemOperand());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
+ MVT::Other, ST->getChain(), ReplStore);
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorklist(Token.getNode());
+
+ // Don't add users to work list.
+ return CombineTo(ST, Token, false);
+}
+
+SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
+ SDValue Value = ST->getValue();
+ if (Value.getOpcode() == ISD::TargetConstantFP)
+ return SDValue();
+
+ SDLoc DL(ST);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
+
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+
+ SDValue Tmp;
+ switch (CFP->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("Unknown FP type");
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
+ case MVT::f128:
+ case MVT::ppcf128:
+ return SDValue();
+ case MVT::f32:
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ ;
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+ MVT::i32);
+ return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
+ }
+
+ return SDValue();
+ case MVT::f64:
+ if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ ;
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), SDLoc(CFP), MVT::i64);
+ return DAG.getStore(Chain, DL, Tmp,
+ Ptr, ST->getMemOperand());
+ }
+
+ if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ SDValue St0 = DAG.getStore(Chain, DL, Lo,
+ Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal,
+ ST->getAlignment(), AAInfo);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, DL, Ptr.getValueType()));
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, DL, Hi,
+ Ptr, ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal,
+ Alignment, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ St0, St1);
+ }
+
+ return SDValue();
+ }
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -11227,81 +11852,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
return Chain;
- // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
- // NOTE: If the original store is volatile, this transform must not increase
- // the number of stores. For example, on x86-32 an f64 can be stored in one
- // processor operation but an i64 (which is not legal) requires two. So the
- // transform should not be done in this case.
- if (Value.getOpcode() != ISD::TargetConstantFP) {
- SDValue Tmp;
- switch (CFP->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unknown FP type");
- case MVT::f16: // We don't do this for these yet.
- case MVT::f80:
- case MVT::f128:
- case MVT::ppcf128:
- break;
- case MVT::f32:
- if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
- ;
- Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
- bitcastToAPInt().getZExtValue(), SDLoc(CFP),
- MVT::i32);
- return DAG.getStore(Chain, SDLoc(N), Tmp,
- Ptr, ST->getMemOperand());
- }
- break;
- case MVT::f64:
- if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
- !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
- ;
- Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
- getZExtValue(), SDLoc(CFP), MVT::i64);
- return DAG.getStore(Chain, SDLoc(N), Tmp,
- Ptr, ST->getMemOperand());
- }
-
- if (!ST->isVolatile() &&
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
- // Many FP stores are not made apparent until after legalize, e.g. for
- // argument passing. Since this is so common, custom legalize the
- // 64-bit integer store into two 32-bit stores.
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
- SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
- if (DAG.getDataLayout().isBigEndian())
- std::swap(Lo, Hi);
-
- unsigned Alignment = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
- AAMDNodes AAInfo = ST->getAAInfo();
-
- SDLoc DL(N);
-
- SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
- Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal,
- ST->getAlignment(), AAInfo);
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, DL, Ptr.getValueType()));
- Alignment = MinAlign(Alignment, 4U);
- SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
- Ptr, ST->getPointerInfo().getWithOffset(4),
- isVolatile, isNonTemporal,
- Alignment, AAInfo);
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- St0, St1);
- }
-
- break;
- }
- }
- }
-
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
@@ -11319,8 +11869,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try transforming a pair floating point load / store ops to integer
// load / store ops.
- SDValue NewST = TransformFPLoadStorePair(N);
- if (NewST.getNode())
+ if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
@@ -11331,31 +11880,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
UseAA = false;
#endif
if (UseAA && ST->isUnindexed()) {
- // Walk up chain skipping non-aliasing memory nodes.
- SDValue BetterChain = FindBetterChain(N, Chain);
-
- // If there is a better chain.
- if (Chain != BetterChain) {
- SDValue ReplStore;
-
- // Replace the chain to avoid dependency.
- if (ST->isTruncatingStore()) {
- ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
- ST->getMemoryVT(), ST->getMemOperand());
- } else {
- ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
- ST->getMemOperand());
- }
+ // FIXME: We should do this even without AA enabled. AA will just allow
+ // FindBetterChain to work in more situations. The problem with this is that
+ // any combine that expects memory operations to be on consecutive chains
+ // first needs to be updated to look for users of the same chain.
- // Create token to keep both nodes around.
- SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
- MVT::Other, Chain, ReplStore);
-
- // Make sure the new and old chains are cleaned up.
- AddToWorklist(Token.getNode());
-
- // Don't add users to work list.
- return CombineTo(N, Token, false);
+ // Walk up chain skipping non-aliasing memory nodes, on this store and any
+ // adjacent stores.
+ if (findBetterNeighborChains(ST)) {
+ // replaceStoreChain uses CombineTo, which handled all of the worklist
+ // manipulation. Return the original node to not do anything else.
+ return SDValue(ST, 0);
}
}
@@ -11440,6 +11975,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return SDValue(N, 0);
}
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ //
+ // Make sure to do this only after attempting to merge stores in order to
+ // avoid changing the types of some subset of stores due to visit order,
+ // preventing their merging.
+ if (isa<ConstantFPSDNode>(Value)) {
+ if (SDValue NewSt = replaceStoreOfFPConstant(ST))
+ return NewSt;
+ }
+
return ReduceLoadOpStoreWidth(N);
}
@@ -11613,7 +12158,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
SDValue EltNo = N->getOperand(1);
- bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+
+ // extract_vector_elt (build_vector x, y), 1 -> y
+ if (ConstEltNo &&
+ InVec.getOpcode() == ISD::BUILD_VECTOR &&
+ TLI.isTypeLegal(VT) &&
+ (InVec.hasOneUse() ||
+ TLI.aggressivelyPreferBuildVectorSources(VT))) {
+ SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
+ EVT InEltVT = Elt.getValueType();
+
+ // Sometimes build_vector's scalar input types do not match result type.
+ if (NVT == InEltVT)
+ return Elt;
+
+ // TODO: It may be useful to truncate if free if the build_vector implicitly
+ // converts.
+ }
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
@@ -11621,13 +12183,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// patterns. For example on AVX, extracting elements from a wide vector
// without using extract_subvector. However, if we can find an underlying
// scalar value, then we can always use that.
- if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
- && ConstEltNo) {
- int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
int NumElem = VT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
// Find the new index to extract from.
- int OrigElt = SVOp->getMaskElt(Elt);
+ int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
// Extracting an undef index is undef.
if (OrigElt == -1)
@@ -12183,12 +12743,90 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
}
-SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
- // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
- // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
- // inputs come from at most two distinct vectors, turn this into a shuffle
- // node.
+// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
+// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
+// most two distinct vectors the same size as the result, attempt to turn this
+// into a legal shuffle.
+static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ int NumElts = VT.getVectorNumElements();
+ int NumOpElts = OpVT.getVectorNumElements();
+
+ SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
+ SmallVector<int, 8> Mask;
+
+ for (SDValue Op : N->ops()) {
+ // Peek through any bitcast.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (Op.getOpcode() == ISD::UNDEF) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ // What vector are we extracting the subvector from and at what index?
+ SDValue ExtVec = Op.getOperand(0);
+
+ // We want the EVT of the original extraction to correctly scale the
+ // extraction index.
+ EVT ExtVT = ExtVec.getValueType();
+
+ // Peek through any bitcast.
+ while (ExtVec.getOpcode() == ISD::BITCAST)
+ ExtVec = ExtVec.getOperand(0);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (ExtVec.getOpcode() == ISD::UNDEF) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+ // Ensure that we are extracting a subvector from a vector the same
+ // size as the result.
+ if (ExtVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ // Scale the subvector index to account for any bitcast.
+ int NumExtElts = ExtVT.getVectorNumElements();
+ if (0 == (NumExtElts % NumElts))
+ ExtIdx /= (NumExtElts / NumElts);
+ else if (0 == (NumElts % NumExtElts))
+ ExtIdx *= (NumElts / NumExtElts);
+ else
+ return SDValue();
+ // At most we can reference 2 inputs in the final shuffle.
+ if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) {
+ SV0 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx);
+ } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) {
+ SV1 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx + NumElts);
+ } else {
+ return SDValue();
+ }
+ }
+
+ if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
+ DAG.getBitcast(VT, SV1), Mask);
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
return N->getOperand(0);
@@ -12289,6 +12927,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
return V;
+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
@@ -12503,7 +13146,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
SVN->getMask().end(), [](int i) { return i == -1; })) {
N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat));
+ makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
N1 = DAG.getUNDEF(ConcatVT);
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
}
@@ -12981,6 +13624,21 @@ SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
+ if (N0->getOpcode() == ISD::AND) {
+ ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
+ if (AndConst && AndConst->getAPIntValue() == 0xffff) {
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
+ N0.getOperand(0));
+ }
+ }
+
+ return SDValue();
+}
+
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -13002,34 +13660,76 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
if (RHS.getOpcode() == ISD::BITCAST)
RHS = RHS.getOperand(0);
- if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ if (RHS.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ EVT RVT = RHS.getValueType();
+ unsigned NumElts = RHS.getNumOperands();
+
+ // Attempt to create a valid clear mask, splitting the mask into
+ // sub elements and checking to see if each is
+ // all zeros or all ones - suitable for shuffle masking.
+ auto BuildClearMask = [&](int Split) {
+ int NumSubElts = NumElts * Split;
+ int NumSubBits = RVT.getScalarSizeInBits() / Split;
+
SmallVector<int, 8> Indices;
- unsigned NumElts = RHS.getNumOperands();
+ for (int i = 0; i != NumSubElts; ++i) {
+ int EltIdx = i / Split;
+ int SubIdx = i % Split;
+ SDValue Elt = RHS.getOperand(EltIdx);
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ Indices.push_back(-1);
+ continue;
+ }
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue Elt = RHS.getOperand(i);
- if (isAllOnesConstant(Elt))
+ APInt Bits;
+ if (isa<ConstantSDNode>(Elt))
+ Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
+ else if (isa<ConstantFPSDNode>(Elt))
+ Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
+ else
+ return SDValue();
+
+ // Extract the sub element from the constant bit mask.
+ if (DAG.getDataLayout().isBigEndian()) {
+ Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
+ } else {
+ Bits = Bits.lshr(SubIdx * NumSubBits);
+ }
+
+ if (Split > 1)
+ Bits = Bits.trunc(NumSubBits);
+
+ if (Bits.isAllOnesValue())
Indices.push_back(i);
- else if (isNullConstant(Elt))
- Indices.push_back(NumElts+i);
+ else if (Bits == 0)
+ Indices.push_back(i + NumSubElts);
else
return SDValue();
}
// Let's see if the target supports this vector_shuffle.
- EVT RVT = RHS.getValueType();
- if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
+ EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
+ if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
return SDValue();
- // Return the new VECTOR_SHUFFLE node.
- EVT EltVT = RVT.getVectorElementType();
- SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
- DAG.getConstant(0, dl, EltVT));
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps);
- LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
- SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
- return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
- }
+ SDValue Zero = DAG.getConstant(0, dl, ClearVT);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
+ DAG.getBitcast(ClearVT, LHS),
+ Zero, &Indices[0]));
+ };
+
+ // Determine maximum split level (byte level masking).
+ int MaxSplit = 1;
+ if (RVT.getScalarSizeInBits() % 8 == 0)
+ MaxSplit = RVT.getScalarSizeInBits() / 8;
+
+ for (int Split = 1; Split <= MaxSplit; ++Split)
+ if (RVT.getScalarSizeInBits() % Split == 0)
+ if (SDValue S = BuildClearMask(Split))
+ return S;
return SDValue();
}
@@ -13041,60 +13741,17 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ SDValue Ops[] = {LHS, RHS};
+ // See if we can constant fold the vector operation.
+ if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
+ N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ return Fold;
+
+ // Try to convert a constant mask AND into a shuffle clear mask.
if (SDValue Shuffle = XformToShuffleWithZero(N))
return Shuffle;
- // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
- // this operation.
- if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
- RHS.getOpcode() == ISD::BUILD_VECTOR) {
- // Check if both vectors are constants. If not bail out.
- if (!(cast<BuildVectorSDNode>(LHS)->isConstant() &&
- cast<BuildVectorSDNode>(RHS)->isConstant()))
- return SDValue();
-
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
- SDValue LHSOp = LHS.getOperand(i);
- SDValue RHSOp = RHS.getOperand(i);
-
- // Can't fold divide by zero.
- if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
- N->getOpcode() == ISD::FDIV) {
- if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP &&
- cast<ConstantFPSDNode>(RHSOp.getNode())->isZero()))
- break;
- }
-
- EVT VT = LHSOp.getValueType();
- EVT RVT = RHSOp.getValueType();
- if (RVT != VT) {
- // Integer BUILD_VECTOR operands may have types larger than the element
- // size (e.g., when the element type is not legal). Prior to type
- // legalization, the types may not match between the two BUILD_VECTORS.
- // Truncate one of the operands to make them match.
- if (RVT.getSizeInBits() > VT.getSizeInBits()) {
- RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp);
- } else {
- LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp);
- VT = RVT;
- }
- }
- SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
- LHSOp, RHSOp);
- if (FoldOp.getOpcode() != ISD::UNDEF &&
- FoldOp.getOpcode() != ISD::Constant &&
- FoldOp.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(FoldOp);
- AddToWorklist(FoldOp.getNode());
- }
-
- if (Ops.size() == LHS.getNumOperands())
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
- }
-
// Type legalization might introduce new shuffles in the DAG.
// Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
// -> (shuffle (VBinOp (A, B)), Undef, Mask).
@@ -13109,7 +13766,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue UndefVector = LHS.getOperand(1);
SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- LHS.getOperand(0), RHS.getOperand(0));
+ LHS.getOperand(0), RHS.getOperand(0),
+ N->getFlags());
AddUsersToWorklist(N);
return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
&SVN0->getMask()[0]);
@@ -13390,9 +14048,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
CstOffset);
AddToWorklist(CPIdx.getNode());
- return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(), false,
- false, false, Alignment);
+ return DAG.getLoad(
+ TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, Alignment);
}
}
@@ -13481,8 +14140,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// Get a SetCC of the condition
// NOTE: Don't create a SETCC if it's not legal on this target.
if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC,
- LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) {
+ TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
SDValue Temp, SCC;
// cast from setcc result type to select result type
if (LegalTypes) {
@@ -13514,51 +14172,6 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
}
}
- // Check to see if this is the equivalent of setcc
- // FIXME: Turn all of these into setcc if setcc if setcc is legal
- // otherwise, go ahead with the folds.
- if (0 && isNullConstant(N3) && isOneConstant(N2)) {
- EVT XType = N0.getValueType();
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) {
- SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC);
- if (Res.getValueType() != VT)
- Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
- return Res;
- }
-
- // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
- if (isNullConstant(N1) && CC == ISD::SETEQ &&
- (!LegalOperations ||
- TLI.isOperationLegal(ISD::CTLZ, XType))) {
- SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
- return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
- DAG.getConstant(Log2_32(XType.getSizeInBits()),
- SDLoc(Ctlz),
- getShiftAmountTy(Ctlz.getValueType())));
- }
- // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
- if (isNullConstant(N1) && CC == ISD::SETGT) {
- SDLoc DL(N0);
- SDValue NegN0 = DAG.getNode(ISD::SUB, DL,
- XType, DAG.getConstant(0, DL, XType), N0);
- SDValue NotN0 = DAG.getNOT(DL, N0, XType);
- return DAG.getNode(ISD::SRL, DL, XType,
- DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
- DAG.getConstant(XType.getSizeInBits() - 1, DL,
- getShiftAmountTy(XType)));
- }
- // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
- if (isAllOnesConstant(N1) && CC == ISD::SETGT) {
- SDLoc DL(N0);
- SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0,
- DAG.getConstant(XType.getSizeInBits() - 1, DL,
- getShiftAmountTy(N0.getValueType())));
- return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL,
- XType));
- }
- }
-
// Check to see if this is an integer abs.
// select_cc setg[te] X, 0, X, -X ->
// select_cc setgt X, -1, X, -X ->
@@ -13666,7 +14279,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return S;
}
-SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -13690,16 +14303,16 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
// Newton iterations: Est = Est + Est (1 - Arg * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
AddToWorklist(Est.getNode());
}
}
@@ -13716,31 +14329,32 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
- unsigned Iterations) {
+ unsigned Iterations,
+ SDNodeFlags *Flags) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
- SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
AddToWorklist(HalfArg.getNode());
- HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
AddToWorklist(HalfArg.getNode());
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(Est.getNode());
}
return Est;
@@ -13752,7 +14366,8 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
/// =>
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
- unsigned Iterations) {
+ unsigned Iterations,
+ SDNodeFlags *Flags) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -13760,25 +14375,25 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
// Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
+ SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
AddToWorklist(HalfEst.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags);
AddToWorklist(Est.getNode());
}
return Est;
}
-SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
+SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -13790,8 +14405,8 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
AddToWorklist(Est.getNode());
if (Iterations) {
Est = UseOneConstNR ?
- BuildRsqrtNROneConst(Op, Est, Iterations) :
- BuildRsqrtNRTwoConst(Op, Est, Iterations);
+ BuildRsqrtNROneConst(Op, Est, Iterations, Flags) :
+ BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags);
}
return Est;
}
@@ -13955,14 +14570,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SDValue Chain = Chains.pop_back_val();
// For TokenFactor nodes, look at each operand and only continue up the
- // chain until we find two aliases. If we've seen two aliases, assume we'll
- // find more and revert to original chain since the xform is unlikely to be
- // profitable.
+ // chain until we reach the depth limit.
//
// FIXME: The depth check could be made to return the last non-aliasing
// chain we found before we hit a tokenfactor rather than the original
// chain.
- if (Depth > 6 || Aliases.size() == 2) {
+ if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
Aliases.clear();
Aliases.push_back(OriginalChain);
return;
@@ -14094,6 +14707,83 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+
+ // We must have a base and an offset.
+ if (!BasePtr.Base.getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.Base.getOpcode() == ISD::UNDEF)
+ return false;
+
+ SmallVector<StoreSDNode *, 8> ChainedStores;
+ ChainedStores.push_back(St);
+
+ // Walk up the chain and look for nodes with offsets from the same
+ // base pointer. Stop when reaching an instruction with a different kind
+ // or instruction which has a different base pointer.
+ StoreSDNode *Index = St;
+ while (Index) {
+ // If the chain has more than one use, then we can't reorder the mem ops.
+ if (Index != St && !SDValue(Index, 0)->hasOneUse())
+ break;
+
+ if (Index->isVolatile() || Index->isIndexed())
+ break;
+
+ // Find the base pointer and offset for this memory node.
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
+
+ // Check that the base pointer is the same as the original one.
+ if (!Ptr.equalBaseIndex(BasePtr))
+ break;
+
+ // Find the next memory operand in the chain. If the next operand in the
+ // chain is a store then move up and continue the scan with the next
+ // memory operand. If the next operand is a load save it and use alias
+ // information to check if it interferes with anything.
+ SDNode *NextInChain = Index->getChain().getNode();
+ while (true) {
+ if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
+ // We found a store node. Use it for the next iteration.
+ ChainedStores.push_back(STn);
+ Index = STn;
+ break;
+ } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+ NextInChain = Ldn->getChain().getNode();
+ continue;
+ } else {
+ Index = nullptr;
+ break;
+ }
+ }
+ }
+
+ bool MadeChange = false;
+ SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
+
+ for (StoreSDNode *ChainedStore : ChainedStores) {
+ SDValue Chain = ChainedStore->getChain();
+ SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
+
+ if (Chain != BetterChain) {
+ MadeChange = true;
+ BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
+ }
+ }
+
+ // Do all replacements after finding the replacements to make to avoid making
+ // the chains more complicated by introducing new TokenFactors.
+ for (auto Replacement : BetterChains)
+ replaceStoreChain(Replacement.first, Replacement.second);
+
+ return MadeChange;
+}
+
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
CodeGenOpt::Level OptLevel) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 2b9ba2c1b534..cfbb20947acc 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -118,9 +118,9 @@ bool FastISel::lowerArguments() {
for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
E = FuncInfo.Fn->arg_end();
I != E; ++I) {
- DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I);
+ DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I);
assert(VI != LocalValueMap.end() && "Missed an argument?");
- FuncInfo.ValueMap[I] = VI->second;
+ FuncInfo.ValueMap[&*I] = VI->second;
}
return true;
}
@@ -611,7 +611,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
// have to worry about calling conventions and target-specific lowering code.
// Instead we perform the call lowering right here.
//
- // CALLSEQ_START(0)
+ // CALLSEQ_START(0...)
// STACKMAP(id, nbytes, ...)
// CALLSEQ_END(0, 0)
//
@@ -647,8 +647,11 @@ bool FastISel::selectStackmap(const CallInst *I) {
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
- .addImm(0);
+ auto Builder =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown));
+ const MCInstrDesc &MCID = Builder.getInstr()->getDesc();
+ for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I)
+ Builder.addImm(0);
// Issue STACKMAP.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1100,13 +1103,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// The donothing intrinsic does, well, nothing.
case Intrinsic::donothing:
return true;
- case Intrinsic::eh_actions: {
- unsigned ResultReg = getRegForValue(UndefValue::get(II->getType()));
- if (!ResultReg)
- return false;
- updateValueMap(II, ResultReg);
- return true;
- }
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
assert(DI->getVariable() && "Missing variable");
@@ -1326,12 +1322,38 @@ bool FastISel::selectBitCast(const User *I) {
return true;
}
+// Remove local value instructions starting from the instruction after
+// SavedLastLocalValue to the current function insert point.
+void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
+{
+ MachineInstr *CurLastLocalValue = getLastLocalValue();
+ if (CurLastLocalValue != SavedLastLocalValue) {
+ // Find the first local value instruction to be deleted.
+ // This is the instruction after SavedLastLocalValue if it is non-NULL.
+ // Otherwise it's the first instruction in the block.
+ MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue);
+ if (SavedLastLocalValue)
+ ++FirstDeadInst;
+ else
+ FirstDeadInst = FuncInfo.MBB->getFirstNonPHI();
+ setLastLocalValue(SavedLastLocalValue);
+ removeDeadCode(FirstDeadInst, FuncInfo.InsertPt);
+ }
+}
+
bool FastISel::selectInstruction(const Instruction *I) {
+ MachineInstr *SavedLastLocalValue = getLastLocalValue();
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
if (isa<TerminatorInst>(I))
- if (!handlePHINodesInSuccessorBlocks(I->getParent()))
+ if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
+ // PHI node handling may have generated local value instructions,
+ // even though it failed to handle all PHI nodes.
+ // We remove these instructions because SelectionDAGISel will generate
+ // them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
return false;
+ }
DbgLoc = I->getDebugLoc();
@@ -1348,7 +1370,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
LibInfo->hasOptimizedCodeGen(Func))
return false;
- // Don't handle Intrinsic::trap if a trap funciton is specified.
+ // Don't handle Intrinsic::trap if a trap function is specified.
if (F && F->getIntrinsicID() == Intrinsic::trap &&
Call->hasFnAttr("trap-func-name"))
return false;
@@ -1380,8 +1402,12 @@ bool FastISel::selectInstruction(const Instruction *I) {
DbgLoc = DebugLoc();
// Undo phi node updates, because they will be added again by SelectionDAG.
- if (isa<TerminatorInst>(I))
+ if (isa<TerminatorInst>(I)) {
+ // PHI node handling may have generated local value instructions.
+ // We remove them because SelectionDAGISel will generate them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ }
return false;
}
@@ -1398,11 +1424,30 @@ void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr,
SmallVector<MachineOperand, 0>(), DbgLoc);
}
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(),
- MSucc->getBasicBlock());
- FuncInfo.MBB->addSuccessor(MSucc, BranchWeight);
+ if (FuncInfo.BPI) {
+ auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
+ FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(MSucc, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(MSucc);
+}
+
+void FastISel::finishCondBranch(const BasicBlock *BranchBB,
+ MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB) {
+ // Add TrueMBB as successor unless it is equal to the FalseMBB: This can
+ // happen in degenerate IR and MachineIR forbids to have a block twice in the
+ // successor/predecessor lists.
+ if (TrueMBB != FalseMBB) {
+ if (FuncInfo.BPI) {
+ auto BranchProbability =
+ FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB);
+ }
+
+ fastEmitBranch(FalseMBB, DbgLoc);
}
/// Emit an FNeg operation.
@@ -1864,21 +1909,18 @@ unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC, unsigned Op0,
- bool Op0IsKill, const ConstantFP *FPImm) {
+unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ const ConstantFP *FPImm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
- Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill))
.addFPImm(FPImm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill))
.addFPImm(FPImm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
@@ -1912,35 +1954,6 @@ unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill, unsigned Op1,
- bool Op1IsKill, uint64_t Imm1,
- uint64_t Imm2) {
- const MCInstrDesc &II = TII.get(MachineInstOpcode);
-
- unsigned ResultReg = createResultReg(RC);
- Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
- Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
-
- if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill))
- .addImm(Imm1)
- .addImm(Imm2);
- else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, getKillRegState(Op0IsKill))
- .addReg(Op1, getKillRegState(Op1IsKill))
- .addImm(Imm1)
- .addImm(Imm2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
- }
- return ResultReg;
-}
-
unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
@@ -1957,25 +1970,6 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC, uint64_t Imm1,
- uint64_t Imm2) {
- unsigned ResultReg = createResultReg(RC);
- const MCInstrDesc &II = TII.get(MachineInstOpcode);
-
- if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addImm(Imm1)
- .addImm(Imm2);
- else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1)
- .addImm(Imm2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
- }
- return ResultReg;
-}
-
unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
bool Op0IsKill, uint32_t Idx) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index cc306cbf5ae4..b62bd2bd63ee 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -87,6 +87,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
MachineModuleInfo &MMI = MF->getMMI();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
@@ -103,28 +104,29 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- // Static allocas can be folded into the initial stack frame adjustment.
- if (AI->isStaticAlloca()) {
+ Type *Ty = AI->getAllocatedType();
+ unsigned Align =
+ std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // Static allocas can be folded into the initial stack frame
+ // adjustment. For targets that don't realign the stack, don't
+ // do this if there is an extra alignment requirement.
+ if (AI->isStaticAlloca() &&
+ (TFI->isStackRealignable() || (Align <= StackAlign))) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
- Type *Ty = AI->getAllocatedType();
uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty);
- unsigned Align =
- std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
- AI->getAlignment());
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
StaticAllocaMap[AI] =
MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
-
} else {
- unsigned Align =
- std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(
- AI->getAllocatedType()),
- AI->getAlignment());
- unsigned StackAlign =
- MF->getSubtarget().getFrameLowering()->getStackAlignment();
+ // FIXME: Overaligned static allocas should be grouped into
+ // a single dynamic allocation instead of using a separate
+ // stack allocation for each one.
if (Align <= StackAlign)
Align = 0;
// Inform the Frame Information that we have variable-sized objects.
@@ -134,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Look for inline asm that clobbers the SP register.
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(I);
+ ImmutableCallSite CS(&*I);
if (isa<InlineAsm>(CS.getCalledValue())) {
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -163,7 +165,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
MF->getFrameInfo()->setHasVAStart(true);
}
- // If we have a musttail call in a variadic funciton, we need to ensure we
+ // If we have a musttail call in a variadic function, we need to ensure we
// forward implicit register parameters.
if (const auto *CI = dyn_cast<CallInst>(I)) {
if (CI->isMustTailCall() && Fn->isVarArg())
@@ -172,10 +174,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
- if (isUsedOutsideOfDefiningBlock(I))
- if (!isa<AllocaInst>(I) ||
- !StaticAllocaMap.count(cast<AllocaInst>(I)))
- InitializeRegForValue(I);
+ if (isUsedOutsideOfDefiningBlock(&*I))
+ if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(&*I);
// Collect llvm.dbg.declare information. This is done now instead of
// during the initial isel pass through the IR so that it is done
@@ -205,15 +206,36 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
// Decide the preferred extend type for a value.
- PreferredExtendType[I] = getPreferredExtendForValue(I);
+ PreferredExtendType[&*I] = getPreferredExtendForValue(&*I);
}
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
// also creates the initial PHI MachineInstrs, though none of the input
// operands are populated.
for (BB = Fn->begin(); BB != EB; ++BB) {
- MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
- MBBMap[BB] = MBB;
+ // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks
+ // are really data, and no instructions can live here.
+ if (BB->isEHPad()) {
+ const Instruction *I = BB->getFirstNonPHI();
+ // If this is a non-landingpad EH pad, mark this function as using
+ // funclets.
+ // FIXME: SEH catchpads do not create funclets, so we could avoid setting
+ // this in such cases in order to improve frame layout.
+ if (!isa<LandingPadInst>(I)) {
+ MMI.setHasEHFunclets(true);
+ MF->getFrameInfo()->setHasOpaqueSPAdjustment(true);
+ }
+ if (isa<CatchSwitchInst>(I)) {
+ assert(&*BB->begin() == I &&
+ "WinEHPrepare failed to remove PHIs from imaginary BBs");
+ continue;
+ }
+ if (isa<FuncletPadInst>(I))
+ assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs");
+ }
+
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB);
+ MBBMap[&*BB] = MBB;
MF->push_back(MBB);
// Transfer the address-taken flag. This is necessary because there could
@@ -252,94 +274,64 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Mark landing pad blocks.
SmallVector<const LandingPadInst *, 4> LPads;
for (BB = Fn->begin(); BB != EB; ++BB) {
- if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
- MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
- if (BB->isLandingPad())
- LPads.push_back(BB->getLandingPadInst());
+ const Instruction *FNP = BB->getFirstNonPHI();
+ if (BB->isEHPad() && MBBMap.count(&*BB))
+ MBBMap[&*BB]->setIsEHPad();
+ if (const auto *LPI = dyn_cast<LandingPadInst>(FNP))
+ LPads.push_back(LPI);
}
- // If this is an MSVC EH personality, we need to do a bit more work.
- EHPersonality Personality = EHPersonality::Unknown;
- if (Fn->hasPersonalityFn())
- Personality = classifyEHPersonality(Fn->getPersonalityFn());
- if (!isMSVCEHPersonality(Personality))
+ // If this personality uses funclets, we need to do a bit more work.
+ if (!Fn->hasPersonalityFn())
+ return;
+ EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
+ if (!isFuncletEHPersonality(Personality))
return;
- if (Personality == EHPersonality::MSVC_Win64SEH ||
- Personality == EHPersonality::MSVC_X86SEH) {
- addSEHHandlersForLPads(LPads);
- }
-
- WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(&fn);
- if (Personality == EHPersonality::MSVC_CXX) {
- const Function *WinEHParentFn = MMI.getWinEHParent(&fn);
- calculateWinCXXEHStateNumbers(WinEHParentFn, EHInfo);
- }
-
- // Copy the state numbers to LandingPadInfo for the current function, which
- // could be a handler or the parent. This should happen for 32-bit SEH and
- // C++ EH.
- if (Personality == EHPersonality::MSVC_CXX ||
- Personality == EHPersonality::MSVC_X86SEH) {
- for (const LandingPadInst *LP : LPads) {
- MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()];
- MMI.addWinEHState(LPadMBB, EHInfo.LandingPadStateMap[LP]);
- }
- }
-}
-
-void FunctionLoweringInfo::addSEHHandlersForLPads(
- ArrayRef<const LandingPadInst *> LPads) {
- MachineModuleInfo &MMI = MF->getMMI();
-
- // Iterate over all landing pads with llvm.eh.actions calls.
- for (const LandingPadInst *LP : LPads) {
- const IntrinsicInst *ActionsCall =
- dyn_cast<IntrinsicInst>(LP->getNextNode());
- if (!ActionsCall ||
- ActionsCall->getIntrinsicID() != Intrinsic::eh_actions)
- continue;
-
- // Parse the llvm.eh.actions call we found.
- MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()];
- SmallVector<std::unique_ptr<ActionHandler>, 4> Actions;
- parseEHActions(ActionsCall, Actions);
-
- // Iterate EH actions from most to least precedence, which means
- // iterating in reverse.
- for (auto I = Actions.rbegin(), E = Actions.rend(); I != E; ++I) {
- ActionHandler *Action = I->get();
- if (auto *CH = dyn_cast<CatchHandler>(Action)) {
- const auto *Filter =
- dyn_cast<Function>(CH->getSelector()->stripPointerCasts());
- assert((Filter || CH->getSelector()->isNullValue()) &&
- "expected function or catch-all");
- const auto *RecoverBA =
- cast<BlockAddress>(CH->getHandlerBlockOrFunc());
- MMI.addSEHCatchHandler(LPadMBB, Filter, RecoverBA);
+ // Calculate state numbers if we haven't already.
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+ if (Personality == EHPersonality::MSVC_CXX)
+ calculateWinCXXEHStateNumbers(&fn, EHInfo);
+ else if (isAsynchronousEHPersonality(Personality))
+ calculateSEHStateNumbers(&fn, EHInfo);
+ else if (Personality == EHPersonality::CoreCLR)
+ calculateClrEHStateNumbers(&fn, EHInfo);
+
+ calculateCatchReturnSuccessorColors(&fn, EHInfo);
+
+ // Map all BB references in the WinEH data to MBBs.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (H.CatchObj.Alloca) {
+ assert(StaticAllocaMap.count(H.CatchObj.Alloca));
+ H.CatchObj.FrameIndex = StaticAllocaMap[H.CatchObj.Alloca];
} else {
- assert(isa<CleanupHandler>(Action));
- const auto *Fini = cast<Function>(Action->getHandlerBlockOrFunc());
- MMI.addSEHCleanupHandler(LPadMBB, Fini);
+ H.CatchObj.FrameIndex = INT_MAX;
}
+ if (H.Handler)
+ H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
}
}
+ for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
+ if (UME.Cleanup)
+ UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()];
+ for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
+ const BasicBlock *BB = UME.Handler.get<const BasicBlock *>();
+ UME.Handler = MBBMap[BB];
+ }
+ for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
+ const BasicBlock *BB = CME.Handler.get<const BasicBlock *>();
+ CME.Handler = MBBMap[BB];
+ }
}
/// clear - Clear out all the function-specific state. This returns this
/// FunctionLoweringInfo to an empty state, ready to be used for a
/// different function.
void FunctionLoweringInfo::clear() {
- assert(CatchInfoFound.size() == CatchInfoLost.size() &&
- "Not all catch info was assigned to a landing pad!");
-
MBBMap.clear();
ValueMap.clear();
StaticAllocaMap.clear();
-#ifndef NDEBUG
- CatchInfoLost.clear();
- CatchInfoFound.clear();
-#endif
LiveOutRegInfo.clear();
VisitedBBs.clear();
ArgDbgValues.clear();
@@ -520,6 +512,17 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
return 0;
}
+unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
+ const Value *CPI, const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ auto I = CatchPadExceptionPointers.insert({CPI, 0});
+ unsigned &VReg = I.first->second;
+ if (I.second)
+ VReg = MRI.createVirtualRegister(RC);
+ assert(VReg && "null vreg in exception pointer table!");
+ return VReg;
+}
+
/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
/// being passed to this variadic function, and set the MachineModuleInfo's
/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
@@ -547,10 +550,9 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
MachineBasicBlock *MBB) {
- MMI.addPersonality(
- MBB,
- cast<Function>(
- I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()));
+ if (const auto *PF = dyn_cast<Function>(
+ I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()))
+ MMI.addPersonality(PF);
if (I.isCleanup())
MMI.addCleanup(MBB);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 5ec10308dc28..a1e2d410ab00 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
UseRC = RC;
else if (RC) {
const TargetRegisterClass *ComRC =
- TRI->getCommonSubClass(UseRC, RC);
+ TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy);
// If multiple uses expect disjoint register classes, we emit
// copies in AddRegisterOperand.
if (ComRC)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index fbc8f1e89f6e..f46767f6c4a1 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -39,6 +39,10 @@ using namespace llvm;
#define DEBUG_TYPE "legalizedag"
+namespace {
+
+struct FloatSignAsInt;
+
//===----------------------------------------------------------------------===//
/// This takes an arbitrary SelectionDAG as input and
/// hacks on it until the target machine can handle it. This involves
@@ -51,7 +55,6 @@ using namespace llvm;
/// 'setcc' instruction efficiently, but does support 'brcc' instruction, this
/// will attempt merge setcc and brc instructions into brcc's.
///
-namespace {
class SelectionDAGLegalize {
const TargetMachine &TM;
const TargetLowering &TLI;
@@ -130,7 +133,11 @@ private:
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
SmallVectorImpl<SDValue> &Results);
- SDValue ExpandFCOPYSIGN(SDNode *Node);
+ void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const;
+ SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL,
+ SDValue NewIntValue) const;
+ SDValue ExpandFCOPYSIGN(SDNode *Node) const;
+ SDValue ExpandFABS(SDNode *Node) const;
SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
SDLoc dl);
SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
@@ -138,6 +145,7 @@ private:
SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
SDLoc dl);
+ SDValue ExpandBITREVERSE(SDValue Op, SDLoc dl);
SDValue ExpandBSWAP(SDValue Op, SDLoc dl);
SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl);
@@ -146,10 +154,11 @@ private:
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+ SDValue ExpandConstant(ConstantSDNode *CP);
- std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
-
- void ExpandNode(SDNode *Node);
+ // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall
+ bool ExpandNode(SDNode *Node);
+ void ConvertNodeToLibcall(SDNode *Node);
void PromoteNode(SDNode *Node);
public:
@@ -273,17 +282,30 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
if (Extend) {
- SDValue Result =
- DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
- DAG.getEntryNode(),
- CPIdx, MachinePointerInfo::getConstantPool(),
- VT, false, false, false, Alignment);
+ SDValue Result = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT,
+ false, false, false, Alignment);
return Result;
}
SDValue Result =
- DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(), false, false, false,
- Alignment);
+ DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, Alignment);
+ return Result;
+}
+
+/// Expands the Constant node to a load from the constant pool.
+SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
+ SDLoc dl(CP);
+ EVT VT = CP->getValueType(0);
+ SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(),
+ TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue Result =
+ DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, Alignment);
return Result;
}
@@ -594,13 +616,13 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// Store the vector.
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI),
- false, false, 0);
+ SDValue Ch = DAG.getStore(
+ DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false,
+ false, 0);
// Truncate or zero extend offset to target pointer type.
- unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
- Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+ Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT);
// Add the offset to the index.
unsigned EltSize = EltVT.getSizeInBits()/8;
Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,
@@ -610,9 +632,9 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
false, false, 0);
// Load the updated vector.
- return DAG.getLoad(VT, dl, Ch, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI), false, false,
- false, 0);
+ return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), SPFI),
+ false, false, false, 0);
}
@@ -728,14 +750,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
case TargetLowering::Legal: {
// If this is an unaligned store and the target doesn't support it,
// expand it.
+ EVT MemVT = ST->getMemoryVT();
unsigned AS = ST->getAddressSpace();
unsigned Align = ST->getAlignment();
- if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
- if (Align < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
- }
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
break;
}
case TargetLowering::Custom: {
@@ -839,20 +859,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
ReplaceNode(SDValue(Node, 0), Result);
} else {
- switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(),
- StVT.getSimpleVT())) {
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
+ EVT MemVT = ST->getMemoryVT();
unsigned AS = ST->getAddressSpace();
unsigned Align = ST->getAlignment();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DL.getABITypeAlignment(Ty);
- if (Align < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
- }
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
break;
}
case TargetLowering::Custom: {
@@ -895,17 +911,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
+ EVT MemVT = LD->getMemoryVT();
unsigned AS = LD->getAddressSpace();
unsigned Align = LD->getAlignment();
+ const DataLayout &DL = DAG.getDataLayout();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) {
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
- if (Align < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
- }
- }
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
break;
}
case TargetLowering::Custom: {
@@ -1092,23 +1105,20 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Res.getValue(1);
}
} else {
- // If this is an unaligned load and the target doesn't support
- // it, expand it.
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
EVT MemVT = LD->getMemoryVT();
unsigned AS = LD->getAddressSpace();
unsigned Align = LD->getAlignment();
- if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) {
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
- if (Align < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain);
- }
- }
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain);
}
break;
}
case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) {
+ EVT DestVT = Node->getValueType(0);
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
// If the source type is not legal, see if there is a legal extload to
// an intermediate type that we can then extend further.
EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
@@ -1127,6 +1137,23 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Load.getValue(1);
break;
}
+
+ // Handle the special case of fp16 extloads. EXTLOAD doesn't have the
+ // normal undefined upper bits behavior to allow using an in-reg extend
+ // with the illegal FP type, so load as an integer and do the
+ // from-integer conversion.
+ if (SrcVT.getScalarType() == MVT::f16) {
+ EVT ISrcVT = SrcVT.changeTypeToInteger();
+ EVT IDestVT = DestVT.changeTypeToInteger();
+ EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+
+ SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT,
+ Chain, Ptr, ISrcVT,
+ LD->getMemOperand());
+ Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
+ Chain = Result.getValue(1);
+ break;
+ }
}
assert(!SrcVT.isVector() &&
@@ -1180,15 +1207,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
#ifndef NDEBUG
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
- TargetLowering::TypeLegal &&
+ assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal ||
+ TLI.isTypeLegal(Node->getValueType(i))) &&
"Unexpected illegal type!");
for (const SDValue &Op : Node->op_values())
- assert((TLI.getTypeAction(*DAG.getContext(),
- Op.getValueType()) == TargetLowering::TypeLegal ||
- Op.getOpcode() == ISD::TargetConstant) &&
- "Unexpected illegal type!");
+ assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
+ TargetLowering::TypeLegal ||
+ TLI.isTypeLegal(Op.getValueType()) ||
+ Op.getOpcode() == ISD::TargetConstant) &&
+ "Unexpected illegal type!");
#endif
// Figure out the correct action; the way to query this varies by opcode
@@ -1201,6 +1230,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STACKSAVE:
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ break;
case ISD::VAARG:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getValueType(0));
@@ -1229,7 +1262,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SETCC:
case ISD::BR_CC: {
unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
- Node->getOpcode() == ISD::SETCC ? 2 : 1;
+ Node->getOpcode() == ISD::SETCC ? 2 :
+ Node->getOpcode() == ISD::SETCCE ? 3 : 1;
unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
@@ -1265,6 +1299,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_SETUP_DISPATCH:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -1281,6 +1316,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
+ case ISD::READCYCLECOUNTER:
+ // READCYCLECOUNTER returns an i64, even if type legalization might have
+ // expanded that to several smaller types.
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
+ break;
case ISD::READ_REGISTER:
case ISD::WRITE_REGISTER:
// Named register is legal in the DAG, but blocked by register name
@@ -1379,7 +1419,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
// FALL THROUGH
case TargetLowering::Expand:
- ExpandNode(Node);
+ if (ExpandNode(Node))
+ return;
+ // FALL THROUGH
+ case TargetLowering::LibCall:
+ ConvertNodeToLibcall(Node);
return;
case TargetLowering::Promote:
PromoteNode(Node);
@@ -1419,6 +1463,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in
// the vector. If all are expanded here, we don't want one store per vector
// element.
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
SDValue StackPtr, Ch;
for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
@@ -1433,6 +1482,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode()))
continue;
+ // If the index is dependent on the store we will introduce a cycle when
+ // creating the load (the load uses the index, and by replacing the chain
+ // we will make the index dependent on the load).
+ if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist))
+ continue;
+
StackPtr = ST->getBasePtr();
Ch = SDValue(ST, 0);
break;
@@ -1490,7 +1545,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// First store the whole vector.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
@@ -1528,7 +1584,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
SDLoc dl(Node);
SDValue FIPtr = DAG.CreateStackTemporary(VT);
int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
@@ -1568,69 +1625,143 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
false, false, false, 0);
}
-SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
- SDLoc dl(Node);
- SDValue Tmp1 = Node->getOperand(0);
- SDValue Tmp2 = Node->getOperand(1);
-
- // Get the sign bit of the RHS. First obtain a value that has the same
- // sign as the sign bit, i.e. negative if and only if the sign bit is 1.
- SDValue SignBit;
- EVT FloatVT = Tmp2.getValueType();
- EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
+namespace {
+/// Keeps track of state when getting the sign of a floating-point value as an
+/// integer.
+struct FloatSignAsInt {
+ EVT FloatVT;
+ SDValue Chain;
+ SDValue FloatPtr;
+ SDValue IntPtr;
+ MachinePointerInfo IntPointerInfo;
+ MachinePointerInfo FloatPointerInfo;
+ SDValue IntValue;
+ APInt SignMask;
+};
+}
+
+/// Bitcast a floating-point value to an integer value. Only bitcast the part
+/// containing the sign bit if the target has no integer value capable of
+/// holding all bits of the floating-point value.
+void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
+ SDLoc DL, SDValue Value) const {
+ EVT FloatVT = Value.getValueType();
+ unsigned NumBits = FloatVT.getSizeInBits();
+ State.FloatVT = FloatVT;
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+ // Convert to an integer of the same size.
if (TLI.isTypeLegal(IVT)) {
- // Convert to an integer with the same sign bit.
- SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
+ State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
+ State.SignMask = APInt::getSignBit(NumBits);
+ return;
+ }
+
+ auto &DataLayout = DAG.getDataLayout();
+ // Store the float to memory, then load the sign part out as an integer.
+ MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8);
+ // First create a temporary that is aligned for both the load and store.
+ SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ // Then store the float to it.
+ State.FloatPtr = StackPtr;
+ MachineFunction &MF = DAG.getMachineFunction();
+ State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI);
+ State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr,
+ State.FloatPointerInfo, false, false, 0);
+
+ SDValue IntPtr;
+ if (DataLayout.isBigEndian()) {
+ assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+ // Load out a legal integer with the same sign bit as the float.
+ IntPtr = StackPtr;
+ State.IntPointerInfo = State.FloatPointerInfo;
} else {
- auto &DL = DAG.getDataLayout();
- // Store the float to memory, then load the sign part out as an integer.
- MVT LoadTy = TLI.getPointerTy(DL);
- // First create a temporary that is aligned for both the load and store.
- SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
- // Then store the float to it.
- SDValue Ch =
- DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
- false, false, 0);
- if (DL.isBigEndian()) {
- assert(FloatVT.isByteSized() && "Unsupported floating point type!");
- // Load out a legal integer with the same sign bit as the float.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
- false, false, false, 0);
- } else { // Little endian
- SDValue LoadPtr = StackPtr;
- // The float may be wider than the integer we are going to load. Advance
- // the pointer so that the loaded integer will contain the sign bit.
- unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
- unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
- LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr,
- DAG.getConstant(ByteOffset, dl,
- LoadPtr.getValueType()));
- // Load a legal integer containing the sign bit.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
- false, false, false, 0);
- // Move the sign bit to the top bit of the loaded integer.
- unsigned BitShift = LoadTy.getSizeInBits() -
- (FloatVT.getSizeInBits() - 8 * ByteOffset);
- assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
- if (BitShift)
- SignBit = DAG.getNode(
- ISD::SHL, dl, LoadTy, SignBit,
- DAG.getConstant(BitShift, dl,
- TLI.getShiftAmountTy(SignBit.getValueType(), DL)));
- }
+ // Advance the pointer so that the loaded byte will contain the sign bit.
+ unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1;
+ IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(ByteOffset, DL, StackPtr.getValueType()));
+ State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
+ ByteOffset);
}
- // Now get the sign bit proper, by seeing whether the value is negative.
- SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()),
- SignBit,
- DAG.getConstant(0, dl, SignBit.getValueType()),
- ISD::SETLT);
- // Get the absolute value of the result.
- SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
- // Select between the nabs and abs value based on the sign bit of
- // the input.
- return DAG.getSelect(dl, AbsVal.getValueType(), SignBit,
- DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
- AbsVal);
+
+ State.IntPtr = IntPtr;
+ State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain,
+ IntPtr, State.IntPointerInfo, MVT::i8,
+ false, false, false, 0);
+ State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7);
+}
+
+/// Replace the integer value produced by getSignAsIntValue() with a new value
+/// and cast the result back to a floating-point type.
+SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State,
+ SDLoc DL, SDValue NewIntValue) const {
+ if (!State.Chain)
+ return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue);
+
+ // Override the part containing the sign bit in the value stored on the stack.
+ SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr,
+ State.IntPointerInfo, MVT::i8, false, false,
+ 0);
+ return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr,
+ State.FloatPointerInfo, false, false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Mag = Node->getOperand(0);
+ SDValue Sign = Node->getOperand(1);
+
+ // Get sign bit into an integer value.
+ FloatSignAsInt SignAsInt;
+ getSignAsIntValue(SignAsInt, DL, Sign);
+
+ EVT IntVT = SignAsInt.IntValue.getValueType();
+ SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+ SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue,
+ SignMask);
+
+ // If FABS is legal transform FCOPYSIGN(x, y) => sign(x) ? -FABS(x) : FABS(X)
+ EVT FloatVT = Mag.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) {
+ SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag);
+ SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue);
+ SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit,
+ DAG.getConstant(0, DL, IntVT), ISD::SETNE);
+ return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue);
+ }
+
+ // Transform values to integer, copy the sign bit and transform back.
+ FloatSignAsInt MagAsInt;
+ getSignAsIntValue(MagAsInt, DL, Mag);
+ assert(SignAsInt.SignMask == MagAsInt.SignMask);
+ SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue,
+ ClearSignMask);
+ SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit);
+
+ return modifySignAsInt(MagAsInt, DL, CopiedSign);
+}
+
+SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Value = Node->getOperand(0);
+
+ // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal.
+ EVT FloatVT = Value.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) {
+ SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT);
+ return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero);
+ }
+
+ // Transform value to integer, clear the sign bit and transform back.
+ FloatSignAsInt ValueAsInt;
+ getSignAsIntValue(ValueAsInt, DL, Value);
+ EVT IntVT = ValueAsInt.IntValue.getValueType();
+ SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue,
+ ClearSignMask);
+ return modifySignAsInt(ValueAsInt, DL, ClearedSign);
}
void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
@@ -1798,7 +1929,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
int SPFI = StackPtrFI->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
@@ -1838,14 +1970,14 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
int SPFI = StackPtrFI->getIndex();
- SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
- StackPtr,
- MachinePointerInfo::getFixedStack(SPFI),
- Node->getValueType(0).getVectorElementType(),
- false, false, 0);
- return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI),
- false, false, false, 0);
+ SDValue Ch = DAG.getTruncStore(
+ DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI),
+ Node->getValueType(0).getVectorElementType(), false, false, 0);
+ return DAG.getLoad(
+ Node->getValueType(0), dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false,
+ false, false, 0);
}
static bool
@@ -2011,9 +2143,10 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
SDValue CPIdx =
DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
+ return DAG.getLoad(
+ VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, Alignment);
}
SmallSet<SDValue, 16> DefinedValues;
@@ -2205,47 +2338,6 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
return ExpandLibCall(LC, Node, isSigned);
}
-/// Return true if divmod libcall is available.
-static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
- const TargetLowering &TLI) {
- RTLIB::Libcall LC;
- switch (Node->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
- case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
- case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
- case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
- case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
- case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
- }
-
- return TLI.getLibcallName(LC) != nullptr;
-}
-
-/// Only issue divrem libcall if both quotient and remainder are needed.
-static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
- // The other use might have been replaced with a divrem already.
- unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
- unsigned OtherOpcode = 0;
- if (isSigned)
- OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
- else
- OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
-
- SDValue Op0 = Node->getOperand(0);
- SDValue Op1 = Node->getOperand(1);
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
- if (User == Node)
- continue;
- if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) &&
- User->getOperand(0) == Op0 &&
- User->getOperand(1) == Op1)
- return true;
- }
- return false;
-}
-
/// Issue libcalls to __{u}divmod to compute div / rem pairs.
void
SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
@@ -2428,6 +2520,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Op0,
EVT DestVT,
SDLoc dl) {
+ // TODO: Should any fast-math-flags be set for the created nodes?
+
if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
// simple 32-bit [signed|unsigned] integer to float/double expansion
@@ -2611,14 +2705,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
Alignment = std::min(Alignment, 4u);
SDValue FudgeInReg;
if (DestVT == MVT::f32)
- FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
+ FudgeInReg = DAG.getLoad(
+ MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, Alignment);
else {
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
- DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, false, Alignment);
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ false, false, false, Alignment);
HandleSDNode Handle(Load);
LegalizeOp(Load.getNode());
FudgeInReg = Handle.getValue();
@@ -2713,6 +2808,31 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
}
+/// Open code the operations for BITREVERSE.
+SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ SDValue Tmp, Tmp2;
+ Tmp = DAG.getConstant(0, dl, VT);
+ for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
+ if (I < J)
+ Tmp2 =
+ DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
+ else
+ Tmp2 =
+ DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
+
+ APInt Shift(Sz, 1);
+ Shift = Shift.shl(J);
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
+ }
+
+ return Tmp;
+}
+
/// Open code the operations for BSWAP of the specified operation.
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
EVT VT = Op.getValueType();
@@ -2865,16 +2985,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
}
}
-std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
- unsigned Opc = Node->getOpcode();
- MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
-
- return ExpandChainLibCall(LC, Node, false);
-}
-
-void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2888,6 +2999,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
Results.push_back(Tmp1);
break;
+ case ISD::BITREVERSE:
+ Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl));
+ break;
case ISD::BSWAP:
Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
break;
@@ -2908,30 +3022,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// preserve the chain and be done.
Results.push_back(Node->getOperand(0));
break;
+ case ISD::READCYCLECOUNTER:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.append(Node->getNumValues() - 1,
+ DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ break;
case ISD::EH_SJLJ_SETJMP:
// If the target didn't expand this, just return 'zero' and preserve the
// chain.
Results.push_back(DAG.getConstant(0, dl, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
- case ISD::ATOMIC_FENCE: {
- // If the target didn't lower this, lower it to '__sync_synchronize()' call
- // FIXME: handle "fence singlethread" more efficiently.
- TargetLowering::ArgListTy Args;
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__sync_synchronize",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args), 0);
-
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-
- Results.push_back(CallResult.second);
- break;
- }
case ISD::ATOMIC_LOAD: {
// There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0));
@@ -2959,26 +3062,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Swap.getValue(1));
break;
}
- // By default, atomic intrinsics are marked Legal and lowered. Targets
- // which don't support them directly, however, may want libcalls, in which
- // case they mark them Expand, and we get here.
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- case ISD::ATOMIC_CMP_SWAP: {
- std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node);
- Results.push_back(Tmp.first);
- Results.push_back(Tmp.second);
- break;
- }
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
// Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and
// splits out the success value as a comparison. Expanding the resulting
@@ -3017,21 +3100,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
- case ISD::TRAP: {
- // If this operation is not supported, lower it to 'abort()' call
- TargetLowering::ArgListTy Args;
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("abort",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args), 0);
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-
- Results.push_back(CallResult.second);
- break;
- }
case ISD::FP_ROUND:
case ISD::BITCAST:
Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
@@ -3097,6 +3165,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(0),
Tmp1, ISD::SETLT);
True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ // TODO: Should any fast-math-flags be set for the FSUB?
False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
DAG.getNode(ISD::FSUB, dl, VT,
Node->getOperand(0), Tmp1));
@@ -3106,57 +3175,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
- case ISD::VAARG: {
- const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
- EVT VT = Node->getValueType(0);
- Tmp1 = Node->getOperand(0);
- Tmp2 = Node->getOperand(1);
- unsigned Align = Node->getConstantOperandVal(3);
-
- SDValue VAListLoad =
- DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2,
- MachinePointerInfo(V), false, false, false, 0);
- SDValue VAList = VAListLoad;
-
- if (Align > TLI.getMinStackArgumentAlignment()) {
- assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
-
- VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
- DAG.getConstant(Align - 1, dl,
- VAList.getValueType()));
-
- VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList,
- DAG.getConstant(-(int64_t)Align, dl,
- VAList.getValueType()));
- }
-
- // Increment the pointer, VAList, to the next vaarg
- Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
- DAG.getConstant(DAG.getDataLayout().getTypeAllocSize(
- VT.getTypeForEVT(*DAG.getContext())),
- dl, VAList.getValueType()));
- // Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
- MachinePointerInfo(V), false, false, 0);
- // Load the actual argument out of the pointer VAList
- Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
- false, false, false, 0));
+ case ISD::VAARG:
+ Results.push_back(DAG.expandVAArg(Node));
Results.push_back(Results[0].getValue(1));
break;
- }
- case ISD::VACOPY: {
- // This defaults to loading a pointer from the input and storing it to the
- // output, returning the chain.
- const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
- const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
- Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl,
- Node->getOperand(0), Node->getOperand(2),
- MachinePointerInfo(VS), false, false, false, 0);
- Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
- MachinePointerInfo(VD), false, false, 0);
- Results.push_back(Tmp1);
+ case ISD::VACOPY:
+ Results.push_back(DAG.expandVACopy(Node));
break;
- }
case ISD::EXTRACT_VECTOR_ELT:
if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
// This must be an access of the only element. Return it.
@@ -3302,28 +3327,24 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Node->getOperand(0));
}
break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(0));
+ break;
case ISD::FCOPYSIGN:
Results.push_back(ExpandFCOPYSIGN(Node));
break;
case ISD::FNEG:
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
+ // TODO: If FNEG has fast-math-flags, propagate them to the FSUB.
Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
Node->getOperand(0));
Results.push_back(Tmp1);
break;
- case ISD::FABS: {
- // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
- EVT VT = Node->getValueType(0);
- Tmp1 = Node->getOperand(0);
- Tmp2 = DAG.getConstantFP(0.0, dl, VT);
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()),
- Tmp1, Tmp2, ISD::SETUGT);
- Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
- Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3);
- Results.push_back(Tmp1);
+ case ISD::FABS:
+ Results.push_back(ExpandFABS(Node));
break;
- }
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -3344,25 +3365,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
- case ISD::FMINNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
- RTLIB::FMIN_F80, RTLIB::FMIN_F128,
- RTLIB::FMIN_PPCF128));
- break;
- case ISD::FMAXNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
- RTLIB::FMAX_F80, RTLIB::FMAX_F128,
- RTLIB::FMAX_PPCF128));
- break;
- case ISD::FSQRT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_F128,
- RTLIB::SQRT_PPCF128));
- break;
case ISD::FSIN:
case ISD::FCOS: {
EVT VT = Node->getValueType(0);
- bool isSIN = Node->getOpcode() == ISD::FSIN;
// Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin /
// fcos which share the same operand and both are used.
if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
@@ -3370,137 +3375,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
&& useSinCos(Node)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
- if (!isSIN)
+ if (Node->getOpcode() == ISD::FCOS)
Tmp1 = Tmp1.getValue(1);
Results.push_back(Tmp1);
- } else if (isSIN) {
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_F128,
- RTLIB::SIN_PPCF128));
- } else {
- Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_F128,
- RTLIB::COS_PPCF128));
}
break;
}
- case ISD::FSINCOS:
- // Expand into sincos libcall.
- ExpandSinCosLibCall(Node, Results);
- break;
- case ISD::FLOG:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128));
- break;
- case ISD::FLOG2:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128));
- break;
- case ISD::FLOG10:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128));
- break;
- case ISD::FEXP:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128));
- break;
- case ISD::FEXP2:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128));
- break;
- case ISD::FTRUNC:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
- RTLIB::TRUNC_PPCF128));
- break;
- case ISD::FFLOOR:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
- RTLIB::FLOOR_PPCF128));
- break;
- case ISD::FCEIL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_F128,
- RTLIB::CEIL_PPCF128));
- break;
- case ISD::FRINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_F128,
- RTLIB::RINT_PPCF128));
- break;
- case ISD::FNEARBYINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_F128,
- RTLIB::NEARBYINT_PPCF128));
- break;
- case ISD::FROUND:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
- RTLIB::ROUND_F64,
- RTLIB::ROUND_F80,
- RTLIB::ROUND_F128,
- RTLIB::ROUND_PPCF128));
- break;
- case ISD::FPOWI:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128));
- break;
- case ISD::FPOW:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_F128,
- RTLIB::POW_PPCF128));
- break;
- case ISD::FDIV:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
- RTLIB::DIV_F80, RTLIB::DIV_F128,
- RTLIB::DIV_PPCF128));
- break;
- case ISD::FREM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
- RTLIB::REM_F80, RTLIB::REM_F128,
- RTLIB::REM_PPCF128));
- break;
- case ISD::FMA:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
- RTLIB::FMA_F80, RTLIB::FMA_F128,
- RTLIB::FMA_PPCF128));
- break;
case ISD::FMAD:
llvm_unreachable("Illegal fmad should never be formed");
- case ISD::FADD:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_F128,
- RTLIB::ADD_PPCF128));
- break;
- case ISD::FMUL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
- RTLIB::MUL_F80, RTLIB::MUL_F128,
- RTLIB::MUL_PPCF128));
- break;
- case ISD::FP16_TO_FP: {
- if (Node->getValueType(0) == MVT::f32) {
- Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
- break;
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) != MVT::f32) {
+ // We can extend to types bigger than f32 in two steps without changing
+ // the result. Since "f16 -> f32" is much more commonly available, give
+ // CodeGen the option of emitting that before resorting to a libcall.
+ SDValue Res =
+ DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
}
-
- // We can extend to types bigger than f32 in two steps without changing the
- // result. Since "f16 -> f32" is much more commonly available, give CodeGen
- // the option of emitting that before resorting to a libcall.
- SDValue Res =
- DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
- Results.push_back(
- DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
break;
- }
- case ISD::FP_TO_FP16: {
+ case ISD::FP_TO_FP16:
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
SDValue Op = Node->getOperand(0);
MVT SVT = Op.getSimpleValueType();
@@ -3512,16 +3407,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getIntPtrConstant(0, dl));
Results.push_back(
DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal));
- break;
}
}
-
- RTLIB::Libcall LC =
- RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
- Results.push_back(ExpandLibCall(LC, Node, false));
break;
- }
case ISD::ConstantFP: {
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
@@ -3530,17 +3418,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(ExpandConstantFP(CFP, true));
break;
}
+ case ISD::Constant: {
+ ConstantSDNode *CP = cast<ConstantSDNode>(Node);
+ Results.push_back(ExpandConstant(CP));
+ break;
+ }
case ISD::FSUB: {
EVT VT = Node->getValueType(0);
if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags;
Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
- Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags);
Results.push_back(Tmp1);
- } else {
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
- RTLIB::SUB_F80, RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128));
}
break;
}
@@ -3564,29 +3454,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
Tmp2 = Node->getOperand(0);
Tmp3 = Node->getOperand(1);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
- (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
- // If div is legal, it's better to do the normal expansion
- !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
- useDivRem(Node, isSigned, false))) {
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ Results.push_back(Tmp1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
- } else if (isSigned)
- Tmp1 = ExpandIntLibCall(Node, true,
- RTLIB::SREM_I8,
- RTLIB::SREM_I16, RTLIB::SREM_I32,
- RTLIB::SREM_I64, RTLIB::SREM_I128);
- else
- Tmp1 = ExpandIntLibCall(Node, false,
- RTLIB::UREM_I8,
- RTLIB::UREM_I16, RTLIB::UREM_I32,
- RTLIB::UREM_I64, RTLIB::UREM_I128);
- Results.push_back(Tmp1);
+ Results.push_back(Tmp1);
+ }
break;
}
case ISD::UDIV:
@@ -3594,23 +3472,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
bool isSigned = Node->getOpcode() == ISD::SDIV;
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
EVT VT = Node->getValueType(0);
- SDVTList VTs = DAG.getVTList(VT, VT);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
- (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
- useDivRem(Node, isSigned, true)))
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
- else if (isSigned)
- Tmp1 = ExpandIntLibCall(Node, true,
- RTLIB::SDIV_I8,
- RTLIB::SDIV_I16, RTLIB::SDIV_I32,
- RTLIB::SDIV_I64, RTLIB::SDIV_I128);
- else
- Tmp1 = ExpandIntLibCall(Node, false,
- RTLIB::UDIV_I8,
- RTLIB::UDIV_I16, RTLIB::UDIV_I32,
- RTLIB::UDIV_I64, RTLIB::UDIV_I128);
- Results.push_back(Tmp1);
+ Results.push_back(Tmp1);
+ }
break;
}
case ISD::MULHU:
@@ -3626,11 +3493,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1.getValue(1));
break;
}
- case ISD::SDIVREM:
- case ISD::UDIVREM:
- // Expand into divrem libcall
- ExpandDivRemLibCall(Node, Results);
- break;
case ISD::MUL: {
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
@@ -3673,14 +3535,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
- break;
}
-
- Tmp1 = ExpandIntLibCall(Node, false,
- RTLIB::MUL_I8,
- RTLIB::MUL_I16, RTLIB::MUL_I32,
- RTLIB::MUL_I64, RTLIB::MUL_I128);
- Results.push_back(Tmp1);
break;
}
case ISD::SADDO:
@@ -3867,9 +3722,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
- MachinePointerInfo::getJumpTable(), MemVT,
- false, false, false, 0);
+ SDValue LD = DAG.getExtLoad(
+ ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT,
+ false, false, false, 0);
Addr = LD;
if (TM.getRelocationModel() == Reloc::PIC_) {
// For PIC, the sequence is:
@@ -4092,16 +3948,276 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
// Replace the original node with the legalized result.
+ if (Results.empty())
+ return false;
+
+ ReplaceNode(Node, Results.data());
+ return true;
+}
+
+void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
+ SDLoc dl(Node);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ unsigned Opc = Node->getOpcode();
+ switch (Opc) {
+ case ISD::ATOMIC_FENCE: {
+ // If the target didn't lower this, lower it to '__sync_synchronize()' call
+ // FIXME: handle "fence singlethread" more efficiently.
+ TargetLowering::ArgListTy Args;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0);
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
+
+ std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::TRAP: {
+ // If this operation is not supported, lower it to 'abort()' call
+ TargetLowering::ArgListTy Args;
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("abort",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FMINNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128));
+ break;
+ case ISD::FMAXNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128));
+ break;
+ case ISD::FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
+ break;
+ case ISD::FCOS:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
+ break;
+ case ISD::FSINCOS:
+ // Expand into sincos libcall.
+ ExpandSinCosLibCall(Node, Results);
+ break;
+ case ISD::FLOG:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FROUND:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128));
+ break;
+ case ISD::FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
+ break;
+ case ISD::FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128));
+ break;
+ case ISD::FADD:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128));
+ break;
+ case ISD::FMUL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128));
+ break;
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) == MVT::f32) {
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ }
+ break;
+ case ISD::FP_TO_FP16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
+ Results.push_back(ExpandLibCall(LC, Node, false));
+ break;
+ }
+ case ISD::FSUB:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128));
+ break;
+ case ISD::SREM:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128));
+ break;
+ case ISD::UREM:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128));
+ break;
+ case ISD::SDIV:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128));
+ break;
+ case ISD::UDIV:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128));
+ break;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
+ case ISD::MUL:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128));
+ break;
+ }
+
+ // Replace the original node with the legalized result.
if (!Results.empty())
ReplaceNode(Node, Results.data());
}
+// Determine the vector type to use in place of an original scalar element when
+// promoting equally sized vectors.
+static MVT getPromotedVectorElementType(const TargetLowering &TLI,
+ MVT EltVT, MVT NewEltVT) {
+ unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits();
+ MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt);
+ assert(TLI.isTypeLegal(MidVT) && "unexpected");
+ return MidVT;
+}
+
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
SmallVector<SDValue, 8> Results;
MVT OVT = Node->getSimpleValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
Node->getOpcode() == ISD::SINT_TO_FP ||
- Node->getOpcode() == ISD::SETCC) {
+ Node->getOpcode() == ISD::SETCC ||
+ Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
OVT = Node->getOperand(0).getSimpleValueType();
}
if (Node->getOpcode() == ISD::BR_CC)
@@ -4284,11 +4400,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FREM:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FCOPYSIGN:
case ISD::FPOW: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
- Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
+ Node->getFlags());
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
@@ -4303,12 +4419,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getIntPtrConstant(0, dl)));
break;
}
+ case ISD::FCOPYSIGN:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = Node->getOperand(1);
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+
+ // fcopysign doesn't change anything but the sign bit, so
+ // (fp_round (fcopysign (fpext a), b))
+ // is as precise as
+ // (fp_round (fpext a))
+ // which is a no-op. Mark it as a TRUNCating FP_ROUND.
+ const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN);
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
- Tmp3, DAG.getIntPtrConstant(0, dl)));
+ Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
break;
}
case ISD::FFLOOR:
@@ -4333,6 +4457,157 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp2, DAG.getIntPtrConstant(0, dl)));
break;
}
+ case ISD::BUILD_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = build_vector i64:x, i64:y => v4i32
+ // =>
+ // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for build_vector");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {
+ SDValue Op = Node->getOperand(I);
+ NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op));
+ }
+
+ SDLoc SL(Node);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size.
+ //
+ // e.g. v2i64 = extract_vector_elt x:v2i64, y:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ //
+ // i64 = bitcast
+ // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
+ // (i32 (extract_vector_elt castx, (2 * y + 1)))
+ //
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for extract_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Idx = Node->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+ SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVec, TmpIdx);
+ NewOps.push_back(Elt);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps);
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec));
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ // v2i32:casty = bitcast y:i64
+ //
+ // v2i64 = bitcast
+ // (v4i32 insert_vector_elt
+ // (v4i32 insert_vector_elt v4i32:castx,
+ // (extract_vector_elt casty, 0), 2 * z),
+ // (extract_vector_elt casty, 1), (2 * z + 1))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for insert_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Val = Node->getOperand(1);
+ SDValue Idx = Node->getOperand(2);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+
+ SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+
+ SDValue NewVec = CastVec;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVal, IdxOffset);
+
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT,
+ NewVec, Elt, InEltIdx);
+ }
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to different vector type with the smae total bit size.
+ //
+ // e.g. v2i64 = scalar_to_vector x:i64
+ // =>
+ // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef)
+ //
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ SDValue Val = Node->getOperand(0);
+ SDLoc SL(Node);
+
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+ SDValue Undef = DAG.getUNDEF(MidVT);
+
+ SmallVector<SDValue, 8> NewElts;
+ NewElts.push_back(CastVal);
+ for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I)
+ NewElts.push_back(Undef);
+
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
}
// Replace the original node with the legalized result.
@@ -4356,7 +4631,7 @@ void SelectionDAG::Legalize() {
for (auto NI = allnodes_end(); NI != allnodes_begin();) {
--NI;
- SDNode *N = NI;
+ SDNode *N = &*NI;
if (N->use_empty() && N != getRoot().getNode()) {
++NI;
DeleteNode(N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3c50a4155731..6c0193a76732 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -43,10 +43,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
}
//===----------------------------------------------------------------------===//
-// Result Float to Integer Conversion.
+// Convert Float Results to Integer for Non-HW-supported Operations.
//===----------------------------------------------------------------------===//
-void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
dbgs() << "\n");
SDValue R = SDValue();
@@ -59,20 +59,26 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
+ case ISD::Register:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ assert(isLegalInHWReg(N->getValueType(ResNo)) &&
+ "Unsupported SoftenFloatRes opcode!");
+ // Only when isLegalInHWReg, we can skip check of the operands.
+ R = SDValue(N, ResNo);
+ break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
- case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
- case ISD::ConstantFP:
- R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
- break;
+ case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break;
case ISD::EXTRACT_VECTOR_ELT:
R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
- case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break;
case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
- case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break;
case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
@@ -84,7 +90,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
- case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break;
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
@@ -97,9 +103,9 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
- case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
- case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
- case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
@@ -107,11 +113,19 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
}
// If R is null, the sub-method took care of registering the result.
- if (R.getNode())
+ if (R.getNode()) {
SetSoftenedFloat(SDValue(N, ResNo), R);
+ ReplaceSoftenFloatResult(N, ResNo, R);
+ }
+ // Return true only if the node is changed,
+ // assuming that the operands are also converted when necessary.
+ // Otherwise, return false to tell caller to scan operands.
+ return R.getNode() && R.getNode() != N;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
return BitConvertToInteger(N->getOperand(0));
}
@@ -130,10 +144,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
BitConvertToInteger(N->getOperand(1)));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
- return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N),
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, we can load better from the constant pool.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN),
TLI.getTypeToTransformTo(*DAG.getContext(),
- N->getValueType(0)));
+ CN->getValueType(0)));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -143,7 +161,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
NewOp, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, FABS can be implemented as native bitwise operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned Size = NVT.getSizeInBits();
@@ -165,7 +186,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
RTLIB::FMIN_F80,
RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
@@ -178,7 +199,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
RTLIB::FMAX_F80,
RTLIB::FMAX_F128,
RTLIB::FMAX_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
@@ -191,7 +212,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
RTLIB::ADD_F80,
RTLIB::ADD_F128,
RTLIB::ADD_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
@@ -203,10 +224,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
RTLIB::CEIL_F80,
RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
SDValue LHS = GetSoftenedFloat(N->getOperand(0));
SDValue RHS = BitConvertToInteger(N->getOperand(1));
SDLoc dl(N);
@@ -263,7 +287,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
RTLIB::COS_F80,
RTLIB::COS_F128,
RTLIB::COS_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
@@ -276,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
@@ -288,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
RTLIB::EXP_F80,
RTLIB::EXP_F128,
RTLIB::EXP_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
@@ -300,7 +324,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
RTLIB::EXP2_F80,
RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
@@ -312,7 +336,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
RTLIB::FLOOR_F80,
RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
@@ -324,7 +348,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
RTLIB::LOG_F80,
RTLIB::LOG_F128,
RTLIB::LOG_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
@@ -336,7 +360,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
RTLIB::LOG2_F80,
RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
@@ -348,7 +372,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
RTLIB::LOG10_F80,
RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
@@ -362,7 +386,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- NVT, Ops, 3, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
@@ -375,7 +399,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
@@ -387,10 +411,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
RTLIB::NEARBYINT_F80,
RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, FNEG can be implemented as native bitwise operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
@@ -402,7 +429,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, dl).first;
+ NVT, Ops, false, dl).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
@@ -418,11 +445,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SoftenFloatResult(Op.getNode(), 0);
}
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
+ Op = GetPromotedFloat(Op);
+ // If the promotion did the FP_EXTEND to the destination type for us,
+ // there's nothing left to do here.
+ if (Op.getValueType() == N->getValueType(0)) {
+ return BitConvertToInteger(Op);
+ }
+ }
+
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat)
Op = GetSoftenedFloat(Op);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -430,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
SDValue Op = N->getOperand(0);
- SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1,
+ SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
false, SDLoc(N)).first;
if (N->getValueType(0) == MVT::f32)
return Res32;
@@ -438,7 +474,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -452,7 +488,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
@@ -465,7 +501,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
RTLIB::POW_F80,
RTLIB::POW_F128,
RTLIB::POW_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
@@ -479,7 +515,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
RTLIB::POWI_F80,
RTLIB::POWI_F128,
RTLIB::POWI_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
@@ -492,7 +528,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
RTLIB::REM_F80,
RTLIB::REM_F128,
RTLIB::REM_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
@@ -504,7 +540,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
RTLIB::RINT_F80,
RTLIB::RINT_F128,
RTLIB::RINT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
@@ -516,7 +552,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
RTLIB::ROUND_F80,
RTLIB::ROUND_F128,
RTLIB::ROUND_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
@@ -528,7 +564,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
RTLIB::SIN_F80,
RTLIB::SIN_F128,
RTLIB::SIN_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
@@ -540,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
RTLIB::SQRT_F80,
RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
@@ -553,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
@@ -568,10 +604,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
RTLIB::TRUNC_F80,
RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128),
- NVT, &Op, 1, false, SDLoc(N)).first;
+ NVT, Op, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
+ bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo));
LoadSDNode *L = cast<LoadSDNode>(N);
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
@@ -586,7 +623,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
- ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ if (N != NewL.getValue(1).getNode())
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
return NewL;
}
@@ -600,17 +638,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
- return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+ auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL);
+ if (LegalInHWReg)
+ return ExtendNode;
+ return BitConvertToInteger(ExtendNode);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
SDValue LHS = GetSoftenedFloat(N->getOperand(1));
SDValue RHS = GetSoftenedFloat(N->getOperand(2));
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), N->getOperand(0), LHS, RHS);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
SDValue LHS = GetSoftenedFloat(N->getOperand(2));
SDValue RHS = GetSoftenedFloat(N->getOperand(3));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
@@ -636,7 +681,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
- ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ if (N != NewVAARG.getValue(1).getNode())
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
return NewVAARG;
}
@@ -665,12 +711,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(0));
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- &Op, 1, Signed, dl).first;
+ Op, Signed, dl).first;
}
//===----------------------------------------------------------------------===//
-// Operand Float to Integer Conversion..
+// Convert Float Operand to Integer for Non-HW-supported Operations.
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
@@ -680,6 +726,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
+ if (CanSkipSoftenFloatOperand(N, OpNo))
+ return false;
#ifndef NDEBUG
dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
@@ -691,18 +739,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
- case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
- case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
- case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ case ISD::STORE:
+ Res = SoftenFloatOp_STORE(N, OpNo);
+ // Do not try to analyze or soften this node again if the value is
+ // or can be held in a register. In that case, Res.getNode() should
+ // be equal to N.
+ if (Res.getNode() == N &&
+ isLegalInHWReg(N->getOperand(OpNo).getValueType()))
+ return false;
+ // Otherwise, we need to reanalyze and lower the new Res nodes.
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
- // core about this.
+ // core about this to re-analyze.
if (Res.getNode() == N)
return true;
@@ -713,6 +770,41 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
return false;
}
+bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ if (!isLegalInHWReg(N->getOperand(OpNo).getValueType()))
+ return false;
+ // When the operand type can be kept in registers, SoftenFloatResult
+ // will call ReplaceValueWith to replace all references and we can
+ // skip softening this operand.
+ switch (N->getOperand(OpNo).getOpcode()) {
+ case ISD::BITCAST:
+ case ISD::ConstantFP:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::FABS:
+ case ISD::FCOPYSIGN:
+ case ISD::FNEG:
+ case ISD::Register:
+ case ISD::SELECT:
+ case ISD::SELECT_CC:
+ return true;
+ }
+ // For some opcodes, SoftenFloatResult handles all conversion of softening
+ // and replacing operands, so that there is no need to soften operands
+ // again, although such opcode could be scanned for other illegal operands.
+ switch (N->getOpcode()) {
+ case ISD::ConstantFP:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::FABS:
+ case ISD::FCOPYSIGN:
+ case ISD::FNEG:
+ case ISD::Register:
+ return true;
+ }
+ return false;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
GetSoftenedFloat(N->getOperand(0)));
@@ -730,7 +822,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
}
@@ -747,7 +839,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
@@ -773,20 +865,33 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
0);
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
+ EVT SVT = N->getOperand(0).getValueType();
EVT RVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
-}
+ EVT NVT = EVT();
+ SDLoc dl(N);
+
+ // If the result is not legal, eg: fp -> i1, then it needs to be promoted to
+ // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly
+ // match, eg. we don't have fp -> i8 conversions.
+ // Look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++IntVT) {
+ NVT = (MVT::SimpleValueType)IntVT;
+ // The type needs to big enough to hold the result.
+ if (NVT.bitsGE(RVT))
+ LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
-SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
- EVT RVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+ SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first;
+
+ // Truncate the result if the libcall returns a larger type.
+ return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
}
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
@@ -1028,7 +1133,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- N->getValueType(0), Ops, 2, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1102,7 +1207,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- N->getValueType(0), Ops, 3, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1116,7 +1221,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- N->getValueType(0), Ops, 2, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1231,7 +1336,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- N->getValueType(0), Ops, 2, false,
+ N->getValueType(0), Ops, false,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1310,7 +1415,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
- Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first;
+ Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first;
GetPairElements(Hi, Lo, Hi);
}
@@ -1341,6 +1446,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
break;
}
+ // TODO: Are there fast-math-flags to propagate to this FADD?
Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
APInt(128, Parts)),
@@ -1494,7 +1600,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first;
+ return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
@@ -1511,6 +1617,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
+ // TODO: Are there fast-math-flags to propagate to this FSUB?
return DAG.getSelectCC(dl, N->getOperand(0), Tmp,
DAG.getNode(ISD::ADD, dl, MVT::i32,
DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
@@ -1527,7 +1634,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
false, dl).first;
}
@@ -1912,8 +2019,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Op0 = GetPromotedFloat(N->getOperand(0));
SDValue Op1 = GetPromotedFloat(N->getOperand(1));
-
- return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags());
}
SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 9f060a09a0f3..cd114d668e20 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -53,6 +53,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
@@ -65,16 +66,20 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
- case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
- case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
+ case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
+ break;
+ case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
- case ISD::SMAX:
+ case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UMIN:
- case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+ case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+
case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
@@ -114,10 +119,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
case ISD::SDIV:
- case ISD::SREM: Res = PromoteIntRes_SDIV(N); break;
+ case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UDIV:
- case ISD::UREM: Res = PromoteIntRes_UDIV(N); break;
+ case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
case ISD::SADDO:
case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
@@ -180,7 +185,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
N->getChain(), N->getBasePtr(),
N->getMemOperand(), N->getOrdering(),
N->getSynchScope());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
@@ -193,7 +198,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
N->getChain(), N->getBasePtr(),
Op2, N->getMemOperand(), N->getOrdering(),
N->getSynchScope());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
@@ -316,6 +321,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+
+ unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ return DAG.getNode(
+ ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
// The pair element type may be legal, or may not promote to the same type as
// the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
@@ -465,7 +483,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
N->getMemoryVT(), N->getMemOperand());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
@@ -475,20 +493,34 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
- SDValue Mask = N->getMask();
- EVT NewMaskVT = getSetCCResultType(NVT);
- if (NewMaskVT != N->getMask().getValueType())
- Mask = PromoteTargetBoolean(Mask, NewMaskVT);
SDLoc dl(N);
-
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
- Mask, ExtSrc0, N->getMemoryVT(),
+ N->getMask(), ExtSrc0, N->getMemoryVT(),
N->getMemOperand(), ISD::SEXTLOAD);
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue ExtSrc0 = GetPromotedInteger(N->getValue());
+ assert(NVT == ExtSrc0.getValueType() &&
+ "Gather result type and the passThru agrument type should be the same");
+
+ SDLoc dl(N);
+ SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(),
+ N->getIndex()};
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
+
/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
// Simply change the return type of the boolean result.
@@ -534,14 +566,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
- // Sign extend the input.
- SDValue LHS = SExtPromotedInteger(N->getOperand(0));
- SDValue RHS = SExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
-}
-
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
@@ -629,6 +653,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -770,14 +810,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
return Mul;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
- // Zero extend the input.
- SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
- SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
-}
-
SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
N->getValueType(0)));
@@ -875,6 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
OpNo); break;
case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
OpNo); break;
+ case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N),
+ OpNo); break;
+ case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
+ OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
@@ -1143,56 +1179,49 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getMemoryVT(), N->getMemOperand());
}
-SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
+SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
+ unsigned OpNo) {
SDValue DataOp = N->getValue();
EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
- EVT MaskVT = Mask.getValueType();
SDLoc dl(N);
bool TruncateStore = false;
- if (!TLI.isTypeLegal(DataVT)) {
- if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
- DataOp = GetPromotedInteger(DataOp);
- if (!TLI.isTypeLegal(MaskVT))
- Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
- TruncateStore = true;
- }
+ if (OpNo == 2) {
+ // Mask comes before the data operand. If the data operand is legal, we just
+ // promote the mask.
+ // When the data operand has illegal type, we should legalize the data
+ // operand first. The mask will be promoted/splitted/widened according to
+ // the data operand type.
+ if (TLI.isTypeLegal(DataVT))
+ Mask = PromoteTargetBoolean(Mask, DataVT);
else {
- assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
- "Unexpected data legalization in MSTORE");
- DataOp = GetWidenedVector(DataOp);
-
- if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
- Mask = GetWidenedVector(Mask);
- else {
- EVT BoolVT = getSetCCResultType(DataOp.getValueType());
+ if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
+ return PromoteIntOp_MSTORE(N, 3);
- // We can't use ModifyToType() because we should fill the mask with
- // zeroes
- unsigned WidenNumElts = BoolVT.getVectorNumElements();
- unsigned MaskNumElts = MaskVT.getVectorNumElements();
+ else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
+ return WidenVecOp_MSTORE(N, 3);
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
- Ops[0] = Mask;
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = ZeroVal;
-
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ else {
+ assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
+ return SplitVecOp_MSTORE(N, 3);
}
}
+ } else { // Data operand
+ assert(OpNo == 3 && "Unexpected operand for promotion");
+ DataOp = GetPromotedInteger(DataOp);
+ Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
+ TruncateStore = true;
}
- else
- Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
+
return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
N->getMemoryVT(), N->getMemOperand(),
TruncateStore);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
+SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
+ unsigned OpNo) {
assert(OpNo == 2 && "Only know how to promote the mask!");
EVT DataVT = N->getValueType(0);
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
@@ -1201,6 +1230,31 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo)
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
+ unsigned OpNo) {
+
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValueType(0);
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
+ unsigned OpNo) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValue().getValueType();
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
@@ -1259,6 +1313,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
case ISD::CTLZ_ZERO_UNDEF:
@@ -1270,6 +1325,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
@@ -1763,12 +1819,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
-void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
- SDValue &Lo, SDValue &Hi) {
- SDValue Res = DisintegrateMERGE_VALUES(N, ResNo);
- SplitInteger(Res, Lo, Hi);
-}
-
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -1834,6 +1884,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
}
}
+void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
@@ -1918,8 +1976,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/,
- dl).first,
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first,
Lo, Hi);
}
@@ -1934,8 +1991,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/,
- dl).first,
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first,
Lo, Hi);
}
@@ -2055,7 +2111,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
}
}
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ch);
}
@@ -2096,11 +2152,21 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/,
- dl).first,
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first,
Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc DL(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);
+ SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0));
+ Lo = R.getValue(0);
+ Hi = R.getValue(1);
+ ReplaceValueWith(SDValue(N, 1), R.getValue(2));
+}
+
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue &Lo, SDValue &Hi) {
SDValue LHS = Node->getOperand(0);
@@ -2166,7 +2232,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
LC = RTLIB::SDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
@@ -2261,8 +2327,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo,
- Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi);
return;
}
@@ -2352,7 +2417,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
LC = RTLIB::SREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
@@ -2499,7 +2564,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
LC = RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
@@ -2525,7 +2590,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
LC = RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
@@ -2605,6 +2670,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break;
case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
@@ -2732,6 +2798,47 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
return;
}
+ if (LHSHi == RHSHi) {
+ // Comparing the low bits is enough.
+ NewLHS = Tmp1;
+ NewRHS = SDValue();
+ return;
+ }
+
+ // Lower with SETCCE if the target supports it.
+ // FIXME: Make all targets support this, then remove the other lowering.
+ if (TLI.getOperationAction(
+ ISD::SETCCE,
+ TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) ==
+ TargetLowering::Custom) {
+ // SETCCE can detect < and >= directly. For > and <=, flip operands and
+ // condition code.
+ bool FlipOperands = false;
+ switch (CCCode) {
+ case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break;
+ case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break;
+ case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break;
+ case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break;
+ default: break;
+ }
+ if (FlipOperands) {
+ std::swap(LHSLo, RHSLo);
+ std::swap(LHSHi, RHSHi);
+ }
+ // Perform a wide subtraction, feeding the carry from the low part into
+ // SETCCE. The SETCCE operation is essentially looking at the high part of
+ // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or
+ // positive iff LHS >= RHS.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
+ SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo);
+ SDValue Res =
+ DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()),
+ LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode));
+ NewLHS = Res;
+ NewRHS = SDValue();
+ return;
+ }
+
NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
LHSHi, RHSHi, ISD::SETEQ, false,
DagCombineInfo, dl);
@@ -2796,6 +2903,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
DAG.getCondCode(CCCode)), 0);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+ SDLoc dl = SDLoc(N);
+
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(LHS, LHSLo, LHSHi);
+ GetExpandedInteger(RHS, RHSLo, RHSHi);
+
+ // Expand to a SUBE for the low part and a smaller SETCCE for the high.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
+ SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry);
+ return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi,
+ LowCmp.getValue(1), Cond);
+}
+
SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
// The value being shifted is legal, but the shift amount is too big.
// It follows that either the result of the shift is undefined, or the
@@ -2820,7 +2945,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2980,11 +3105,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
// Load the value out, extending it from f32 to the destination float type.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
- FudgePtr,
- MachinePointerInfo::getConstantPool(),
- MVT::f32,
- false, false, false, Alignment);
+ SDValue Fudge = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ false, false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
}
@@ -2992,7 +3116,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first;
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 54cfaf570619..2a0b0aa44794 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -73,21 +73,20 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
// (for example because it was created but not used). In general, we cannot
// distinguish between new nodes and deleted nodes.
SmallVector<SDNode*, 16> NewNodes;
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I) {
+ for (SDNode &Node : DAG.allnodes()) {
// Remember nodes marked NewNode - they are subject to extra checking below.
- if (I->getNodeId() == NewNode)
- NewNodes.push_back(I);
+ if (Node.getNodeId() == NewNode)
+ NewNodes.push_back(&Node);
- for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
- SDValue Res(I, i);
+ for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
+ SDValue Res(&Node, i);
bool Failed = false;
unsigned Mapped = 0;
if (ReplacedValues.find(Res) != ReplacedValues.end()) {
Mapped |= 1;
// Check that remapped values are only used by nodes marked NewNode.
- for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+ for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
UI != UE; ++UI)
if (UI.getUse().getResNo() == i)
assert(UI->getNodeId() == NewNode &&
@@ -119,16 +118,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
if (WidenedVectors.find(Res) != WidenedVectors.end())
Mapped |= 128;
- if (I->getNodeId() != Processed) {
+ if (Node.getNodeId() != Processed) {
// Since we allow ReplacedValues to map deleted nodes, it may map nodes
// marked NewNode too, since a deleted node may have been reallocated as
// another node that has not been seen by the LegalizeTypes machinery.
- if ((I->getNodeId() == NewNode && Mapped > 1) ||
- (I->getNodeId() != NewNode && Mapped != 0)) {
+ if ((Node.getNodeId() == NewNode && Mapped > 1) ||
+ (Node.getNodeId() != NewNode && Mapped != 0)) {
dbgs() << "Unprocessed value in a map!";
Failed = true;
}
- } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
if (Mapped > 1) {
dbgs() << "Value with legal type was transformed!";
Failed = true;
@@ -194,13 +193,12 @@ bool DAGTypeLegalizer::run() {
// Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
// (and remembering them) if they are leaves and assigning 'Unanalyzed' if
// non-leaves.
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I) {
- if (I->getNumOperands() == 0) {
- I->setNodeId(ReadyToProcess);
- Worklist.push_back(I);
+ for (SDNode &Node : DAG.allnodes()) {
+ if (Node.getNumOperands() == 0) {
+ Node.setNodeId(ReadyToProcess);
+ Worklist.push_back(&Node);
} else {
- I->setNodeId(Unanalyzed);
+ Node.setNodeId(Unanalyzed);
}
}
@@ -240,9 +238,13 @@ bool DAGTypeLegalizer::run() {
Changed = true;
goto NodeDone;
case TargetLowering::TypeSoftenFloat:
- SoftenFloatResult(N, i);
- Changed = true;
- goto NodeDone;
+ Changed = SoftenFloatResult(N, i);
+ if (Changed)
+ goto NodeDone;
+ // If not changed, the result type should be legally in register.
+ assert(isLegalInHWReg(ResultVT) &&
+ "Unchanged SoftenFloatResult should be legal in register!");
+ goto ScanOperands;
case TargetLowering::TypeExpandFloat:
ExpandFloatResult(N, i);
Changed = true;
@@ -409,40 +411,48 @@ NodeDone:
// In a debug build, scan all the nodes to make sure we found them all. This
// ensures that there are no cycles and that everything got processed.
#ifndef NDEBUG
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I) {
+ for (SDNode &Node : DAG.allnodes()) {
bool Failed = false;
// Check that all result types are legal.
- if (!IgnoreNodeResults(I))
- for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
- if (!isTypeLegal(I->getValueType(i))) {
- dbgs() << "Result type " << i << " illegal!\n";
+ // A value type is illegal if its TypeAction is not TypeLegal,
+ // and TLI.RegClassForVT does not have a register class for this type.
+ // For example, the x86_64 target has f128 that is not TypeLegal,
+ // to have softened operators, but it also has FR128 register class to
+ // pass and return f128 values. Hence a legalized node can have f128 type.
+ if (!IgnoreNodeResults(&Node))
+ for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(Node.getValueType(i)) &&
+ !TLI.isTypeLegal(Node.getValueType(i))) {
+ dbgs() << "Result type " << i << " illegal: ";
+ Node.dump();
Failed = true;
}
// Check that all operand types are legal.
- for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
- if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
- !isTypeLegal(I->getOperand(i).getValueType())) {
- dbgs() << "Operand type " << i << " illegal!\n";
+ for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(Node.getOperand(i).getNode()) &&
+ !isTypeLegal(Node.getOperand(i).getValueType()) &&
+ !TLI.isTypeLegal(Node.getOperand(i).getValueType())) {
+ dbgs() << "Operand type " << i << " illegal: ";
+ Node.getOperand(i).dump();
Failed = true;
}
- if (I->getNodeId() != Processed) {
- if (I->getNodeId() == NewNode)
+ if (Node.getNodeId() != Processed) {
+ if (Node.getNodeId() == NewNode)
dbgs() << "New node not analyzed?\n";
- else if (I->getNodeId() == Unanalyzed)
+ else if (Node.getNodeId() == Unanalyzed)
dbgs() << "Unanalyzed node not noticed?\n";
- else if (I->getNodeId() > 0)
+ else if (Node.getNodeId() > 0)
dbgs() << "Operand not processed?\n";
- else if (I->getNodeId() == ReadyToProcess)
+ else if (Node.getNodeId() == ReadyToProcess)
dbgs() << "Not added to worklist?\n";
Failed = true;
}
if (Failed) {
- I->dump(&DAG); dbgs() << "\n";
+ Node.dump(&DAG); dbgs() << "\n";
llvm_unreachable(nullptr);
}
}
@@ -751,13 +761,23 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
- assert(Result.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ // f128 of x86_64 could be kept in SSE registers,
+ // but sometimes softened to i128.
+ assert((Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) ||
+ Op.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
"Invalid type for softened float");
AnalyzeNewValue(Result);
SDValue &OpEntry = SoftenedFloats[Op];
- assert(!OpEntry.getNode() && "Node is already converted to integer!");
+ // Allow repeated calls to save f128 type nodes
+ // or any node with type that transforms to itself.
+ // Many operations on these types are not softened.
+ assert((!OpEntry.getNode()||
+ Op.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
+ "Node is already converted to integer!");
OpEntry = Result;
}
@@ -1042,23 +1062,22 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
unsigned NumOps = N->getNumOperands();
SDLoc dl(N);
if (NumOps == 0) {
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), nullptr, 0, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned,
dl).first;
} else if (NumOps == 1) {
SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned,
dl).first;
} else if (NumOps == 2) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned,
dl).first;
}
SmallVector<SDValue, 8> Ops(NumOps);
for (unsigned i = 0; i < NumOps; ++i)
Ops[i] = N->getOperand(i);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0),
- &Ops[0], NumOps, isSigned, dl).first;
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first;
}
// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
@@ -1108,6 +1127,23 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
}
+/// WidenTargetBoolean - Widen the given target boolean to a target boolean
+/// of the given type. The boolean vector is widened and then promoted to match
+/// the target boolean type of the given ValVT.
+SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
+ bool WithZeroes) {
+ SDLoc dl(Bool);
+ EVT BoolVT = Bool.getValueType();
+
+ assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() &&
+ TLI.isTypeLegal(ValVT) &&
+ "Unexpected types in WidenTargetBoolean");
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(),
+ ValVT.getVectorNumElements());
+ Bool = ModifyToType(Bool, WideVT, WithZeroes);
+ return PromoteTargetBoolean(Bool, ValVT);
+}
+
/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
/// bits in Hi.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d1131a74cf17..8ba19f76797f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -72,6 +72,20 @@ private:
return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
}
+ /// isSimpleLegalType - Return true if this is a simple legal type.
+ bool isSimpleLegalType(EVT VT) const {
+ return VT.isSimple() && TLI.isTypeLegal(VT);
+ }
+
+ /// isLegalInHWReg - Return true if this type can be passed in registers.
+ /// For example, x86_64's f128, should to be legally in registers
+ /// and only some operations converted to library calls or integer
+ /// bitwise operations.
+ bool isLegalInHWReg(EVT VT) const {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return VT == NVT && isSimpleLegalType(VT);
+ }
+
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
@@ -173,6 +187,11 @@ private:
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
+
+ /// Modify Bit Vector to match SetCC result type of ValVT.
+ /// The bit vector is widened with zeroes when WithZeroes is true.
+ SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false);
+
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
@@ -234,6 +253,7 @@ private:
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntRes_BITCAST(SDNode *N);
SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BITREVERSE(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
@@ -246,21 +266,22 @@ private:
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
+ SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
- SDValue PromoteIntRes_SDIV(SDNode *N);
SDValue PromoteIntRes_SELECT(SDNode *N);
SDValue PromoteIntRes_VSELECT(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
SDValue PromoteIntRes_SHL(SDNode *N);
SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
SDValue PromoteIntRes_SRA(SDNode *N);
SDValue PromoteIntRes_SRL(SDNode *N);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
- SDValue PromoteIntRes_UDIV(SDNode *N);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
@@ -276,7 +297,6 @@ private:
SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
@@ -284,7 +304,6 @@ private:
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_Shift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
@@ -294,6 +313,8 @@ private:
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -312,8 +333,6 @@ private:
// Integer Result Expansion.
void ExpandIntegerResult(SDNode *N, unsigned ResNo);
- void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
- SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -322,6 +341,7 @@ private:
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -333,6 +353,7 @@ private:
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -354,12 +375,10 @@ private:
// Integer Operand Expansion.
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
- SDValue ExpandIntOp_BITCAST(SDNode *N);
SDValue ExpandIntOp_BR_CC(SDNode *N);
- SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
- SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
SDValue ExpandIntOp_SELECT_CC(SDNode *N);
SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_SETCCE(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -375,32 +394,48 @@ private:
// Float to Integer Conversion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
- /// GetSoftenedFloat - Given a processed operand Op which was converted to an
- /// integer of the same size, this returns the integer. The integer contains
- /// exactly the same bits as Op - only the type changed. For example, if Op
- /// is an f32 which was softened to an i32, then this method returns an i32,
- /// the bits of which coincide with those of Op.
+ /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer
+ /// if the Op is not supported in target HW and converted to the integer.
+ /// The integer contains exactly the same bits as Op - only the type changed.
+ /// For example, if Op is an f32 which was softened to an i32, then this method
+ /// returns an i32, the bits of which coincide with those of Op.
+ /// If the Op can be efficiently supported in target HW or the operand must
+ /// stay in a register, the Op is not converted to an integer.
+ /// In that case, the given op is returned.
SDValue GetSoftenedFloat(SDValue Op) {
SDValue &SoftenedOp = SoftenedFloats[Op];
+ if (!SoftenedOp.getNode() &&
+ isSimpleLegalType(Op.getValueType()))
+ return Op;
RemapValue(SoftenedOp);
assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
return SoftenedOp;
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
- // Result Float to Integer Conversion.
- void SoftenFloatResult(SDNode *N, unsigned OpNo);
+ // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary.
+ void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) {
+ // When the result type can be kept in HW registers, the converted
+ // NewRes node could have the same type. We can save the effort in
+ // cloning every user of N in SoftenFloatOperand or other legalization functions,
+ // by calling ReplaceValueWith here to update all users.
+ if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo)))
+ ReplaceValueWith(SDValue(N, ResNo), NewRes);
+ }
+
+ // Convert Float Results to Integer for Non-HW-supported Operations.
+ bool SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_BITCAST(SDNode *N);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
- SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
- SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
- SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FCOS(SDNode *N);
SDValue SoftenFloatRes_FDIV(SDNode *N);
SDValue SoftenFloatRes_FEXP(SDNode *N);
@@ -412,7 +447,7 @@ private:
SDValue SoftenFloatRes_FMA(SDNode *N);
SDValue SoftenFloatRes_FMUL(SDNode *N);
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
- SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
@@ -425,21 +460,25 @@ private:
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
SDValue SoftenFloatRes_FTRUNC(SDNode *N);
- SDValue SoftenFloatRes_LOAD(SDNode *N);
- SDValue SoftenFloatRes_SELECT(SDNode *N);
- SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_UNDEF(SDNode *N);
SDValue SoftenFloatRes_VAARG(SDNode *N);
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
- // Operand Float to Integer Conversion.
+ // Return true if we can skip softening the given operand or SDNode because
+ // it was soften before by SoftenFloatResult and references to the operand
+ // were replaced by ReplaceValueWith.
+ bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
+
+ // Convert Float Operand to Integer for Non-HW-supported Operations.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
- SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
- SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -575,7 +614,6 @@ private:
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
- SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
@@ -617,20 +655,18 @@ private:
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
@@ -650,6 +686,7 @@ private:
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
+ SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -680,8 +717,8 @@ private:
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
+ SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
- SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
@@ -693,6 +730,7 @@ private:
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
@@ -707,9 +745,11 @@ private:
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
+ SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Utilities Support: LegalizeVectorTypes.cpp
@@ -745,8 +785,10 @@ private:
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
- SDValue ModifyToType(SDValue InOp, EVT WidenVT);
-
+ /// When FillWithZeroes is "on" the vector will be widened with
+ /// zeroes.
+ /// By default, the vector will be widened with undefined values.
+ SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
//===--------------------------------------------------------------------===//
// Generic Splitting: LegalizeTypesGeneric.cpp
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 14d8f7762086..593c346df770 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -53,12 +53,17 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypePromoteFloat:
llvm_unreachable("Bitcast of a promotion-needing float should never need"
"expansion");
- case TargetLowering::TypeSoftenFloat:
- // Convert the integer operand instead.
- SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ case TargetLowering::TypeSoftenFloat: {
+ // Expand the floating point operand only if it was converted to integers.
+ // Otherwise, it is a legal type like f128 that can be saved in a register.
+ auto SoftenedOp = GetSoftenedFloat(InOp);
+ if (SoftenedOp == InOp)
+ break;
+ SplitInteger(SoftenedOp, Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
+ }
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat: {
auto &DL = DAG.getDataLayout();
@@ -161,7 +166,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
NOutVT.getTypeForEVT(*DAG.getContext()));
SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
// Emit a store to the stack slot.
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 83d4ad5ea1f4..f61f631e2ff8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -105,6 +105,8 @@ class VectorLegalizer {
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
+ SDValue ExpandBITREVERSE(SDValue Op);
+ SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op);
/// \brief Implements vector promotion.
///
@@ -159,7 +161,7 @@ bool VectorLegalizer::Run() {
DAG.AssignTopologicalOrder();
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
- LegalizeOp(SDValue(I, 0));
+ LegalizeOp(SDValue(&*I, 0));
// Finally, it's possible the root changed. Get the new root.
SDValue OldRoot = DAG.getRoot();
@@ -218,9 +220,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
assert(Result.getValue(1).use_empty() &&
"There are still live users of the old chain!");
return LegalizeOp(Lowered);
- } else {
- return TranslateLegalizeResults(Op, Lowered);
}
+ return TranslateLegalizeResults(Op, Lowered);
}
case TargetLowering::Expand:
Changed = true;
@@ -231,7 +232,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
EVT StVT = ST->getMemoryVT();
MVT ValVT = ST->getValue().getSimpleValueType();
if (StVT.isVector() && ST->isTruncatingStore())
- switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) {
+ switch (TLI.getTruncStoreAction(ValVT, StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
@@ -244,7 +245,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Changed = true;
return LegalizeOp(ExpandStore(Op));
}
- } else if (Op.getOpcode() == ISD::MSCATTER)
+ } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
HasVectorValue = true;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
@@ -265,6 +266,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
@@ -279,6 +282,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::ROTL:
case ISD::ROTR:
case ISD::BSWAP:
+ case ISD::BITREVERSE:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -298,6 +302,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FABS:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
@@ -338,9 +344,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::MSCATTER:
QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
break;
+ case ISD::MSTORE:
+ QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType();
+ break;
}
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Promote:
Result = Promote(Op);
Changed = true;
@@ -411,7 +421,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
Operands[j] = Op.getOperand(j);
}
- Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands);
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
@@ -708,6 +718,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandFNEG(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
+ case ISD::BITREVERSE:
+ return ExpandBITREVERSE(Op);
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -893,6 +908,25 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
+SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // If we have the scalar operation, it's probably cheaper to unroll it.
+ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::OR, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Let LegalizeDAG handle this later.
+ return Op;
+}
+
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
@@ -971,6 +1005,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
// Convert hi and lo to floats
// Convert the hi part back to the upper values
+ // TODO: Can any fast-math-flags be set on these nodes?
SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
@@ -984,12 +1019,23 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
SDLoc DL(Op);
SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
+ // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
Zero, Op.getOperand(0));
}
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) {
+ // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle.
+ unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ;
+ if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType()))
+ return Op;
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
EVT VT = Op.getValueType();
unsigned NumElems = VT.getVectorNumElements();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 51cd6619f783..d0187d36dee2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -67,6 +67,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
case ISD::ANY_EXTEND:
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -108,6 +109,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
case ISD::FPOW:
case ISD::FREM:
@@ -139,7 +146,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ LHS.getValueType(), LHS, RHS, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
@@ -228,7 +235,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->isInvariant(), N->getOriginalAlignment(),
N->getAAInfo());
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
return Result;
@@ -594,6 +601,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
@@ -613,6 +621,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
@@ -656,11 +665,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SUB:
case ISD::MUL:
case ISD::FADD:
- case ISD::FCOPYSIGN:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
case ISD::SDIV:
case ISD::UDIV:
case ISD::FDIV:
@@ -698,8 +708,10 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
SDLoc dl(N);
- Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
- Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+ const SDNodeFlags *Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
}
void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
@@ -870,6 +882,25 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
}
+void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDLoc DL(N);
+
+ SDValue RHSLo, RHSHi;
+ SDValue RHS = N->getOperand(1);
+ EVT RHSVT = RHS.getValueType();
+ if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(RHS, RHSLo, RHSHi);
+ else
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
+
+
+ Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
@@ -989,7 +1020,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(LD, 1), Ch);
}
@@ -1003,6 +1034,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue Ch = MLD->getChain();
SDValue Ptr = MLD->getBasePtr();
SDValue Mask = MLD->getMask();
+ SDValue Src0 = MLD->getSrc0();
unsigned Alignment = MLD->getOriginalAlignment();
ISD::LoadExtType ExtType = MLD->getExtensionType();
@@ -1012,16 +1044,22 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
(Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
Alignment/2 : Alignment;
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MLD->getMemoryVT();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0 = MLD->getSrc0();
SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -1049,7 +1087,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MLD, 1), Ch);
@@ -1064,20 +1102,33 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Mask = MGT->getMask();
+ SDValue Src0 = MGT->getValue();
+ SDValue Index = MGT->getIndex();
unsigned Alignment = MGT->getOriginalAlignment();
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
+ // Split MemoryVT
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl);
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
SDValue IndexHi, IndexLo;
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl);
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
+ else
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
@@ -1097,7 +1148,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MGT, 1), Ch);
}
@@ -1357,6 +1408,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
Res = SplitVecOp_TruncateHelper(N);
break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -1567,23 +1619,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Ptr = MGT->getBasePtr();
SDValue Index = MGT->getIndex();
SDValue Mask = MGT->getMask();
+ SDValue Src0 = MGT->getValue();
unsigned Alignment = MGT->getOriginalAlignment();
SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl);
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
SDValue IndexHi, IndexLo;
- if (Index.getNode())
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
else
- IndexLo = IndexHi = Index;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
@@ -1609,7 +1669,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MGT, 1), Ch);
@@ -1633,9 +1693,21 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
- GetSplitVector(Data, DataLo, DataHi);
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
SDValue MaskLo, MaskHi;
- GetSplitVector(Mask, MaskLo, MaskHi);
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+
+ MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType());
+ MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType());
// if Alignment is equal to the vector size,
// take the half of it for the second part
@@ -1680,25 +1752,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
unsigned Alignment = N->getOriginalAlignment();
SDLoc DL(N);
+ // Split all operands
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
- GetSplitVector(Data, DataLo, DataHi);
- SDValue MaskLo, MaskHi;
- GetSplitVector(Mask, MaskLo, MaskHi);
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
- SDValue PtrLo, PtrHi;
- if (Ptr.getValueType().isVector()) // gather form vector of pointers
- std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL);
+ SDValue MaskLo, MaskHi;
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
else
- PtrLo = PtrHi = Ptr;
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
SDValue IndexHi, IndexLo;
- if (Index.getNode())
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
else
- IndexLo = IndexHi = Index;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -1706,7 +1782,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo};
+ SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO);
@@ -1715,7 +1791,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi};
+ SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi};
Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
DL, OpsHi, MMO);
@@ -1891,6 +1967,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
+SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and the first input) has a legal vector type, but the second
+ // input needs splitting.
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+}
//===----------------------------------------------------------------------===//
@@ -1938,6 +2019,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MLOAD:
Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
+ case ISD::MGATHER:
+ Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
case ISD::ADD:
case ISD::AND:
@@ -1949,11 +2033,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::XOR:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
Res = WidenVecRes_Binary(N);
break;
case ISD::FADD:
- case ISD::FCOPYSIGN:
case ISD::FMUL:
case ISD::FPOW:
case ISD::FSUB:
@@ -1966,6 +2055,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryCanTrap(N);
break;
+ case ISD::FCOPYSIGN:
+ Res = WidenVecRes_FCOPYSIGN(N);
+ break;
+
case ISD::FPOWI:
Res = WidenVecRes_POWI(N);
break;
@@ -1989,6 +2082,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -2037,7 +2131,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
@@ -2048,6 +2142,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
+ const SDNodeFlags *Flags = N->getFlags();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
@@ -2057,7 +2152,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// Operation doesn't trap so just widen as normal.
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
}
// No legal vector version so unroll the vector operation and then widen.
@@ -2087,7 +2182,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
SDValue EOp2 = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
Idx += NumElts;
CurNumElts -= NumElts;
}
@@ -2105,7 +2200,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
- EOp1, EOp2);
+ EOp1, EOp2, Flags);
}
CurNumElts = 0;
}
@@ -2195,7 +2290,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
unsigned Opcode = N->getOpcode();
unsigned InVTNumElts = InVT.getVectorNumElements();
-
+ const SDNodeFlags *Flags = N->getFlags();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
@@ -2203,7 +2298,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (InVTNumElts == WidenNumElts) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
- return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
}
@@ -2224,7 +2319,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVec);
- return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
if (InVTNumElts % WidenNumElts == 0) {
@@ -2234,7 +2329,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Extract the input and convert the shorten input vector.
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVal);
- return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
}
}
@@ -2250,7 +2345,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (N->getNumOperands() == 1)
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
else
- Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
@@ -2260,6 +2355,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
+ // If this is an FCOPYSIGN with same input types, we can treat it as a
+ // normal (can trap) binary op.
+ if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
+ return WidenVecRes_BinaryCanTrap(N);
+
+ // If the types are different, fall back to unrolling.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
@@ -2669,7 +2775,35 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
Mask, Src0, N->getMemoryVT(),
N->getMemOperand(), ExtType);
- // Legalized the chain result - switch anything that used the old chain to
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
+
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ SDValue Src0 = GetWidenedVector(N->getValue());
+ unsigned NumElts = WideVT.getVectorNumElements();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ Mask = WidenTargetBoolean(Mask, WideVT, true);
+
+ // Widen the Index operand
+ SDValue Index = N->getIndex();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ Index.getValueType().getScalarType(),
+ NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
@@ -2831,7 +2965,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
+ case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
@@ -2928,6 +3064,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
}
}
+SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and first input) is legal, but the second input is illegal.
+ // We can't do much to fix that, so just unroll and let the extracts off of
+ // the second input be widened as needed later.
+ return DAG.UnrollVectorOp(N);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely
// that we can fix the input to a legal type so unroll the convert
@@ -3070,6 +3213,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
false);
}
+SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Can widen only data operand of mscatter");
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue DataOp = MSC->getValue();
+ SDValue Mask = MSC->getMask();
+
+ // Widen the value
+ SDValue WideVal = GetWidenedVector(DataOp);
+ EVT WideVT = WideVal.getValueType();
+ unsigned NumElts = WideVal.getValueType().getVectorNumElements();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ Mask = WidenTargetBoolean(Mask, WideVT, true);
+
+ // Widen index
+ SDValue Index = MSC->getIndex();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ Index.getValueType().getScalarType(),
+ NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+
+ SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
+ MSC->getMemoryVT(), dl, Ops,
+ MSC->getMemOperand());
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
@@ -3533,7 +3704,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
-SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+/// FillWithZeroes specifies that the vector should be widened with zeroes.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
+ bool FillWithZeroes) {
// Note that InOp might have been widened so it might already have
// the right width or it might need be narrowed.
EVT InVT = InOp.getValueType();
@@ -3550,10 +3723,11 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
unsigned NumConcat = WidenNumElts / InNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue UndefVal = DAG.getUNDEF(InVT);
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
+ DAG.getUNDEF(InVT);
Ops[0] = InOp;
for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = UndefVal;
+ Ops[i] = FillVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
}
@@ -3573,8 +3747,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
+ DAG.getUNDEF(EltVT);
for ( ; Idx < WidenNumElts; ++Idx)
- Ops[Idx] = UndefVal;
+ Ops[Idx] = FillVal;
return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 6303422b9ae9..622e06f0da2a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -49,7 +49,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
TII = STI.getInstrInfo();
ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
// This hard requirement could be relaxed, but for now
- // do not let it procede.
+ // do not let it proceed.
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
unsigned NumRC = TRI->getNumRegClasses();
@@ -269,12 +269,12 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
}
// Now see if there are no other dependencies
- // to instructions alredy in the packet.
+ // to instructions already in the packet.
for (unsigned i = 0, e = Packet.size(); i != e; ++i)
for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
E = Packet[i]->Succs.end(); I != E; ++I) {
// Since we do not add pseudos to packets, might as well
- // ignor order deps.
+ // ignore order deps.
if (I->isCtrl())
continue;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 34e1a7001082..62e7733ecd2b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -440,7 +440,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
NumRes = MCID.getNumDefs();
- for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -519,7 +519,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
+ for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index e9bd52034ffd..91024e672f9c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -141,8 +141,8 @@ private:
/// that are "live". These nodes must be scheduled before any other nodes that
/// modifies the registers can be scheduled.
unsigned NumLiveRegs;
- std::vector<SUnit*> LiveRegDefs;
- std::vector<SUnit*> LiveRegGens;
+ std::unique_ptr<SUnit*[]> LiveRegDefs;
+ std::unique_ptr<SUnit*[]> LiveRegGens;
// Collect interferences between physical register use/defs.
// Each interference is an SUnit and set of physical registers.
@@ -328,8 +328,8 @@ void ScheduleDAGRRList::Schedule() {
NumLiveRegs = 0;
// Allocate slots for each physical register, plus one for a special register
// to track the virtual resource of a calling sequence.
- LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr);
- LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr);
+ LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
+ LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
CallSeqEndForStart.clear();
assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
@@ -1206,7 +1206,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
NumRes = MCID.getNumDefs();
- for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -1218,7 +1218,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
- std::vector<SUnit*> &LiveRegDefs,
+ SUnit **LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs,
const TargetRegisterInfo *TRI) {
@@ -1240,7 +1240,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
- std::vector<SUnit*> &LiveRegDefs,
+ ArrayRef<SUnit*> LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs) {
// Look at all live registers. Skip Reg0 and the special CallResource.
@@ -1278,7 +1278,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
- CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(),
RegAdded, LRegs, TRI);
}
@@ -1302,7 +1302,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
} else
i += NumVals;
@@ -1328,13 +1328,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
}
}
if (const uint32_t *RegMask = getNodeRegMask(Node))
- CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
+ CheckForLiveRegDefMasked(SU, RegMask,
+ makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
+ RegAdded, LRegs);
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
- CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
return !LRegs.empty();
@@ -2718,7 +2720,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
ScheduleDAGRRList *scheduleDAG,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- const uint16_t *ImpDefs
+ const MCPhysReg *ImpDefs
= TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
const uint32_t *RegMask = getNodeRegMask(SU->getNode());
if(!ImpDefs && !RegMask)
@@ -2737,7 +2739,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
return true;
if (ImpDefs)
- for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
// Return true if SU clobbers this physical register use and the
// definition of the register reaches from DepSU. IsReachable queries
// a topological forward sort of the DAG (following the successors).
@@ -2756,13 +2758,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const TargetRegisterInfo *TRI) {
SDNode *N = SuccSU->getNode();
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
- const uint16_t *SUImpDefs =
+ const MCPhysReg *SUImpDefs =
TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
const uint32_t *SURegMask = getNodeRegMask(SUNode);
if (!SUImpDefs && !SURegMask)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 159c28cd2a61..5cc806668b12 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -86,12 +86,6 @@ namespace llvm {
/// flagged together nodes with a single SUnit.
void BuildSchedGraph(AliasAnalysis *AA);
- /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
- /// CopyToReg and its only active data operands are CopyFromReg within a
- /// single block loop.
- ///
- void InitVRegCycleFlag(SUnit *SU);
-
/// InitNumRegDefsLeft - Determine the # of regs defined by this node.
///
void InitNumRegDefsLeft(SUnit *SU);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 14f44ccc60ce..abbc48e10e46 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -210,28 +211,6 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
return true;
}
-/// isScalarToVector - Return true if the specified node is a
-/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
-/// element is not an undef.
-bool ISD::isScalarToVector(const SDNode *N) {
- if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return true;
-
- if (N->getOpcode() != ISD::BUILD_VECTOR)
- return false;
- if (N->getOperand(0).getOpcode() == ISD::UNDEF)
- return false;
- unsigned NumElems = N->getNumOperands();
- if (NumElems == 1)
- return false;
- for (unsigned i = 1; i < NumElems; ++i) {
- SDValue V = N->getOperand(i);
- if (V.getOpcode() != ISD::UNDEF)
- return false;
- }
- return true;
-}
-
/// allOperandsUndef - Return true if the node has at least one operand
/// and all operands of the specified node are ISD::UNDEF.
bool ISD::allOperandsUndef(const SDNode *N) {
@@ -397,24 +376,21 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
ID.AddInteger(Op.getResNo());
}
}
+
/// Add logical or fast math flag values to FoldingSetNodeID value.
static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode,
const SDNodeFlags *Flags) {
- if (!Flags || !isBinOpWithFlags(Opcode))
+ if (!isBinOpWithFlags(Opcode))
return;
- unsigned RawFlags = Flags->getRawFlags();
- // If no flags are set, do not alter the ID. We must match the ID of nodes
- // that were created without explicitly specifying flags. This also saves time
- // and allows a gradual increase in API usage of the optional optimization
- // flags.
- if (RawFlags != 0)
- ID.AddInteger(RawFlags);
+ unsigned RawFlags = 0;
+ if (Flags)
+ RawFlags = Flags->getRawFlags();
+ ID.AddInteger(RawFlags);
}
static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) {
- if (auto *Node = dyn_cast<BinaryWithFlagsSDNode>(N))
- AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags);
+ AddNodeIDFlags(ID, N->getOpcode(), N->getFlags());
}
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
@@ -624,9 +600,9 @@ void SelectionDAG::RemoveDeadNodes() {
SmallVector<SDNode*, 128> DeadNodes;
// Add all obviously-dead nodes to the DeadNodes worklist.
- for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
- if (I->use_empty())
- DeadNodes.push_back(I);
+ for (SDNode &Node : allnodes())
+ if (Node.use_empty())
+ DeadNodes.push_back(&Node);
RemoveDeadNodes(DeadNodes);
@@ -766,6 +742,7 @@ static void VerifySDNode(SDNode *N) {
void SelectionDAG::InsertNode(SDNode *N) {
AllNodes.push_back(N);
#ifndef NDEBUG
+ N->PersistentId = NextPersistentId++;
VerifySDNode(N);
#endif
}
@@ -929,7 +906,7 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
UpdateListeners(nullptr) {
- AllNodes.push_back(&EntryNode);
+ InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
}
@@ -950,7 +927,10 @@ void SelectionDAG::allnodes_clear() {
assert(&*AllNodes.begin() == &EntryNode);
AllNodes.remove(AllNodes.begin());
while (!AllNodes.empty())
- DeallocateNode(AllNodes.begin());
+ DeallocateNode(&AllNodes.front());
+#ifndef NDEBUG
+ NextPersistentId = 0;
+#endif
}
BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
@@ -1023,7 +1003,7 @@ void SelectionDAG::clear() {
static_cast<SDNode*>(nullptr));
EntryNode.UseList = nullptr;
- AllNodes.push_back(&EntryNode);
+ InsertNode(&EntryNode);
Root = getEntryNode();
DbgInfo->clear();
}
@@ -1429,8 +1409,8 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
- TargetFlags);
+ SDNode *N =
+ new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1852,8 +1832,58 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout());
if (OpTy == ShTy || OpTy.isVector()) return Op;
- ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
- return getNode(Opcode, SDLoc(Op), ShTy, Op);
+ return getZExtOrTrunc(Op, SDLoc(Op), ShTy);
+}
+
+SDValue SelectionDAG::expandVAArg(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+ unsigned Align = Node->getConstantOperandVal(3);
+
+ SDValue VAListLoad =
+ getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, false, 0);
+ SDValue VAList = VAListLoad;
+
+ if (Align > TLI.getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
+ VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(Align - 1, dl, VAList.getValueType()));
+
+ VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList,
+ getConstant(-(int64_t)Align, dl, VAList.getValueType()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(getDataLayout().getTypeAllocSize(
+ VT.getTypeForEVT(*getContext())),
+ dl, VAList.getValueType()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
+ // Load the actual argument out of the pointer VAList
+ return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(),
+ false, false, false, 0);
+}
+
+SDValue SelectionDAG::expandVACopy(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl,
+ Node->getOperand(0), Node->getOperand(2),
+ MachinePointerInfo(VS), false, false, false, 0);
+ return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD), false, false, 0);
}
/// CreateStackTemporary - Create a stack temporary, suitable for holding the
@@ -1872,8 +1902,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
/// CreateStackTemporary - Create a stack temporary suitable for holding
/// either of the specified value types.
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
- unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
- VT2.getStoreSizeInBits())/8;
+ unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
const DataLayout &DL = getDataLayout();
@@ -2255,7 +2284,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
- computeKnownBitsFromRangeMetadata(*Ranges, KnownZero);
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD)
+ computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne);
}
break;
}
@@ -2564,6 +2594,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (Tmp == 1) return 1; // Early out.
Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
return std::min(Tmp, Tmp2);
+ case ISD::SELECT_CC:
+ Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1);
+ return std::min(Tmp, Tmp2);
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -2679,7 +2714,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
const int rIndex = Items - 1 -
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- // If the sign portion ends in our element the substraction gives correct
+ // If the sign portion ends in our element the subtraction gives correct
// result. Otherwise it gives either negative or > bitwidth result
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
@@ -2798,6 +2833,16 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
return false;
}
+bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
+ assert(A.getValueType() == B.getValueType() &&
+ "Values must have the same type");
+ APInt AZero, AOne;
+ APInt BZero, BOne;
+ computeKnownBits(A, AZero, AOne);
+ computeKnownBits(B, BZero, BOne);
+ return (AZero | BZero).isAllOnesValue();
+}
+
/// getNode - Gets or creates the specified node.
///
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
@@ -2848,8 +2893,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT);
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT);
- else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT);
+ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
+ return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT);
break;
case ISD::BSWAP:
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
@@ -2954,44 +3001,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
- EVT SVT = VT.getScalarType();
- EVT InVT = BV->getValueType(0);
- EVT InSVT = InVT.getScalarType();
-
- // Find legal integer scalar type for constant promotion and
- // ensure that its scalar size is at least as large as source.
- EVT LegalSVT = SVT;
- if (SVT.isInteger()) {
- LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT);
- if (LegalSVT.bitsLT(SVT)) break;
- }
-
- // Let the above scalar folding handle the folding of each element.
- SmallVector<SDValue, 8> Ops;
- for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
- SDValue OpN = BV->getOperand(i);
- EVT OpVT = OpN.getValueType();
-
- // Build vector (integer) scalar operands may need implicit
- // truncation - do this before constant folding.
- if (OpVT.isInteger() && OpVT.bitsGT(InSVT))
- OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN);
-
- OpN = getNode(Opcode, DL, SVT, OpN);
-
- // Legalize the (integer) scalar constant if necessary.
- if (LegalSVT != SVT)
- OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN);
-
- if (OpN.getOpcode() != ISD::UNDEF &&
- OpN.getOpcode() != ISD::Constant &&
- OpN.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(OpN);
- }
- if (Ops.size() == VT.getVectorNumElements())
- return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
- break;
+ SDValue Ops = { Operand };
+ if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ return Fold;
}
}
}
@@ -3012,6 +3024,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid fpext node, dst < src!");
if (Operand.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
break;
@@ -3019,12 +3033,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
- "Invalid sext node, dst < src!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid sext node, dst < src!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
else if (OpOpcode == ISD::UNDEF)
@@ -3035,12 +3049,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ZERO_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
- "Invalid zext node, dst < src!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid zext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
return getNode(ISD::ZERO_EXTEND, DL, VT,
Operand.getNode()->getOperand(0));
@@ -3052,12 +3066,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ANY_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
- "Invalid anyext node, dst < src!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid anyext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND)
@@ -3077,12 +3091,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid TRUNCATE!");
if (Operand.getValueType() == VT) return Operand; // noop truncate
- assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
- "Invalid truncate node, src < dst!");
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
+ assert(Operand.getValueType().bitsGT(VT) &&
+ "Invalid truncate node, src < dst!");
if (OpOpcode == ISD::TRUNCATE)
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
@@ -3135,8 +3149,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
+ // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
- Operand.getNode()->getOperand(0));
+ Operand.getNode()->getOperand(0),
+ &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags);
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getNode()->getOperand(0);
break;
@@ -3182,6 +3198,10 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
case ISD::SRA: return std::make_pair(C1.ashr(C2), true);
case ISD::ROTL: return std::make_pair(C1.rotl(C2), true);
case ISD::ROTR: return std::make_pair(C1.rotr(C2), true);
+ case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true);
+ case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
+ case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
+ case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
@@ -3284,10 +3304,118 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs);
}
+SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL,
+ EVT VT,
+ ArrayRef<SDValue> Ops,
+ const SDNodeFlags *Flags) {
+ // If the opcode is a target-specific ISD node, there's nothing we can
+ // do here and the operand rules may not line up with the below, so
+ // bail early.
+ if (Opcode >= ISD::BUILTIN_OP_END)
+ return SDValue();
+
+ // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
+ if (!VT.isVector())
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
+ return !Op.getValueType().isVector() ||
+ Op.getValueType().getVectorNumElements() == NumElts;
+ };
+
+ auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
+ return (Op.getOpcode() == ISD::UNDEF) ||
+ (Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant());
+ };
+
+ // All operands must be vector types with the same number of elements as
+ // the result type and must be either UNDEF or a build vector of constant
+ // or UNDEF scalars.
+ if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) ||
+ !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize))
+ return SDValue();
+
+ // If we are comparing vectors, then the result needs to be a i1 boolean
+ // that is then sign-extended back to the legal result type.
+ EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
+
+ // Find legal integer scalar type for constant promotion and
+ // ensure that its scalar size is at least as large as source.
+ EVT LegalSVT = VT.getScalarType();
+ if (LegalSVT.isInteger()) {
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+ if (LegalSVT.bitsLT(SVT))
+ return SDValue();
+ }
+
+ // Constant fold each scalar lane separately.
+ SmallVector<SDValue, 4> ScalarResults;
+ for (unsigned i = 0; i != NumElts; i++) {
+ SmallVector<SDValue, 4> ScalarOps;
+ for (SDValue Op : Ops) {
+ EVT InSVT = Op.getValueType().getScalarType();
+ BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op);
+ if (!InBV) {
+ // We've checked that this is UNDEF or a constant of some kind.
+ if (Op.isUndef())
+ ScalarOps.push_back(getUNDEF(InSVT));
+ else
+ ScalarOps.push_back(Op);
+ continue;
+ }
+
+ SDValue ScalarOp = InBV->getOperand(i);
+ EVT ScalarVT = ScalarOp.getValueType();
+
+ // Build vector (integer) scalar operands may need implicit
+ // truncation - do this before constant folding.
+ if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT))
+ ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
+
+ ScalarOps.push_back(ScalarOp);
+ }
+
+ // Constant fold the scalar operands.
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
+
+ // Legalize the (integer) scalar constant if necessary.
+ if (LegalSVT != SVT)
+ ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
+
+ // Scalar folding only succeeded if the result is a constant or UNDEF.
+ if (ScalarResult.getOpcode() != ISD::UNDEF &&
+ ScalarResult.getOpcode() != ISD::Constant &&
+ ScalarResult.getOpcode() != ISD::ConstantFP)
+ return SDValue();
+ ScalarResults.push_back(ScalarResult);
+ }
+
+ assert(ScalarResults.size() == NumElts &&
+ "Unexpected number of scalar results for BUILD_VECTOR");
+ return getNode(ISD::BUILD_VECTOR, DL, VT, ScalarResults);
+}
+
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SDValue N2, const SDNodeFlags *Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+
+ // Canonicalize constant to RHS if commutative.
+ if (isCommutativeBinOp(Opcode)) {
+ if (N1C && !N2C) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ } else if (N1CFP && !N2CFP) {
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ }
+ }
+
switch (Opcode) {
default: break;
case ISD::TokenFactor:
@@ -3356,6 +3484,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
case ISD::MUL:
case ISD::SDIV:
case ISD::SREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
@@ -3367,37 +3499,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
case ISD::FREM:
if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
- // 0+x --> x
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
- if (CFP->getValueAPF().isZero())
- return N2;
// x+0 --> x
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
- if (CFP->getValueAPF().isZero())
- return N1;
+ if (N2CFP && N2CFP->getValueAPF().isZero())
+ return N1;
} else if (Opcode == ISD::FSUB) {
// x-0 --> x
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
- if (CFP->getValueAPF().isZero())
- return N1;
+ if (N2CFP && N2CFP->getValueAPF().isZero())
+ return N1;
} else if (Opcode == ISD::FMUL) {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1);
- SDValue V = N2;
-
- // If the first operand isn't the constant, try the second
- if (!CFP) {
- CFP = dyn_cast<ConstantFPSDNode>(N2);
- V = N1;
- }
-
- if (CFP) {
- // 0*x --> 0
- if (CFP->isZero())
- return SDValue(CFP,0);
- // 1*x --> x
- if (CFP->isExactlyValue(1.0))
- return V;
- }
+ // x*0 --> 0
+ if (N2CFP && N2CFP->isZero())
+ return N2;
+ // x*1 --> x
+ if (N2CFP && N2CFP->isExactlyValue(1.0))
+ return N1;
}
}
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
@@ -3457,7 +3572,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
assert(VT.isFloatingPoint() &&
N1.getValueType().isFloatingPoint() &&
VT.bitsLE(N1.getValueType()) &&
- isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+ N2C && "Invalid FP_ROUND!");
if (N1.getValueType() == VT) return N1; // noop conversion.
break;
case ISD::AssertSext:
@@ -3502,13 +3617,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SmallVector<SDValue, 8> Ops;
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N1.getOperand(i);
- if (Op.getValueType() != VT.getScalarType()) break;
if (Op.getOpcode() == ISD::UNDEF) {
- Ops.push_back(Op);
+ Ops.push_back(getUNDEF(VT.getScalarType()));
continue;
}
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = C->getAPIntValue();
+ Val = Val.zextOrTrunc(VT.getScalarSizeInBits());
Ops.push_back(SignExtendInReg(Val));
continue;
}
@@ -3590,15 +3705,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
return N1.getOperand(N2C->getZExtValue());
// EXTRACT_ELEMENT of a constant int is also very common.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ if (N1C) {
unsigned ElementSize = VT.getSizeInBits();
unsigned Shift = ElementSize * N2C->getZExtValue();
- APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+ APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift);
return getConstant(ShiftedVal.trunc(ElementSize), DL, VT);
}
break;
- case ISD::EXTRACT_SUBVECTOR: {
- SDValue Index = N2;
+ case ISD::EXTRACT_SUBVECTOR:
if (VT.isSimple() && N1.getValueType().isSimple()) {
assert(VT.isVector() && N1.getValueType().isVector() &&
"Extract subvector VTs must be a vectors!");
@@ -3608,9 +3722,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
"Extract subvector must be from larger vector to smaller vector!");
- if (isa<ConstantSDNode>(Index)) {
- assert((VT.getVectorNumElements() +
- cast<ConstantSDNode>(Index)->getZExtValue()
+ if (N2C) {
+ assert((VT.getVectorNumElements() + N2C->getZExtValue()
<= N1.getValueType().getVectorNumElements())
&& "Extract subvector overflow!");
}
@@ -3621,29 +3734,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
break;
}
- }
// Perform trivial constant folding.
if (SDValue SV =
FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
return SV;
- // Canonicalize constant to RHS if commutative.
- if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
- std::swap(N1C, N2C);
- std::swap(N1, N2);
- }
-
// Constant fold FP operations.
bool HasFPExceptions = TLI->hasFloatingPointExceptions();
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
if (N1CFP) {
- if (!N2CFP && isCommutativeBinOp(Opcode)) {
- // Canonicalize constant to RHS if commutative.
- std::swap(N1CFP, N2CFP);
- std::swap(N1, N2);
- } else if (N2CFP) {
+ if (N2CFP) {
APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
APFloat::opStatus s;
switch (Opcode) {
@@ -3670,7 +3770,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
break;
case ISD::FREM :
- s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+ s = V1.mod(V2);
if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
s!=APFloat::opDivByZero)) {
return getConstantFP(V1, DL, VT);
@@ -3795,7 +3895,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3) {
// Perform various simplifications.
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
switch (Opcode) {
case ISD::FMA: {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
@@ -3827,12 +3926,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
break;
case ISD::SETCC: {
// Use FoldSetCC to simplify SETCC's.
- SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
- if (Simp.getNode()) return Simp;
+ if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
+ return V;
+ // Vector constant folding.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ return V;
break;
}
case ISD::SELECT:
- if (N1C) {
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
if (N1C->getZExtValue())
return N2; // select true, X, Y -> X
return N3; // select false, X, Y -> Y
@@ -4153,6 +4256,14 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
return true;
}
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction()->optForMinSize();
+ return MF.getFunction()->optForSize();
+}
+
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, uint64_t Size,
@@ -4173,7 +4284,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4286,7 +4397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4380,7 +4491,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4446,6 +4557,16 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
+static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
+ unsigned AS) {
+ // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
+ // pointer operands can be losslessly bitcasted to pointers of address space 0
+ if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) {
+ report_fatal_error("cannot lower memory intrinsic in address space " +
+ Twine(AS));
+ }
+}
+
SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol, bool AlwaysInline,
@@ -4487,6 +4608,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
true, DstPtrInfo, SrcPtrInfo);
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
// FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
// memcpy is not guaranteed to be safe. libc memcpys aren't required to
// respect volatile, so they may do things like read or write memory
@@ -4548,6 +4672,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
return Result;
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
@@ -4605,6 +4732,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
return Result;
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+
// Emit a library call.
Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
@@ -4872,10 +5001,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+ int64_t Offset = 0) {
// If this is FI+Offset, we can model it.
if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
- return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+ return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+ FI->getIndex(), Offset);
// If this is (FI+Offset1)+Offset2, we can model it.
if (Ptr.getOpcode() != ISD::ADD ||
@@ -4884,20 +5015,22 @@ static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
return MachinePointerInfo();
int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
- return MachinePointerInfo::getFixedStack(FI, Offset+
- cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+ return MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI,
+ Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
}
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+ SDValue OffsetOp) {
// If the 'Offset' value isn't a constant, we can't handle this.
if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
- return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+ return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue());
if (OffsetOp.getOpcode() == ISD::UNDEF)
- return InferPointerInfo(Ptr);
+ return InferPointerInfo(DAG, Ptr);
return MachinePointerInfo();
}
@@ -4926,7 +5059,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(Ptr, Offset);
+ PtrInfo = InferPointerInfo(*this, Ptr, Offset);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
@@ -5054,7 +5187,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
Flags |= MachineMemOperand::MONonTemporal;
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(Ptr);
+ PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
@@ -5109,7 +5242,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
Flags |= MachineMemOperand::MONonTemporal;
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(Ptr);
+ PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
@@ -5261,7 +5394,7 @@ SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl,
cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- MaskedGatherSDNode *N =
+ MaskedGatherSDNode *N =
new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(),
Ops, VTs, VT, MMO);
CSEMap.InsertNode(N, IP);
@@ -5317,12 +5450,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
- ArrayRef<SDValue> Ops) {
+ ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) {
unsigned NumOps = Ops.size();
switch (NumOps) {
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, Ops[0]);
- case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
default: break;
}
@@ -5656,7 +5789,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
"Update with wrong number of operands");
// If no operands changed just return the input node.
- if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin()))
+ if (std::equal(Ops.begin(), Ops.end(), N->op_begin()))
return N;
// See if the modified node already exists.
@@ -6451,13 +6584,13 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// Node Id fields for nodes At SortedPos and after will contain the
// count of outstanding operands.
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
- SDNode *N = I++;
+ SDNode *N = &*I++;
checkForCycles(N, this);
unsigned Degree = N->getNumOperands();
if (Degree == 0) {
// A node with no uses, add it to the result array immediately.
N->setNodeId(DAGSize++);
- allnodes_iterator Q = N;
+ allnodes_iterator Q(N);
if (Q != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
assert(SortedPos != AllNodes.end() && "Overran node list");
@@ -6470,8 +6603,8 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// Visit all the nodes. As we iterate, move nodes into sorted order,
// such that by the time the end is reached all nodes will be sorted.
- for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
- SDNode *N = I;
+ for (SDNode &Node : allnodes()) {
+ SDNode *N = &Node;
checkForCycles(N, this);
// N is in sorted position, so all its uses have one less operand
// that needs to be sorted.
@@ -6493,9 +6626,10 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
P->setNodeId(Degree);
}
}
- if (I == SortedPos) {
+ if (&Node == SortedPos) {
#ifndef NDEBUG
- SDNode *S = ++I;
+ allnodes_iterator I(N);
+ SDNode *S = &*++I;
dbgs() << "Overran sorted position:\n";
S->dumprFull(this); dbgs() << "\n";
dbgs() << "Checking if this is due to cycles\n";
@@ -6559,6 +6693,26 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
// SDNode Class
//===----------------------------------------------------------------------===//
+bool llvm::isNullConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isNullValue();
+}
+
+bool llvm::isNullFPConstant(SDValue V) {
+ ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
+ return Const != nullptr && Const->isZero() && !Const->isNegative();
+}
+
+bool llvm::isAllOnesConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isAllOnesValue();
+}
+
+bool llvm::isOneConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isOne();
+}
+
HandleSDNode::~HandleSDNode() {
DropOperands();
}
@@ -6772,6 +6926,12 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
}
+const SDNodeFlags *SDNode::getFlags() const {
+ if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
+ return &FlagsNode->Flags;
+ return nullptr;
+}
+
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -6808,9 +6968,11 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
}
switch (N->getOpcode()) {
- default:
- Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands));
+ default: {
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands,
+ N->getFlags()));
break;
+ }
case ISD::VSELECT:
Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
break;
@@ -7101,6 +7263,24 @@ BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
}
+int32_t
+BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
+ uint32_t BitWidth) const {
+ if (ConstantFPSDNode *CN =
+ dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) {
+ bool IsExact;
+ APSInt IntVal(BitWidth);
+ APFloat APF = CN->getValueAPF();
+ if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
+ APFloat::opOK ||
+ !IsExact)
+ return -1;
+
+ return IntVal.exactLogBase2();
+ }
+ return -1;
+}
+
bool BuildVectorSDNode::isConstant() const {
for (const SDValue &Op : op_values()) {
unsigned Opc = Op.getOpcode();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2c3c0eb101a0..d2ea85ab4d22 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -63,6 +64,7 @@
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "isel"
@@ -79,7 +81,7 @@ LimitFPPrecision("limit-float-precision",
cl::init(0));
static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden,
+EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
cl::desc("Enable fast-math-flags for DAG nodes"));
// Limit the width of DAG chains. This is important in general to prevent
@@ -196,6 +198,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
if (PartEVT == ValueVT)
return Val;
+ if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
+ ValueVT.bitsLT(PartEVT)) {
+ // For an FP value in an integer part, we need to truncate to the right
+ // width first.
+ PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
+ }
+
if (PartEVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartEVT)) {
// For a truncate, see if we have any information to
@@ -319,9 +329,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
"Cannot handle this kind of promotion");
// Promoted vector extract
- bool Smaller = ValueVT.bitsLE(PartEVT);
- return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, ValueVT, Val);
+ return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
}
@@ -339,11 +347,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
}
if (ValueVT.getVectorNumElements() == 1 &&
- ValueVT.getVectorElementType() != PartEVT) {
- bool Smaller = ValueVT.bitsLE(PartEVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, ValueVT.getScalarType(), Val);
- }
+ ValueVT.getVectorElementType() != PartEVT)
+ Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType());
return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
@@ -387,6 +392,12 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
+ if (ValueVT.isFloatingPoint()) {
+ // FP values need to be bitcast, then extended if they are being put
+ // into a larger container.
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ }
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
@@ -520,9 +531,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
// Promoted vector extract
- bool Smaller = PartEVT.bitsLE(ValueVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, PartVT, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else{
// Vector -> scalar conversion.
assert(ValueVT.getVectorNumElements() == 1 &&
@@ -531,9 +540,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
- bool Smaller = ValueVT.bitsLE(PartVT);
- Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, PartVT, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
}
Parts[0] = Val;
@@ -595,8 +602,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
+ for (EVT ValueVT : ValueVTs) {
unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
@@ -907,7 +913,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
visit(I.getOpcode(), I);
- if (!isa<TerminatorInst>(&I) && !HasTailCall)
+ if (!isa<TerminatorInst>(&I) && !HasTailCall &&
+ !isStatepoint(&I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
CurInst = nullptr;
@@ -943,14 +950,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
assert(Variable->isValidLocationForIntrinsic(dl) &&
"Expected inlined-at fields to agree");
uint64_t Offset = DI->getOffset();
- // A dbg.value for an alloca is always indirect.
- bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
SDDbgValue *SDV;
if (Val.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect,
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false,
Val)) {
SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(),
- IsIndirect, Offset, dl, DbgSDNodeOrder);
+ false, Offset, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
@@ -1168,6 +1173,135 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
llvm_unreachable("Can't get register for value!");
}
+void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
+ MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
+ // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ CatchPadMBB->setIsEHFuncletEntry();
+
+ DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot()));
+}
+
+void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
+ // Update machine-CFG edge.
+ MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
+ FuncInfo.MBB->addSuccessor(TargetMBB);
+
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsSEH = isAsynchronousEHPersonality(Pers);
+ if (IsSEH) {
+ // If this is not a fall-through branch or optimizations are switched off,
+ // emit the branch.
+ if (TargetMBB != NextBlock(FuncInfo.MBB) ||
+ TM.getOptLevel() == CodeGenOpt::None)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB)));
+ return;
+ }
+
+ // Figure out the funclet membership for the catchret's successor.
+ // This will be used by the FuncletLayout pass to determine how to order the
+ // BB's.
+ WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
+ const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I];
+ assert(SuccessorColor && "No parent funclet for catchret!");
+ MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
+ assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
+
+ // Create the terminator node.
+ SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB),
+ DAG.getBasicBlock(SuccessorColorMBB));
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
+ // Don't emit any special code for the cleanuppad instruction. It just marks
+ // the start of a funclet.
+ FuncInfo.MBB->setIsEHFuncletEntry();
+ FuncInfo.MBB->setIsCleanupFuncletEntry();
+}
+
+/// When an invoke or a cleanupret unwinds to the next EH pad, there are
+/// many places it could ultimately go. In the IR, we have a single unwind
+/// destination, but in the machine CFG, we enumerate all the possible blocks.
+/// This function skips over imaginary basic blocks that hold catchswitch
+/// instructions, and finds all the "real" machine
+/// basic block destinations. As those destinations may not be successors of
+/// EHPadBB, here we also calculate the edge probability to those destinations.
+/// The passed-in Prob is the edge probability to EHPadBB.
+static void findUnwindDestinations(
+ FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ EHPersonality Personality =
+ classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ BasicBlock *NewEHPadBB = nullptr;
+ if (isa<LandingPadInst>(Pad)) {
+ // Stop on landingpads. They are not funclets.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ break;
+ } else if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads. Cleanups are always funclet entries for all known
+ // personalities.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ break;
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+ // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ }
+ NewEHPadBB = CatchSwitch->getUnwindDest();
+ } else {
+ continue;
+ }
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI && NewEHPadBB)
+ Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
+ EHPadBB = NewEHPadBB;
+ }
+}
+
+void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
+ // Update successor info.
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ auto UnwindDest = I.getUnwindDest();
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability UnwindDestProb =
+ (BPI && UnwindDest)
+ ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
+ }
+ FuncInfo.MBB->normalizeSuccProbs();
+
+ // Create the terminator node.
+ SDValue Ret =
+ DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
+ report_fatal_error("visitCatchSwitch not yet implemented!");
+}
+
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
@@ -1186,7 +1320,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()),
PtrValueVTs);
- SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+ SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
SmallVector<EVT, 4> ValueVTs;
@@ -1334,25 +1469,34 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
-uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const {
+BranchProbability
+SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
- if (!BPI)
- return 0;
const BasicBlock *SrcBB = Src->getBasicBlock();
const BasicBlock *DstBB = Dst->getBasicBlock();
- return BPI->getEdgeWeight(SrcBB, DstBB);
+ if (!BPI) {
+ // If BPI is not available, set the default probability as 1 / N, where N is
+ // the number of successors.
+ auto SuccSize = std::max<uint32_t>(
+ std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1);
+ return BranchProbability(1, SuccSize);
+ }
+ return BPI->getEdgeProbability(SrcBB, DstBB);
}
-void SelectionDAGBuilder::
-addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
- uint32_t Weight /* = 0 */) {
- if (!Weight)
- Weight = getEdgeWeight(Src, Dst);
- Src->addSuccessor(Dst, Weight);
+void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst,
+ BranchProbability Prob) {
+ if (!FuncInfo.BPI)
+ Src->addSuccessorWithoutProb(Dst);
+ else {
+ if (Prob.isUnknown())
+ Prob = getEdgeProbability(Src, Dst);
+ Src->addSuccessor(Dst, Prob);
+ }
}
-
static bool InBlock(const Value *V, const BasicBlock *BB) {
if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() == BB;
@@ -1369,8 +1513,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- uint32_t TWeight,
- uint32_t FWeight) {
+ BranchProbability TProb,
+ BranchProbability FProb) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
@@ -1385,17 +1529,15 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
ISD::CondCode Condition;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
Condition = getICmpCondCode(IC->getPredicate());
- } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ } else {
+ const FCmpInst *FC = cast<FCmpInst>(Cond);
Condition = getFCmpCondCode(FC->getPredicate());
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
- } else {
- (void)Condition; // silence warning.
- llvm_unreachable("Unknown compare instruction");
}
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
- TBB, FBB, CurBB, TWeight, FWeight);
+ TBB, FBB, CurBB, TProb, FProb);
SwitchCases.push_back(CB);
return;
}
@@ -1403,26 +1545,19 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
- nullptr, TBB, FBB, CurBB, TWeight, FWeight);
+ nullptr, TBB, FBB, CurBB, TProb, FProb);
SwitchCases.push_back(CB);
}
-/// Scale down both weights to fit into uint32_t.
-static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
- uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
- uint32_t Scale = (NewMax / UINT32_MAX) + 1;
- NewTrue = NewTrue / Scale;
- NewFalse = NewFalse / Scale;
-}
-
/// FindMergedConditions - If Cond is an expression like
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- unsigned Opc, uint32_t TWeight,
- uint32_t FWeight) {
+ Instruction::BinaryOps Opc,
+ BranchProbability TProb,
+ BranchProbability FProb) {
// If this node is not part of the or/and tree, emit it as a branch.
const Instruction *BOp = dyn_cast<Instruction>(Cond);
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
@@ -1431,12 +1566,12 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
- TWeight, FWeight);
+ TProb, FProb);
return;
}
// Create TmpBB after CurBB.
- MachineFunction::iterator BBI = CurBB;
+ MachineFunction::iterator BBI(CurBB);
MachineFunction &MF = DAG.getMachineFunction();
MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
CurBB->getParent()->insert(++BBI, TmpBB);
@@ -1455,26 +1590,25 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
// The requirement is that
// TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
// = TrueProb for original BB.
- // Assuming the original weights are A and B, one choice is to set BB1's
- // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
- // assumes that
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+ // A/(1+B) and 2B/(1+B). This choice assumes that
// TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
// Another choice is to assume TrueProb for BB1 equals to TrueProb for
// TmpBB, but the math is more complicated.
- uint64_t NewTrueWeight = TWeight;
- uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ auto NewTrueProb = TProb / 2;
+ auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ NewTrueProb, NewFalseProb);
- NewTrueWeight = TWeight;
- NewFalseWeight = 2 * (uint64_t)FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+ SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ Probs[0], Probs[1]);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
@@ -1491,24 +1625,23 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
// The requirement is that
// FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
// = FalseProb for original BB.
- // Assuming the original weights are A and B, one choice is to set BB1's
- // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
- // assumes that
- // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
-
- uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight;
- uint64_t NewFalseWeight = FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+ // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+ // TrueProb for BB1 * FalseProb for TmpBB.
+
+ auto NewTrueProb = TProb + FProb / 2;
+ auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ NewTrueProb, NewFalseProb);
- NewTrueWeight = 2 * (uint64_t)TWeight;
- NewFalseWeight = FWeight;
- ScaleWeights(NewTrueWeight, NewFalseWeight);
+ // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+ SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- NewTrueWeight, NewFalseWeight);
+ Probs[0], Probs[1]);
}
}
@@ -1585,12 +1718,14 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
//
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- if (!DAG.getTargetLoweringInfo().isJumpExpensive() &&
- BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And ||
- BOp->getOpcode() == Instruction::Or)) {
+ Instruction::BinaryOps Opcode = BOp->getOpcode();
+ if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
+ !I.getMetadata(LLVMContext::MD_unpredictable) &&
+ (Opcode == Instruction::And || Opcode == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
- BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
- getEdgeWeight(BrMBB, Succ1MBB));
+ Opcode,
+ getEdgeProbability(BrMBB, Succ0MBB),
+ getEdgeProbability(BrMBB, Succ1MBB));
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
@@ -1669,11 +1804,12 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
}
// Update successor info
- addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
+ addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
// TrueBB and FalseBB are always different unless the incoming IR is
// degenerate. This only happens when running llc on weird IR.
if (CB.TrueBB != CB.FalseBB)
- addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+ addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
+ SwitchBB->normalizeSuccProbs();
// If the lhs block is the next block, invert the condition so that we can
// fall through to the lhs instead of the rhs block.
@@ -1797,10 +1933,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
GuardPtr, MachinePointerInfo(IRGuard, 0),
true, false, false, Align);
- SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(),
- StackSlotPtr,
- MachinePointerInfo::getFixedStack(FI),
- true, false, false, Align);
+ SDValue StackSlot = DAG.getLoad(
+ PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true,
+ false, false, Align);
// Perform the comparison via a subtract/getsetcc.
EVT VT = Guard.getValueType();
@@ -1837,7 +1973,7 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
- nullptr, 0, false, getCurSDLoc(), false, false).second;
+ None, false, getCurSDLoc(), false, false).second;
DAG.setRoot(Chain);
}
@@ -1884,8 +2020,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- addSuccessorWithWeight(SwitchBB, B.Default);
- addSuccessorWithWeight(SwitchBB, MBB);
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ addSuccessorWithProb(SwitchBB, MBB, B.Prob);
+ SwitchBB->normalizeSuccProbs();
SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, CopyTo, RangeCmp,
@@ -1902,7 +2039,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
/// visitBitTestCase - this function produces one "bit test"
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
- uint32_t BranchWeightToNext,
+ BranchProbability BranchProbToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
@@ -1938,10 +2075,14 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
}
- // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
- addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight);
- // The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
- addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
+ // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
+ addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
+ // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
+ addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
+ // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
+ // one as they are relative probabilities (and thus work more like weights),
+ // and hence we need to normalize them to let the sum of them become one.
+ SwitchBB->normalizeSuccProbs();
SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(),
@@ -1958,9 +2099,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
- // Retrieve successors.
+ // Retrieve successors. Look through artificial IR level blocks like
+ // catchswitch for successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
- MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+ const BasicBlock *EHPadBB = I.getSuccessor(1);
const Value *Callee(I.getCalledValue());
const Function *Fn = dyn_cast<Function>(Callee);
@@ -1975,14 +2117,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
- visitPatchpoint(&I, LandingPad);
+ visitPatchpoint(&I, EHPadBB);
break;
case Intrinsic::experimental_gc_statepoint:
- LowerStatepoint(ImmutableStatepoint(&I), LandingPad);
+ LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
break;
}
} else
- LowerCallTo(&I, getValue(Callee), false, LandingPad);
+ LowerCallTo(&I, getValue(Callee), false, EHPadBB);
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
@@ -1992,9 +2134,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
CopyToExportRegsIfNeeded(&I);
}
- // Update successor info
- addSuccessorWithWeight(InvokeMBB, Return);
- addSuccessorWithWeight(InvokeMBB, LandingPad);
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability EHPadBBProb =
+ BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
+
+ // Update successor info.
+ addSuccessorWithProb(InvokeMBB, Return);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
+ }
+ InvokeMBB->normalizeSuccProbs();
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
@@ -2007,7 +2160,7 @@ void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
}
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
- assert(FuncInfo.MBB->isLandingPad() &&
+ assert(FuncInfo.MBB->isEHPad() &&
"Call to landingpad not in landing pad!");
MachineBasicBlock *MBB = FuncInfo.MBB;
@@ -2017,8 +2170,16 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.getExceptionPointerRegister() == 0 &&
- TLI.getExceptionSelectorRegister() == 0)
+ const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
+ if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
+ TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
+ return;
+
+ // If landingpad's return type is token type, we don't create DAG nodes
+ // for its exception pointer and selector value. The extraction of exception
+ // pointer or selector value from token type landingpads is not currently
+ // supported.
+ if (LP.getType()->isTokenTy())
return;
SmallVector<EVT, 2> ValueVTs;
@@ -2074,8 +2235,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
// If this case has the same successor and is a neighbour, merge it into
// the previous cluster.
Clusters[DstIndex - 1].High = CaseVal;
- Clusters[DstIndex - 1].Weight += CC.Weight;
- assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!");
+ Clusters[DstIndex - 1].Prob += CC.Prob;
} else {
std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
sizeof(Clusters[SrcIndex]));
@@ -2109,8 +2269,9 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
continue;
MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
- addSuccessorWithWeight(IndirectBrMBB, Succ);
+ addSuccessorWithProb(IndirectBrMBB, Succ);
}
+ IndirectBrMBB->normalizeSuccProbs();
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
MVT::Other, getControlRoot(),
@@ -2119,7 +2280,8 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
if (DAG.getTarget().Options.TrapUnreachable)
- DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+ DAG.setRoot(
+ DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
void SelectionDAGBuilder::visitFSub(const User &I) {
@@ -2260,6 +2422,10 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
+
+ // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
+ // FIXME: We should propagate the fast-math-flags to the DAG node itself for
+ // further optimization, but currently FMF is only applicable to binary nodes.
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
@@ -2284,27 +2450,74 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
// Min/max matching is only viable if all output VTs are the same.
if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
- Value *LHS, *RHS;
- SelectPatternFlavor SPF = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
- ISD::NodeType Opc = ISD::DELETED_NODE;
- switch (SPF) {
- case SPF_UMAX: Opc = ISD::UMAX; break;
- case SPF_UMIN: Opc = ISD::UMIN; break;
- case SPF_SMAX: Opc = ISD::SMAX; break;
- case SPF_SMIN: Opc = ISD::SMIN; break;
- default: break;
- }
-
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
- while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector)
+
+ // We care about the legality of the operation after it has been type
+ // legalized.
+ while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
+ VT != TLI.getTypeToTransformTo(Ctx, VT))
VT = TLI.getTypeToTransformTo(Ctx, VT);
- if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) &&
- // If the underlying comparison instruction is used by any other instruction,
- // the consumed instructions won't be destroyed, so it is not profitable
- // to convert to a min/max.
+ // If the vselect is legal, assume we want to leave this as a vector setcc +
+ // vselect. Otherwise, if this is going to be scalarized, we want to see if
+ // min/max is legal on the scalar type.
+ bool UseScalarMinMax = VT.isVector() &&
+ !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
+
+ Value *LHS, *RHS;
+ auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
+ ISD::NodeType Opc = ISD::DELETED_NODE;
+ switch (SPR.Flavor) {
+ case SPF_UMAX: Opc = ISD::UMAX; break;
+ case SPF_UMIN: Opc = ISD::UMIN; break;
+ case SPF_SMAX: Opc = ISD::SMAX; break;
+ case SPF_SMIN: Opc = ISD::SMIN; break;
+ case SPF_FMINNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_ANY: {
+ if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
+ Opc = ISD::FMINNUM;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
+ Opc = ISD::FMINNAN;
+ else if (UseScalarMinMax)
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
+ ISD::FMINNUM : ISD::FMINNAN;
+ break;
+ }
+ }
+ break;
+ case SPF_FMAXNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_ANY:
+
+ if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
+ Opc = ISD::FMAXNUM;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
+ Opc = ISD::FMAXNAN;
+ else if (UseScalarMinMax)
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
+ ISD::FMAXNUM : ISD::FMAXNAN;
+ break;
+ }
+ break;
+ default: break;
+ }
+
+ if (Opc != ISD::DELETED_NODE &&
+ (TLI.isOperationLegalOrCustom(Opc, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
+ // If the underlying comparison instruction is used by any other
+ // instruction, the consumed instructions won't be destroyed, so it is
+ // not profitable to convert to a min/max.
cast<SelectInst>(&I)->getCondition()->hasOneUse()) {
OpCode = Opc;
LHSVal = getValue(LHS);
@@ -2920,7 +3133,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
// throughout the function's lifetime.
bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr &&
- isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout());
+ isDereferenceablePointer(SV, DAG.getDataLayout());
unsigned Alignment = I.getAlignment();
AAMDNodes AAInfo;
@@ -2940,8 +3153,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA->pointsToConstantMemory(
- MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) {
+ else if (AA->pointsToConstantMemory(MemoryLocation(
+ SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -3056,7 +3269,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
- // llvm.masked.store.*(Src0, Ptr, alignemt, Mask)
+ // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Value *PtrOperand = I.getArgOperand(1);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(I.getArgOperand(0));
@@ -3080,63 +3293,70 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
setValue(&I, StoreNode);
}
-// Gather/scatter receive a vector of pointers.
-// This vector of pointers may be represented as a base pointer + vector of
-// indices, it depends on GEP and instruction preceeding GEP
-// that calculates indices
-static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index,
+// Get a uniform base for the Gather/Scatter intrinsic.
+// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
+// We try to represent it as a base pointer + vector of indices.
+// Usually, the vector of pointers comes from a 'getelementptr' instruction.
+// The first operand of the GEP may be a single pointer or a vector of pointers
+// Example:
+// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
+// or
+// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
+// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
+//
+// When the first GEP operand is a single pointer - it is the uniform base we
+// are looking for. If first operand of the GEP is a splat vector - we
+// extract the spalt value and use it as a uniform base.
+// In all other cases the function returns 'false'.
+//
+static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index,
SelectionDAGBuilder* SDB) {
- assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type");
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
- if (!Gep || Gep->getNumOperands() > 2)
+ SelectionDAG& DAG = SDB->DAG;
+ LLVMContext &Context = *DAG.getContext();
+
+ assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP || GEP->getNumOperands() > 2)
return false;
- ShuffleVectorInst *ShuffleInst =
- dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand());
- if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() ||
- cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() !=
- Instruction::InsertElement)
+
+ const Value *GEPPtr = GEP->getPointerOperand();
+ if (!GEPPtr->getType()->isVectorTy())
+ Ptr = GEPPtr;
+ else if (!(Ptr = getSplatValue(GEPPtr)))
return false;
- Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1);
+ Value *IndexVal = GEP->getOperand(1);
- SelectionDAG& DAG = SDB->DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // Check is the Ptr is inside current basic block
- // If not, look for the shuffle instruction
- if (SDB->findValue(Ptr))
- Base = SDB->getValue(Ptr);
- else if (SDB->findValue(ShuffleInst)) {
- SDValue ShuffleNode = SDB->getValue(ShuffleInst);
- SDLoc sdl = ShuffleNode;
- Base = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, sdl,
- ShuffleNode.getValueType().getScalarType(), ShuffleNode,
- DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDB->setValue(Ptr, Base);
- }
- else
+ // The operands of the GEP may be defined in another basic block.
+ // In this case we'll not find nodes for the operands.
+ if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
return false;
- Value *IndexVal = Gep->getOperand(1);
- if (SDB->findValue(IndexVal)) {
- Index = SDB->getValue(IndexVal);
+ Base = SDB->getValue(Ptr);
+ Index = SDB->getValue(IndexVal);
- if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+ // Suppress sign extension.
+ if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+ if (SDB->findValue(Sext->getOperand(0))) {
IndexVal = Sext->getOperand(0);
- if (SDB->findValue(IndexVal))
- Index = SDB->getValue(IndexVal);
+ Index = SDB->getValue(IndexVal);
}
- return true;
}
- return false;
+ if (!Index.getValueType().isVector()) {
+ unsigned GEPWidth = GEP->getType()->getVectorNumElements();
+ EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
+ SmallVector<SDValue, 16> Ops(GEPWidth, Index);
+ Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops);
+ }
+ return true;
}
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask)
- Value *Ptr = I.getArgOperand(1);
+ const Value *Ptr = I.getArgOperand(1);
SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));
EVT VT = Src0.getValueType();
@@ -3150,10 +3370,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Base;
SDValue Index;
- Value *BasePtr = Ptr;
+ const Value *BasePtr = Ptr;
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
- Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
+ const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
MachineMemOperand::MOStore, VT.getStoreSize(),
@@ -3190,7 +3410,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
SDValue InChain = DAG.getRoot();
if (AA->pointsToConstantMemory(MemoryLocation(
- PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) {
+ PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
InChain = DAG.getEntryNode();
}
@@ -3212,7 +3433,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
- Value *Ptr = I.getArgOperand(0);
+ const Value *Ptr = I.getArgOperand(0);
SDValue Src0 = getValue(I.getArgOperand(3));
SDValue Mask = getValue(I.getArgOperand(2));
@@ -3229,12 +3450,13 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
- Value *BasePtr = Ptr;
+ const Value *BasePtr = Ptr;
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
bool ConstantMemory = false;
if (UniformBase &&
- AA->pointsToConstantMemory(
- MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) {
+ AA->pointsToConstantMemory(MemoryLocation(
+ BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -3511,6 +3733,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) {
static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
SelectionDAG &DAG) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
// IntegerPartOfX = ((int32_t)(t0);
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
@@ -3609,6 +3833,8 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// #define LOG2OFe 1.4426950f
// t0 = Op * LOG2OFe
+
+ // TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
getF32Constant(DAG, 0x3fb8aa3b, dl));
return getLimitedPrecisionExp2(t0, dl, DAG);
@@ -3622,6 +3848,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3718,6 +3947,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3813,6 +4045,9 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3922,6 +4157,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
}
}
+ // TODO: What fast-math-flags should be set on the FMUL node?
if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
@@ -3955,9 +4191,9 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->hasFnAttribute(Attribute::OptimizeForSize) ||
- // If optimizing for size, don't insert too many multiplies. This
- // inserts up to 5 multiplies.
+ if (!F->optForSize() ||
+ // If optimizing for size, don't insert too many multiplies.
+ // This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
@@ -3965,6 +4201,8 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
// the benefit of being both really simple and much better than a libcall.
SDValue Res; // Logically starts equal to 1.0
SDValue CurSquare = LHS;
+ // TODO: Intrinsics should have fast-math-flags that propagate to these
+ // nodes.
while (Val) {
if (Val & 1) {
if (Res.getNode())
@@ -3990,22 +4228,20 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
-// getTruncatedArgReg - Find underlying register used for an truncated
-// argument.
-static unsigned getTruncatedArgReg(const SDValue &N) {
- if (N.getOpcode() != ISD::TRUNCATE)
+// getUnderlyingArgReg - Find underlying register used for a truncated or
+// bitcasted argument.
+static unsigned getUnderlyingArgReg(const SDValue &N) {
+ switch (N.getOpcode()) {
+ case ISD::CopyFromReg:
+ return cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ case ISD::BITCAST:
+ case ISD::AssertZext:
+ case ISD::AssertSext:
+ case ISD::TRUNCATE:
+ return getUnderlyingArgReg(N.getOperand(0));
+ default:
return 0;
-
- const SDValue &Ext = N.getOperand(0);
- if (Ext.getOpcode() == ISD::AssertZext ||
- Ext.getOpcode() == ISD::AssertSext) {
- const SDValue &CFR = Ext.getOperand(0);
- if (CFR.getOpcode() == ISD::CopyFromReg)
- return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
- if (CFR.getOpcode() == ISD::TRUNCATE)
- return getTruncatedArgReg(CFR);
}
- return 0;
}
/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
@@ -4033,11 +4269,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
Op = MachineOperand::CreateFI(FI);
if (!Op && N.getNode()) {
- unsigned Reg;
- if (N.getOpcode() == ISD::CopyFromReg)
- Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
- else
- Reg = getTruncatedArgReg(N);
+ unsigned Reg = getUnderlyingArgReg(N);
if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
unsigned PR = RegInfo.getLiveInPhysReg(Reg);
@@ -4145,14 +4377,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::longjmp:
return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -4169,12 +4393,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memset: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -4189,14 +4407,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memmove: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -4238,33 +4448,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
// Parameters are handled specially.
- bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable ||
- isa<Argument>(Address);
-
- const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
-
- if (isParameter && !AI) {
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
- if (FINode)
- // Byval parameter. We have a frame index at this point.
- SDV = DAG.getFrameIndexDbgValue(
- Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder);
- else {
- // Address is an argument, so try to emit its dbg value using
- // virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
- N);
- return nullptr;
- }
- } else if (AI)
+ bool isParameter = Variable->isParameter() || isa<Argument>(Address);
+ auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (isParameter && FINode) {
+ // Byval parameter. We have a frame index at this point.
+ SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
+ FINode->getIndex(), 0, dl, SDNodeOrder);
+ } else if (isa<Argument>(Address)) {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
+ N);
+ return nullptr;
+ } else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, 0, dl, SDNodeOrder);
- else {
- // Can't do anything with other non-AI cases yet.
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
- DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
- DEBUG(Address->dump());
- return nullptr;
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
@@ -4315,12 +4513,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
- // A dbg.value for an alloca is always indirect.
- bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset,
- IsIndirect, N)) {
+ false, N)) {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
- IsIndirect, Offset, dl, SDNodeOrder);
+ false, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
} else if (!V->use_empty() ) {
@@ -4421,6 +4617,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getRoot(), getValue(I.getArgOperand(0))));
return nullptr;
}
+ case Intrinsic::eh_sjlj_setup_dispatch: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
+ getRoot()));
+ return nullptr;
+ }
case Intrinsic::masked_gather:
visitMaskedGather(I);
@@ -4614,6 +4815,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
} else {
+ // TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
@@ -4652,6 +4854,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Res.getValue(1));
return nullptr;
}
+ case Intrinsic::bitreverse:
+ setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return nullptr;
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -4693,6 +4900,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
return nullptr;
}
+ case Intrinsic::get_dynamic_area_offset: {
+ SDValue Op = getRoot();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ // Result type for @llvm.get.dynamic.area.offset should match PtrTy for
+ // target.
+ if (PtrTy != ResTy)
+ report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
+ " intrinsic!");
+ Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
+ Op);
+ DAG.setRoot(Op);
+ setValue(&I, Res);
+ return nullptr;
+ }
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
@@ -4743,8 +4965,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
- Res = DAG.getStore(Chain, sdl, Src, FIN,
- MachinePointerInfo::getFixedStack(FI),
+ Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI),
true, false, 0);
setValue(&I, Res);
DAG.setRoot(Res);
@@ -4946,9 +5168,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::clear_cache:
return TLI.getClearCacheBuiltinName();
- case Intrinsic::eh_actions:
- setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
- return nullptr;
case Intrinsic::donothing:
// ignore
return nullptr;
@@ -4965,9 +5184,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
visitStatepoint(I);
return nullptr;
}
- case Intrinsic::experimental_gc_result_int:
- case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_ptr:
case Intrinsic::experimental_gc_result: {
visitGCResult(I);
return nullptr;
@@ -4978,7 +5194,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
-
+ case Intrinsic::instrprof_value_profile:
+ llvm_unreachable("instrprof failed to lower a value profiling call");
case Intrinsic::localescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
@@ -5032,19 +5249,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
- case Intrinsic::eh_begincatch:
- case Intrinsic::eh_endcatch:
- llvm_unreachable("begin/end catch intrinsics not lowered in codegen");
+
+ case Intrinsic::eh_exceptionpointer:
case Intrinsic::eh_exceptioncode: {
- unsigned Reg = TLI.getExceptionPointerRegister();
- assert(Reg && "cannot get exception code on this platform");
+ // Get the exception pointer vreg, copy from it, and resize it to fit.
+ const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
- assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad");
- unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC);
+ unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
SDValue N =
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
- N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
+ if (Intrinsic == Intrinsic::eh_exceptioncode)
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
setValue(&I, N);
return nullptr;
}
@@ -5053,11 +5269,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
MCSymbol *BeginLabel = nullptr;
- if (LandingPad) {
+ if (EHPadBB) {
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
BeginLabel = MMI.getContext().createTempSymbol();
@@ -5067,7 +5283,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
unsigned CallSiteIndex = MMI.getCurrentCallSite();
if (CallSiteIndex) {
MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
- LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
+ LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
// Now that the call site is handled, stop tracking it.
MMI.setCurrentCallSite(0);
@@ -5100,14 +5316,21 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
DAG.setRoot(Result.second);
}
- if (LandingPad) {
+ if (EHPadBB) {
// Insert a label at the end of the invoke call to mark the try range. This
// can be used to detect deletion of the invoke via the MachineModuleInfo.
MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
// Inform MachineModuleInfo of range.
- MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
+ if (MMI.hasEHFunclets()) {
+ assert(CLI.CS);
+ WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
+ EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()),
+ BeginLabel, EndLabel);
+ } else {
+ MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+ }
}
return Result;
@@ -5115,7 +5338,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
bool isTailCall,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
Type *RetTy = FTy->getReturnType();
@@ -5154,7 +5377,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
.setCallee(RetTy, FTy, Callee, std::move(Args), CS)
.setTailCall(isTailCall);
- std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad);
+ std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode())
setValue(CS.getInstruction(), Result.first);
@@ -5978,7 +6201,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
std::pair<unsigned, const TargetRegisterClass *> MatchRC =
TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
@@ -6037,10 +6260,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot =
DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout()));
- Chain = DAG.getStore(Chain, getCurSDLoc(),
- OpInfo.CallOperand, StackSlot,
- MachinePointerInfo::getFixedStack(SSFI),
- false, false, 0);
+ Chain = DAG.getStore(
+ Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ false, false, 0);
OpInfo.CallOperand = StackSlot;
}
@@ -6460,12 +6683,9 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
-std::pair<SDValue, SDValue>
-SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
- unsigned NumArgs, SDValue Callee,
- Type *ReturnTy,
- MachineBasicBlock *LandingPad,
- bool IsPatchPoint) {
+std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(
+ ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee,
+ Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
@@ -6489,7 +6709,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
.setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs)
.setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
- return lowerInvokable(CLI, LandingPad);
+ return lowerInvokable(CLI, EHPadBB);
}
/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
@@ -6593,7 +6813,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
- MachineBasicBlock *LandingPad) {
+ const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
// i8* <target>,
@@ -6630,9 +6850,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
Type *ReturnTy =
IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
- std::pair<SDValue, SDValue> Result =
- lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
- LandingPad, true);
+ std::pair<SDValue, SDValue> Result = lowerCallOperands(
+ CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true);
SDNode *CallEnd = Result.second.getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
@@ -6926,8 +7145,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
i, j*Parts[j].getValueType().getStoreSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
- else if (j != 0)
+ else if (j != 0) {
MyFlags.Flags.setOrigAlign(1);
+ if (j == NumParts - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
@@ -6986,8 +7208,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
PtrVT));
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
- MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false,
- false, false, 1);
+ MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
+ DemoteStackIdx, Offsets[i]),
+ false, false, false, 1);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
@@ -7069,9 +7292,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
if (FastISel)
return A->use_empty();
- const BasicBlock *Entry = A->getParent()->begin();
+ const BasicBlock &Entry = A->getParent()->front();
for (const User *U : A->users())
- if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+ if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
return false; // Use not in entry block.
return true;
@@ -7138,6 +7361,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// in the various CC lowering callbacks.
Flags.setByVal();
}
+ if (F.getCallingConv() == CallingConv::X86_INTR) {
+ // IA Interrupt passes frame (1st parameter) by value in the stack.
+ if (Idx == 1)
+ Flags.setByVal();
+ }
if (Flags.isByVal() || Flags.isInAlloca()) {
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
@@ -7165,8 +7393,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
- else if (i > 0)
+ else if (i > 0) {
MyFlags.Flags.setOrigAlign(1);
+ if (i == NumRegs - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
Ins.push_back(MyFlags);
}
if (NeedsRegBlock && Value == NumValues - 1)
@@ -7235,12 +7466,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// If this argument is unused then remember its value. It is used to generate
// debugging information.
if (I->use_empty() && NumValues) {
- SDB->setUnusedArgValue(I, InVals[i]);
+ SDB->setUnusedArgValue(&*I, InVals[i]);
// Also remember any frame index for use in FastISel.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
- FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
}
for (unsigned Val = 0; Val != NumValues; ++Val) {
@@ -7270,18 +7501,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Note down frame index.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
- FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
- SDB->setValue(I, Res);
+ SDB->setValue(&*I, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
- FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
}
// If this argument is live outside of the entry block, insert a copy from
@@ -7293,13 +7524,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// uses with vregs.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- FuncInfo->ValueMap[I] = Reg;
+ FuncInfo->ValueMap[&*I] = Reg;
continue;
}
}
- if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
- FuncInfo->InitializeRegForValue(I);
- SDB->CopyToExportRegsIfNeeded(I);
+ if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(&*I);
+ SDB->CopyToExportRegsIfNeeded(&*I);
}
}
@@ -7401,21 +7632,21 @@ AddSuccessorMBB(const BasicBlock *BB,
// If SuccBB has not been created yet, create it.
if (!SuccMBB) {
MachineFunction *MF = ParentMBB->getParent();
- MachineFunction::iterator BBI = ParentMBB;
+ MachineFunction::iterator BBI(ParentMBB);
SuccMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(++BBI, SuccMBB);
}
// Add it as a successor of ParentMBB.
ParentMBB->addSuccessor(
- SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely));
+ SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
return SuccMBB;
}
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
- MachineFunction::iterator I = MBB;
+ MachineFunction::iterator I(MBB);
if (++I == FuncInfo.MF->end())
return nullptr;
- return I;
+ return &*I;
}
/// During lowering new call nodes can be created (such as memset, etc.).
@@ -7469,14 +7700,18 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
CaseCluster &JTCluster) {
assert(First <= Last);
- uint32_t Weight = 0;
+ auto Prob = BranchProbability::getZero();
unsigned NumCmps = 0;
std::vector<MachineBasicBlock*> Table;
- DenseMap<MachineBasicBlock*, uint32_t> JTWeights;
+ DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
+
+ // Initialize probabilities in JTProbs.
+ for (unsigned I = First; I <= Last; ++I)
+ JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
+
for (unsigned I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
- Weight += Clusters[I].Weight;
- assert(Weight >= Clusters[I].Weight && "Weight overflow!");
+ Prob += Clusters[I].Prob;
APInt Low = Clusters[I].Low->getValue();
APInt High = Clusters[I].High->getValue();
NumCmps += (Low == High) ? 1 : 2;
@@ -7491,10 +7726,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
for (uint64_t J = 0; J < ClusterSize; ++J)
Table.push_back(Clusters[I].MBB);
- JTWeights[Clusters[I].MBB] += Clusters[I].Weight;
+ JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
}
- unsigned NumDests = JTWeights.size();
+ unsigned NumDests = JTProbs.size();
if (isSuitableForBitTests(NumDests, NumCmps,
Clusters[First].Low->getValue(),
Clusters[Last].High->getValue())) {
@@ -7513,9 +7748,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
for (MachineBasicBlock *Succ : Table) {
if (Done.count(Succ))
continue;
- addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]);
+ addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
Done.insert(Succ);
}
+ JumpTableMBB->normalizeSuccProbs();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
@@ -7529,7 +7765,7 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
JTCases.emplace_back(std::move(JTH), std::move(JT));
JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
- JTCases.size() - 1, Weight);
+ JTCases.size() - 1, Prob);
return true;
}
@@ -7707,19 +7943,29 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
.getSizeInBits();
assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!");
- if (Low.isNonNegative() && High.slt(BitWidth)) {
- // Optimize the case where all the case values fit in a
- // word without having to subtract minValue. In this case,
- // we can optimize away the subtraction.
+ // Check if the clusters cover a contiguous range such that no value in the
+ // range will jump to the default statement.
+ bool ContiguousRange = true;
+ for (int64_t I = First + 1; I <= Last; ++I) {
+ if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
+ ContiguousRange = false;
+ break;
+ }
+ }
+
+ if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
+ // Optimize the case where all the case values fit in a word without having
+ // to subtract minValue. In this case, we can optimize away the subtraction.
LowBound = APInt::getNullValue(Low.getBitWidth());
CmpRange = High;
+ ContiguousRange = false;
} else {
LowBound = Low;
CmpRange = High - Low;
}
CaseBitsVector CBV;
- uint32_t TotalWeight = 0;
+ auto TotalProb = BranchProbability::getZero();
for (unsigned i = First; i <= Last; ++i) {
// Find the CaseBits for this destination.
unsigned j;
@@ -7727,39 +7973,40 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
if (CBV[j].BB == Clusters[i].MBB)
break;
if (j == CBV.size())
- CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0));
+ CBV.push_back(
+ CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
CaseBits *CB = &CBV[j];
- // Update Mask, Bits and ExtraWeight.
+ // Update Mask, Bits and ExtraProb.
uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
CB->Bits += Hi - Lo + 1;
- CB->ExtraWeight += Clusters[i].Weight;
- TotalWeight += Clusters[i].Weight;
- assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!");
+ CB->ExtraProb += Clusters[i].Prob;
+ TotalProb += Clusters[i].Prob;
}
BitTestInfo BTI;
std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
- // Sort by weight first, number of bits second.
- if (a.ExtraWeight != b.ExtraWeight)
- return a.ExtraWeight > b.ExtraWeight;
+ // Sort by probability first, number of bits second.
+ if (a.ExtraProb != b.ExtraProb)
+ return a.ExtraProb > b.ExtraProb;
return a.Bits > b.Bits;
});
for (auto &CB : CBV) {
MachineBasicBlock *BitTestBB =
FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
- BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
+ BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
}
BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
- SI->getCondition(), -1U, MVT::Other, false, nullptr,
- nullptr, std::move(BTI));
+ SI->getCondition(), -1U, MVT::Other, false,
+ ContiguousRange, nullptr, nullptr, std::move(BTI),
+ TotalProb);
BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
- BitTestCases.size() - 1, TotalWeight);
+ BitTestCases.size() - 1, TotalProb);
return true;
}
@@ -7868,9 +8115,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *DefaultMBB) {
MachineFunction *CurMF = FuncInfo.MF;
MachineBasicBlock *NextMBB = nullptr;
- MachineFunction::iterator BBI = W.MBB;
+ MachineFunction::iterator BBI(W.MBB);
if (++BBI != FuncInfo.MF->end())
- NextMBB = BBI;
+ NextMBB = &*BBI;
unsigned Size = W.LastCluster - W.FirstCluster + 1;
@@ -7906,13 +8153,16 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
ISD::SETEQ);
// Update successor info.
- // Both Small and Big will jump to Small.BB, so we sum up the weights.
- addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight);
- addSuccessorWithWeight(
- SwitchMBB, DefaultMBB,
- // The default destination is the first successor in IR.
- BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0)
- : 0);
+ // Both Small and Big will jump to Small.BB, so we sum up the
+ // probabilities.
+ addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
+ if (BPI)
+ addSuccessorWithProb(
+ SwitchMBB, DefaultMBB,
+ // The default destination is the first successor in IR.
+ BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
+ else
+ addSuccessorWithProb(SwitchMBB, DefaultMBB);
// Insert the true branch.
SDValue BrCond =
@@ -7929,17 +8179,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
if (TM.getOptLevel() != CodeGenOpt::None) {
- // Order cases by weight so the most likely case will be checked first.
+ // Order cases by probability so the most likely case will be checked first.
std::sort(W.FirstCluster, W.LastCluster + 1,
[](const CaseCluster &a, const CaseCluster &b) {
- return a.Weight > b.Weight;
+ return a.Prob > b.Prob;
});
// Rearrange the case blocks so that the last one falls through if possible
- // without without changing the order of weights.
+ // without without changing the order of probabilities.
for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
--I;
- if (I->Weight > W.LastCluster->Weight)
+ if (I->Prob > W.LastCluster->Prob)
break;
if (I->Kind == CC_Range && I->MBB == NextMBB) {
std::swap(*I, *W.LastCluster);
@@ -7948,12 +8198,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- // Compute total weight.
- uint32_t UnhandledWeights = 0;
- for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) {
- UnhandledWeights += I->Weight;
- assert(UnhandledWeights >= I->Weight && "Weight overflow!");
- }
+ // Compute total probability.
+ BranchProbability DefaultProb = W.DefaultProb;
+ BranchProbability UnhandledProbs = DefaultProb;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+ UnhandledProbs += I->Prob;
MachineBasicBlock *CurMBB = W.MBB;
for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
@@ -7967,6 +8216,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
+ UnhandledProbs -= I->Prob;
switch (I->Kind) {
case CC_JumpTable: {
@@ -7977,8 +8227,28 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
// The jump block hasn't been inserted yet; insert it here.
MachineBasicBlock *JumpMBB = JT->MBB;
CurMF->insert(BBI, JumpMBB);
- addSuccessorWithWeight(CurMBB, Fallthrough);
- addSuccessorWithWeight(CurMBB, JumpMBB);
+
+ auto JumpProb = I->Prob;
+ auto FallthroughProb = UnhandledProbs;
+
+ // If the default statement is a target of the jump table, we evenly
+ // distribute the default probability to successors of CurMBB. Also
+ // update the probability on the edge from JumpMBB to Fallthrough.
+ for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+ SE = JumpMBB->succ_end();
+ SI != SE; ++SI) {
+ if (*SI == DefaultMBB) {
+ JumpProb += DefaultProb / 2;
+ FallthroughProb -= DefaultProb / 2;
+ JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+ JumpMBB->normalizeSuccProbs();
+ break;
+ }
+ }
+
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+ CurMBB->normalizeSuccProbs();
// The jump table header will be inserted in our current block, do the
// range check, and fall through to our fallthrough block.
@@ -8004,8 +8274,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
BTB->Parent = CurMBB;
BTB->Default = Fallthrough;
- // If we're in the right place, emit the bit test header header right now.
- if (CurMBB ==SwitchMBB) {
+ BTB->DefaultProb = UnhandledProbs;
+ // If the cases in bit test don't form a contiguous range, we evenly
+ // distribute the probability on the edge to Fallthrough to two
+ // successors of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Prob += DefaultProb / 2;
+ BTB->DefaultProb -= DefaultProb / 2;
+ }
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
BTB->Emitted = true;
}
@@ -8028,10 +8307,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
RHS = I->High;
}
- // The false weight is the sum of all unhandled cases.
- UnhandledWeights -= I->Weight;
- CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight,
- UnhandledWeights);
+ // The false probability is the sum of all unhandled cases.
+ CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob,
+ UnhandledProbs);
if (CurMBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
@@ -8049,8 +8327,8 @@ unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
CaseClusterIt First,
CaseClusterIt Last) {
return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
- if (X.Weight != CC.Weight)
- return X.Weight > CC.Weight;
+ if (X.Prob != CC.Prob)
+ return X.Prob > CC.Prob;
// Ties are broken by comparing the case value.
return X.Low->getValue().slt(CC.Low->getValue());
@@ -8066,24 +8344,24 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
- // Balance the tree based on branch weights to create a near-optimal (in terms
- // of search time given key frequency) binary search tree. See e.g. Kurt
+ // Balance the tree based on branch probabilities to create a near-optimal (in
+ // terms of search time given key frequency) binary search tree. See e.g. Kurt
// Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
CaseClusterIt LastLeft = W.FirstCluster;
CaseClusterIt FirstRight = W.LastCluster;
- uint32_t LeftWeight = LastLeft->Weight;
- uint32_t RightWeight = FirstRight->Weight;
+ auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
+ auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
// Move LastLeft and FirstRight towards each other from opposite directions to
- // find a partitioning of the clusters which balances the weight on both
- // sides. If LeftWeight and RightWeight are equal, alternate which side is
- // taken to ensure 0-weight nodes are distributed evenly.
+ // find a partitioning of the clusters which balances the probability on both
+ // sides. If LeftProb and RightProb are equal, alternate which side is
+ // taken to ensure 0-probability nodes are distributed evenly.
unsigned I = 0;
while (LastLeft + 1 < FirstRight) {
- if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1)))
- LeftWeight += (++LastLeft)->Weight;
+ if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
+ LeftProb += (++LastLeft)->Prob;
else
- RightWeight += (--FirstRight)->Weight;
+ RightProb += (--FirstRight)->Prob;
I++;
}
@@ -8144,7 +8422,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
const ConstantInt *Pivot = PivotCluster->Low;
// New blocks will be inserted immediately after the current one.
- MachineFunction::iterator BBI = W.MBB;
+ MachineFunction::iterator BBI(W.MBB);
++BBI;
// We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
@@ -8158,7 +8436,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
} else {
LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, LeftMBB);
- WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot});
+ WorkList.push_back(
+ {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
@@ -8173,14 +8452,15 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
} else {
RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, RightMBB);
- WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT});
+ WorkList.push_back(
+ {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
// Create the CaseBlock record that will be used to lower the branch.
CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
- LeftWeight, RightWeight);
+ LeftProb, RightProb);
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
@@ -8196,9 +8476,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
for (auto I : SI.cases()) {
MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
const ConstantInt *CaseVal = I.getCaseValue();
- uint32_t Weight =
- BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0;
- Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight));
+ BranchProbability Prob =
+ BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+ : BranchProbability(1, SI.getNumCases() + 1);
+ Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
}
MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
@@ -8274,7 +8555,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
- WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr});
+ auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
+ WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.back();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 700675453fe7..49a3872d20c8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -17,6 +17,7 @@
#include "StatepointLowering.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -30,7 +31,6 @@
namespace llvm {
class AddrSpaceCastInst;
-class AliasAnalysis;
class AllocaInst;
class BasicBlock;
class BitCastInst;
@@ -154,39 +154,39 @@ private:
unsigned JTCasesIndex;
unsigned BTCasesIndex;
};
- uint32_t Weight;
+ BranchProbability Prob;
static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
- MachineBasicBlock *MBB, uint32_t Weight) {
+ MachineBasicBlock *MBB, BranchProbability Prob) {
CaseCluster C;
C.Kind = CC_Range;
C.Low = Low;
C.High = High;
C.MBB = MBB;
- C.Weight = Weight;
+ C.Prob = Prob;
return C;
}
static CaseCluster jumpTable(const ConstantInt *Low,
const ConstantInt *High, unsigned JTCasesIndex,
- uint32_t Weight) {
+ BranchProbability Prob) {
CaseCluster C;
C.Kind = CC_JumpTable;
C.Low = Low;
C.High = High;
C.JTCasesIndex = JTCasesIndex;
- C.Weight = Weight;
+ C.Prob = Prob;
return C;
}
static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
- unsigned BTCasesIndex, uint32_t Weight) {
+ unsigned BTCasesIndex, BranchProbability Prob) {
CaseCluster C;
C.Kind = CC_BitTests;
C.Low = Low;
C.High = High;
C.BTCasesIndex = BTCasesIndex;
- C.Weight = Weight;
+ C.Prob = Prob;
return C;
}
};
@@ -198,13 +198,13 @@ private:
uint64_t Mask;
MachineBasicBlock* BB;
unsigned Bits;
- uint32_t ExtraWeight;
+ BranchProbability ExtraProb;
CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
- uint32_t Weight):
- Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
+ BranchProbability Prob):
+ Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { }
- CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {}
+ CaseBits() : Mask(0), BB(nullptr), Bits(0) {}
};
typedef std::vector<CaseBits> CaseBitsVector;
@@ -217,13 +217,13 @@ private:
/// blocks needed by multi-case switch statements.
struct CaseBlock {
CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
- const Value *cmpmiddle,
- MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
- MachineBasicBlock *me,
- uint32_t trueweight = 0, uint32_t falseweight = 0)
- : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
- TrueBB(truebb), FalseBB(falsebb), ThisBB(me),
- TrueWeight(trueweight), FalseWeight(falseweight) { }
+ const Value *cmpmiddle, MachineBasicBlock *truebb,
+ MachineBasicBlock *falsebb, MachineBasicBlock *me,
+ BranchProbability trueprob = BranchProbability::getUnknown(),
+ BranchProbability falseprob = BranchProbability::getUnknown())
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob),
+ FalseProb(falseprob) {}
// CC - the condition code to use for the case block's setcc node
ISD::CondCode CC;
@@ -239,8 +239,8 @@ private:
// ThisBB - the block into which to emit the code for the setcc and branches
MachineBasicBlock *ThisBB;
- // TrueWeight/FalseWeight - branch weights.
- uint32_t TrueWeight, FalseWeight;
+ // TrueProb/FalseProb - branch weights.
+ BranchProbability TrueProb, FalseProb;
};
struct JumpTable {
@@ -272,32 +272,35 @@ private:
struct BitTestCase {
BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
- uint32_t Weight):
- Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { }
+ BranchProbability Prob):
+ Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { }
uint64_t Mask;
MachineBasicBlock *ThisBB;
MachineBasicBlock *TargetBB;
- uint32_t ExtraWeight;
+ BranchProbability ExtraProb;
};
typedef SmallVector<BitTestCase, 3> BitTestInfo;
struct BitTestBlock {
- BitTestBlock(APInt F, APInt R, const Value* SV,
- unsigned Rg, MVT RgVT, bool E,
- MachineBasicBlock* P, MachineBasicBlock* D,
- BitTestInfo C):
- First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
- Parent(P), Default(D), Cases(std::move(C)) { }
+ BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
+ bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
+ BitTestInfo C, BranchProbability Pr)
+ : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
+ ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)),
+ Prob(Pr) {}
APInt First;
APInt Range;
const Value *SValue;
unsigned Reg;
MVT RegVT;
bool Emitted;
+ bool ContiguousRange;
MachineBasicBlock *Parent;
MachineBasicBlock *Default;
BitTestInfo Cases;
+ BranchProbability Prob;
+ BranchProbability DefaultProb;
};
/// Minimum jump table density, in percent.
@@ -339,6 +342,7 @@ private:
CaseClusterIt LastCluster;
const ConstantInt *GE;
const ConstantInt *LT;
+ BranchProbability DefaultProb;
};
typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
@@ -515,6 +519,7 @@ private:
void resetPerFunctionState() {
FailureMBB = nullptr;
Guard = nullptr;
+ GuardReg = 0;
}
MachineBasicBlock *getParentMBB() { return ParentMBB; }
@@ -592,10 +597,6 @@ public:
///
FunctionLoweringInfo &FuncInfo;
- /// OptLevel - What optimization level we're generating code for.
- ///
- CodeGenOpt::Level OptLevel;
-
/// GFI - Garbage collection metadata for the function.
GCFunctionInfo *GFI;
@@ -613,7 +614,7 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
: CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
- DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+ DAG(dag), FuncInfo(funcinfo),
HasTailCall(false) {
}
@@ -692,19 +693,20 @@ public:
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
- MachineBasicBlock *SwitchBB, unsigned Opc,
- uint32_t TW, uint32_t FW);
+ MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TW,
+ BranchProbability FW);
void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- uint32_t TW, uint32_t FW);
+ BranchProbability TW, BranchProbability FW);
bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
void ExportFromCurrentBlock(const Value *V);
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
- MachineBasicBlock *LandingPad = nullptr);
+ const BasicBlock *EHPadBB = nullptr);
std::pair<SDValue, SDValue> lowerCallOperands(
ImmutableCallSite CS,
@@ -712,7 +714,7 @@ public:
unsigned NumArgs,
SDValue Callee,
Type *ReturnTy,
- MachineBasicBlock *LandingPad = nullptr,
+ const BasicBlock *EHPadBB = nullptr,
bool IsPatchPoint = false);
/// UpdateSplitBlock - When an MBB was split during scheduling, update the
@@ -722,11 +724,11 @@ public:
// This function is responsible for the whole statepoint lowering process.
// It uniformly handles invoke and call statepoints.
void LowerStatepoint(ImmutableStatepoint Statepoint,
- MachineBasicBlock *LandingPad = nullptr);
+ const BasicBlock *EHPadBB = nullptr);
private:
- std::pair<SDValue, SDValue> lowerInvokable(
- TargetLowering::CallLoweringInfo &CLI,
- MachineBasicBlock *LandingPad);
+ std::pair<SDValue, SDValue>
+ lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ const BasicBlock *EHPadBB = nullptr);
// Terminator instructions.
void visitRet(const ReturnInst &I);
@@ -734,11 +736,18 @@ private:
void visitSwitch(const SwitchInst &I);
void visitIndirectBr(const IndirectBrInst &I);
void visitUnreachable(const UnreachableInst &I);
+ void visitCleanupRet(const CleanupReturnInst &I);
+ void visitCatchSwitch(const CatchSwitchInst &I);
+ void visitCatchRet(const CatchReturnInst &I);
+ void visitCatchPad(const CatchPadInst &I);
+ void visitCleanupPad(const CleanupPadInst &CPI);
+
+ BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
+ void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown());
- uint32_t getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const;
- void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
- uint32_t Weight = 0);
public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
@@ -748,7 +757,7 @@ public:
void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
void visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
- uint32_t BranchWeightToNext,
+ BranchProbability BranchProbToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB);
@@ -842,7 +851,7 @@ private:
void visitVACopy(const CallInst &I);
void visitStackmap(const CallInst &I);
void visitPatchpoint(ImmutableCallSite CS,
- MachineBasicBlock *LandingPad = nullptr);
+ const BasicBlock *EHPadBB = nullptr);
// These three are implemented in StatepointLowering.cpp
void visitStatepoint(const CallInst &I);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 5b9b18286fae..a1c6c4c1dd63 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -30,6 +31,11 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+static cl::opt<bool>
+VerboseDAGDumping("dag-dump-verbose", cl::Hidden,
+ cl::desc("Display more information when dumping selection "
+ "DAG nodes."));
+
std::string SDNode::getOperationName(const SelectionDAG *G) const {
switch (getOpcode()) {
default:
@@ -102,6 +108,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH";
case ISD::ConstantPool: return "ConstantPool";
case ISD::TargetIndex: return "TargetIndex";
case ISD::ExternalSymbol: return "ExternalSymbol";
@@ -145,6 +152,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FABS: return "fabs";
case ISD::FMINNUM: return "fminnum";
case ISD::FMAXNUM: return "fmaxnum";
+ case ISD::FMINNAN: return "fminnan";
+ case ISD::FMAXNAN: return "fmaxnan";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::FSIN: return "fsin";
@@ -201,6 +210,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FPOWI: return "fpowi";
case ISD::SETCC: return "setcc";
+ case ISD::SETCCE: return "setcce";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
@@ -273,6 +283,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CALLSEQ_START: return "callseq_start";
case ISD::CALLSEQ_END: return "callseq_end";
+ // EH instructions
+ case ISD::CATCHRET: return "catchret";
+ case ISD::CLEANUPRET: return "cleanupret";
+
// Other operators
case ISD::LOAD: return "load";
case ISD::STORE: return "store";
@@ -295,15 +309,17 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::LIFETIME_END: return "lifetime.end";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
case ISD::GC_TRANSITION_END: return "gc_transition.end";
+ case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
// Bit manipulation
+ case ISD::BITREVERSE: return "bitreverse";
case ISD::BSWAP: return "bswap";
case ISD::CTPOP: return "ctpop";
case ISD::CTTZ: return "cttz";
case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
case ISD::CTLZ: return "ctlz";
case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
-
+
// Trampolines
case ISD::INIT_TRAMPOLINE: return "init_trampoline";
case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
@@ -320,7 +336,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETO: return "seto";
case ISD::SETUO: return "setuo";
- case ISD::SETUEQ: return "setue";
+ case ISD::SETUEQ: return "setueq";
case ISD::SETUGT: return "setugt";
case ISD::SETUGE: return "setuge";
case ISD::SETULT: return "setult";
@@ -352,6 +368,16 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
}
}
+static Printable PrintNodeId(const SDNode &Node) {
+ return Printable([&Node](raw_ostream &OS) {
+#ifndef NDEBUG
+ OS << 't' << Node.PersistentId;
+#else
+ OS << (const void*)&Node;
+#endif
+ });
+}
+
void SDNode::dump() const { dump(nullptr); }
void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
@@ -359,8 +385,6 @@ void SDNode::dump(const SelectionDAG *G) const {
}
void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
- OS << (const void*)this << ": ";
-
for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
if (i) OS << ",";
if (getValueType(i) == MVT::Other)
@@ -368,7 +392,6 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
else
OS << getValueType(i).getEVTString();
}
- OS << " = " << getOperationName(G);
}
void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
@@ -523,48 +546,58 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
<< ']';
}
- if (unsigned Order = getIROrder())
- OS << " [ORD=" << Order << ']';
+ if (VerboseDAGDumping) {
+ if (unsigned Order = getIROrder())
+ OS << " [ORD=" << Order << ']';
- if (getNodeId() != -1)
- OS << " [ID=" << getNodeId() << ']';
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
- if (!G)
- return;
+ if (!G)
+ return;
- DILocation *L = getDebugLoc();
- if (!L)
- return;
+ DILocation *L = getDebugLoc();
+ if (!L)
+ return;
+
+ if (auto *Scope = L->getScope())
+ OS << Scope->getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << L->getLine();
+ if (unsigned C = L->getColumn())
+ OS << ':' << C;
+ }
+}
- if (auto *Scope = L->getScope())
- OS << Scope->getFilename();
- else
- OS << "<unknown>";
- OS << ':' << L->getLine();
- if (unsigned C = L->getColumn())
- OS << ':' << C;
+/// Return true if this node is so simple that we should just print it inline
+/// if it appears as an operand.
+static bool shouldPrintInline(const SDNode &Node) {
+ if (Node.getOpcode() == ISD::EntryToken)
+ return false;
+ return Node.getNumOperands() == 0;
}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
- for (const SDValue &Op : N->op_values())
+ for (const SDValue &Op : N->op_values()) {
+ if (shouldPrintInline(*Op.getNode()))
+ continue;
if (Op.getNode()->hasOneUse())
DumpNodes(Op.getNode(), indent+2, G);
- else
- dbgs() << "\n" << std::string(indent+2, ' ')
- << (void*)Op.getNode() << ": <multiple use>";
+ }
- dbgs() << '\n';
dbgs().indent(indent);
N->dump(G);
}
void SelectionDAG::dump() const {
- dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n";
for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
I != E; ++I) {
- const SDNode *N = I;
- if (!N->hasOneUse() && N != getRoot().getNode())
+ const SDNode *N = &*I;
+ if (!N->hasOneUse() && N != getRoot().getNode() &&
+ (!shouldPrintInline(*N) || N->use_empty()))
DumpNodes(N, 2, this);
}
@@ -573,10 +606,30 @@ void SelectionDAG::dump() const {
}
void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << PrintNodeId(*this) << ": ";
print_types(OS, G);
+ OS << " = " << getOperationName(G);
print_details(OS, G);
}
+static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
+ const SDValue Value) {
+ if (!Value.getNode()) {
+ OS << "<null>";
+ return false;
+ } else if (shouldPrintInline(*Value.getNode())) {
+ OS << Value->getOperationName(G) << ':';
+ Value->print_types(OS, G);
+ Value->print_details(OS, G);
+ return true;
+ } else {
+ OS << PrintNodeId(*Value.getNode());
+ if (unsigned RN = Value.getResNo())
+ OS << ':' << RN;
+ return false;
+ }
+}
+
typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
@@ -589,20 +642,13 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
// Having printed this SDNode, walk the children:
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
-
if (i) OS << ",";
OS << " ";
- if (child->getNumOperands() == 0) {
- // This child has no grandchildren; print it inline right here.
- child->printr(OS, G);
- once.insert(child);
- } else { // Just the address. FIXME: also print the child's opcode.
- OS << (const void*)child;
- if (unsigned RN = N->getOperand(i).getResNo())
- OS << ":" << RN;
- }
+ const SDValue Op = N->getOperand(i);
+ bool printedInline = printOperand(OS, G, Op);
+ if (printedInline)
+ once.insert(Op.getNode());
}
OS << "\n";
@@ -664,12 +710,9 @@ void SDNode::dumprFull(const SelectionDAG *G) const {
}
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
+ printr(OS, G);
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
if (i) OS << ", "; else OS << " ";
- OS << (void*)getOperand(i).getNode();
- if (unsigned RN = getOperand(i).getResNo())
- OS << ":" << RN;
+ printOperand(OS, G, getOperand(i));
}
- print_details(OS, G);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 97ece8b9248a..853a21a15eb9 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
@@ -263,13 +264,17 @@ namespace llvm {
return;
IS.OptLevel = NewOptLevel;
IS.TM.setOptLevel(NewOptLevel);
- SavedFastISel = IS.TM.Options.EnableFastISel;
- if (NewOptLevel == CodeGenOpt::None)
- IS.TM.setFastISel(true);
DEBUG(dbgs() << "\nChanging optimization level for Function "
<< IS.MF->getFunction()->getName() << "\n");
DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel
<< " ; After: -O" << NewOptLevel << "\n");
+ SavedFastISel = IS.TM.Options.EnableFastISel;
+ if (NewOptLevel == CodeGenOpt::None) {
+ IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
+ DEBUG(dbgs() << "\tFastISel is "
+ << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
+ << "\n");
+ }
}
~OptLevelChanger() {
@@ -293,6 +298,11 @@ namespace llvm {
const TargetLowering *TLI = IS->TLI;
const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
+ // Try first to see if the Target has its own way of selecting a scheduler
+ if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) {
+ return SchedulerCtor(IS, OptLevel);
+ }
+
if (OptLevel == CodeGenOpt::None ||
(ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
TLI->getSchedulingPreference() == Sched::Source)
@@ -350,8 +360,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
OptLevel(OL),
DAGSize(0) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
- initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
- initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
initializeTargetLibraryInfoWrapperPassPass(
*PassRegistry::getPassRegistry());
}
@@ -363,13 +374,12 @@ SelectionDAGISel::~SelectionDAGISel() {
}
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
- AU.addRequired<BranchProbabilityInfo>();
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -380,10 +390,10 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
///
/// This is required for correctness, so it must be done at -O0.
///
-static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
+static void SplitCriticalSideEffectEdges(Function &Fn) {
// Loop for blocks with phi nodes.
- for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ for (BasicBlock &BB : Fn) {
+ PHINode *PN = dyn_cast<PHINode>(BB.begin());
if (!PN) continue;
ReprocessBlock:
@@ -391,7 +401,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
// are potentially trapping constant expressions. Constant expressions are
// the only potentially trapping value that can occur as the argument to a
// PHI.
- for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I)
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
if (!CE || !CE->canTrap()) continue;
@@ -405,8 +415,8 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
// Okay, we have to split this edge.
SplitCriticalEdge(
- Pred->getTerminator(), GetSuccessorNumber(Pred, BB),
- CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges());
+ Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
+ CriticalEdgeSplittingOptions().setMergeIdenticalEdges());
goto ReprocessBlock;
}
}
@@ -437,19 +447,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA);
+ SplitCriticalSideEffectEdges(const_cast<Function &>(Fn));
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF, CurDAG);
if (UseMBPI && OptLevel != CodeGenOpt::None)
- FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
else
FuncInfo->BPI = nullptr;
@@ -457,15 +467,50 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MF->setHasInlineAsm(false);
+ FuncInfo->SplitCSR = false;
+ SmallVector<MachineBasicBlock*, 4> Returns;
+
+ // We split CSR if the target supports it for the given function
+ // and the function has only return exits.
+ if (TLI->supportSplitCSR(MF)) {
+ FuncInfo->SplitCSR = true;
+
+ // Collect all the return blocks.
+ for (const BasicBlock &BB : Fn) {
+ if (!succ_empty(&BB))
+ continue;
+
+ const TerminatorInst *Term = BB.getTerminator();
+ if (isa<UnreachableInst>(Term))
+ continue;
+ if (isa<ReturnInst>(Term)) {
+ Returns.push_back(FuncInfo->MBBMap[&BB]);
+ continue;
+ }
+
+ // Bail out if the exit block is not Return nor Unreachable.
+ FuncInfo->SplitCSR = false;
+ break;
+ }
+ }
+
+ MachineBasicBlock *EntryMBB = &MF->front();
+ if (FuncInfo->SplitCSR)
+ // This performs initialization so lowering for SplitCSR will be correct.
+ TLI->initializeSplitCSR(EntryMBB);
+
SelectAllBasicBlocks(Fn);
// If the first basic block in the function has live ins that need to be
// copied into vregs, emit the copies into the top of the block before
// emitting the code for the block.
- MachineBasicBlock *EntryMBB = MF->begin();
const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII);
+ // Insert copies in the entry block and the return blocks.
+ if (FuncInfo->SplitCSR)
+ TLI->insertCopiesSplitCSR(EntryMBB, Returns);
+
DenseMap<unsigned, unsigned> LiveInMap;
if (!FuncInfo->ArgDbgValues.empty())
for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
@@ -882,7 +927,7 @@ void SelectionDAGISel::DoInstructionSelection() {
// graph) and preceding back toward the beginning (the entry
// node).
while (ISelPosition != CurDAG->allnodes_begin()) {
- SDNode *Node = --ISelPosition;
+ SDNode *Node = &*--ISelPosition;
// Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
// but there are currently some corner cases that it misses. Also, this
// makes it theoretically possible to disable the DAGCombiner.
@@ -916,14 +961,47 @@ void SelectionDAGISel::DoInstructionSelection() {
PostprocessISelDAG();
}
+static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) {
+ for (const User *U : CPI->users()) {
+ if (const IntrinsicInst *EHPtrCall = dyn_cast<IntrinsicInst>(U)) {
+ Intrinsic::ID IID = EHPtrCall->getIntrinsicID();
+ if (IID == Intrinsic::eh_exceptionpointer ||
+ IID == Intrinsic::eh_exceptioncode)
+ return true;
+ }
+ }
+ return false;
+}
+
/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
/// do other setup for EH landing-pad blocks.
bool SelectionDAGISel::PrepareEHLandingPad() {
MachineBasicBlock *MBB = FuncInfo->MBB;
-
+ const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn();
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
const TargetRegisterClass *PtrRC =
TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout()));
+ // Catchpads have one live-in register, which typically holds the exception
+ // pointer or code.
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
+ if (hasExceptionPointerOrCodeUser(CPI)) {
+ // Get or create the virtual register to hold the pointer or code. Mark
+ // the live in physreg and copy into the vreg.
+ MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
+ assert(EHPhysReg && "target lacks exception pointer register");
+ MBB->addLiveIn(EHPhysReg);
+ unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::COPY), VReg)
+ .addReg(EHPhysReg, RegState::Kill);
+ }
+ return true;
+ }
+
+ if (!LLVMBB->isLandingPad())
+ return true;
+
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
@@ -935,52 +1013,12 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
- // If this is an MSVC-style personality function, we need to split the landing
- // pad into several BBs.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst();
- MF->getMMI().addPersonality(MBB, cast<Function>(LPadInst->getParent()
- ->getParent()
- ->getPersonalityFn()
- ->stripPointerCasts()));
- EHPersonality Personality = MF->getMMI().getPersonalityType();
-
- if (isMSVCEHPersonality(Personality)) {
- SmallVector<MachineBasicBlock *, 4> ClauseBBs;
- const IntrinsicInst *ActionsCall =
- dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt());
- // Get all invoke BBs that unwind to this landingpad.
- SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(),
- MBB->pred_end());
- if (ActionsCall && ActionsCall->getIntrinsicID() == Intrinsic::eh_actions) {
- // If this is a call to llvm.eh.actions followed by indirectbr, then we've
- // run WinEHPrepare, and we should remove this block from the machine CFG.
- // Mark the targets of the indirectbr as landingpads instead.
- for (const BasicBlock *LLVMSucc : successors(LLVMBB)) {
- MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc];
- // Add the edge from the invoke to the clause.
- for (MachineBasicBlock *InvokeBB : InvokeBBs)
- InvokeBB->addSuccessor(ClauseBB);
-
- // Mark the clause as a landing pad or MI passes will delete it.
- ClauseBB->setIsLandingPad();
- }
- }
-
- // Remove the edge from the invoke to the lpad.
- for (MachineBasicBlock *InvokeBB : InvokeBBs)
- InvokeBB->removeSuccessor(MBB);
-
- // Don't select instructions for the landingpad.
- return false;
- }
-
// Mark exception register as live in.
- if (unsigned Reg = TLI->getExceptionPointerRegister())
+ if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
// Mark exception selector register as live in.
- if (unsigned Reg = TLI->getExceptionSelectorRegister())
+ if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
return true;
@@ -992,9 +1030,9 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
static bool isFoldedOrDeadInstruction(const Instruction *I,
FunctionLoweringInfo *FuncInfo) {
return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
- !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !isa<TerminatorInst>(I) && // Terminators aren't folded.
!isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
- !isa<LandingPadInst>(I) && // Landingpad instructions aren't folded.
+ !I->isEHPad() && // EH pad instructions aren't folded.
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
@@ -1143,17 +1181,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->VisitedBBs.insert(LLVMBB);
}
- BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
+ BasicBlock::const_iterator const Begin =
+ LLVMBB->getFirstNonPHI()->getIterator();
BasicBlock::const_iterator const End = LLVMBB->end();
BasicBlock::const_iterator BI = End;
FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ if (!FuncInfo->MBB)
+ continue; // Some blocks like catchpads have no code or MBB.
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
FuncInfo->ExceptionSelectorVirtReg = 0;
- if (LLVMBB->isLandingPad())
+ if (LLVMBB->isEHPad())
if (!PrepareEHLandingPad())
continue;
@@ -1192,7 +1233,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
unsigned NumFastIselRemaining = std::distance(Begin, End);
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
- const Instruction *Inst = std::prev(BI);
+ const Instruction *Inst = &*std::prev(BI);
// If we no longer require this instruction, skip it.
if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
@@ -1212,8 +1253,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// then see if there is a load right before the selected instructions.
// Try to fold the load if so.
const Instruction *BeforeInst = Inst;
- while (BeforeInst != Begin) {
- BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst));
+ while (BeforeInst != &*Begin) {
+ BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst));
if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
break;
}
@@ -1245,7 +1286,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// For the purpose of debugging, just abort.
report_fatal_error("FastISel didn't select the entire block");
- if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
+ if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
+ !Inst->use_empty()) {
unsigned &R = FuncInfo->ValueMap[Inst];
if (!R)
R = FuncInfo->CreateRegs(Inst->getType());
@@ -1253,7 +1295,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
bool HadTailCall = false;
MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
- SelectBasicBlock(Inst, BI, HadTailCall);
+ SelectBasicBlock(Inst->getIterator(), BI, HadTailCall);
// If the call was emitted as a tail call, we're done with the block.
// We also need to delete any previously emitted instructions.
@@ -1483,35 +1525,39 @@ SelectionDAGISel::FinishBasicBlock() {
CodeGenAndEmitDAG();
}
- uint32_t UnhandledWeight = 0;
- for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j)
- UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight;
-
+ BranchProbability UnhandledProb = SDB->BitTestCases[i].Prob;
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
- UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight;
+ UnhandledProb -= SDB->BitTestCases[i].Cases[j].ExtraProb;
// Set the current basic block to the mbb we wish to insert the code into
FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
- if (j+1 != ej)
- SDB->visitBitTestCase(SDB->BitTestCases[i],
- SDB->BitTestCases[i].Cases[j+1].ThisBB,
- UnhandledWeight,
- SDB->BitTestCases[i].Reg,
- SDB->BitTestCases[i].Cases[j],
- FuncInfo->MBB);
+
+ // If all cases cover a contiguous range, it is not necessary to jump to
+ // the default block after the last bit test fails. This is because the
+ // range check during bit test header creation has guaranteed that every
+ // case here doesn't go outside the range.
+ MachineBasicBlock *NextMBB;
+ if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej)
+ NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB;
+ else if (j + 1 != ej)
+ NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB;
else
- SDB->visitBitTestCase(SDB->BitTestCases[i],
- SDB->BitTestCases[i].Default,
- UnhandledWeight,
- SDB->BitTestCases[i].Reg,
- SDB->BitTestCases[i].Cases[j],
- FuncInfo->MBB);
+ NextMBB = SDB->BitTestCases[i].Default;
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ NextMBB,
+ UnhandledProb,
+ SDB->BitTestCases[i].Reg,
+ SDB->BitTestCases[i].Cases[j],
+ FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
+
+ if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej)
+ break;
}
// Update PHI Nodes
@@ -1642,14 +1688,7 @@ SelectionDAGISel::FinishBasicBlock() {
/// one preferred by the target.
///
ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
- RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
-
- if (!Ctor) {
- Ctor = ISHeuristic;
- RegisterScheduler::setDefault(Ctor);
- }
-
- return Ctor(this, OptLevel);
+ return ISHeuristic(this, OptLevel);
}
//===----------------------------------------------------------------------===//
@@ -1961,7 +2000,7 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
}
/// GetVBR - decode a vbr encoding whose top bit is set.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
assert(Val >= 128 && "Not a VBR");
Val &= 127; // Remove first vbr bit.
@@ -2287,7 +2326,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
}
/// CheckSame - Implements OP_CheckSame.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N,
const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
@@ -2298,7 +2337,7 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
/// CheckChildSame - Implements OP_CheckChildXSame.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N,
const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes,
@@ -2310,20 +2349,20 @@ CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
const SelectionDAGISel &SDISel) {
return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
const SelectionDAGISel &SDISel, SDNode *N) {
return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
@@ -2331,7 +2370,7 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return N->getOpcode() == Opc;
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2341,7 +2380,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL,
unsigned ChildNo) {
@@ -2351,14 +2390,14 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
(ISD::CondCode)MatcherTable[MatcherIndex++];
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2369,7 +2408,7 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
int64_t Val = MatcherTable[MatcherIndex++];
@@ -2380,7 +2419,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return C && C->getSExtValue() == Val;
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
@@ -2388,7 +2427,7 @@ CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
@@ -2401,7 +2440,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return C && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 4df5ede388fc..2764688518c2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -80,9 +80,16 @@ namespace llvm {
return true;
}
- static bool hasNodeAddressLabel(const SDNode *Node,
- const SelectionDAG *Graph) {
- return true;
+ static std::string getNodeIdentifierLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ std::string R;
+ raw_string_ostream OS(R);
+#ifndef NDEBUG
+ OS << 't' << Node->PersistentId;
+#else
+ OS << static_cast<const void *>(Node);
+#endif
+ return R;
}
/// If you want to override the dot attributes printed for a particular
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 34688df4765b..050ec2116c5d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -95,6 +96,9 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+ MFI->markAsStatepointSpillSlotObjectIndex(FI);
+
Builder.FuncInfo.StatepointStackSlots.push_back(FI);
AllocatedStackSlots.push_back(true);
return SpillSlot;
@@ -105,8 +109,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
return Builder.DAG.getFrameIndex(FI, ValueType);
}
// Note: We deliberately choose to advance this only on the failing path.
- // Doing so on the suceeding path involes a bit of complexity that caused a
- // minor bug previously. Unless performance shows this matters, please
+ // Doing so on the succeeding path involves a bit of complexity that caused
+ // a minor bug previously. Unless performance shows this matters, please
// keep this code as simple as possible.
NextSlotToAllocate++;
}
@@ -119,7 +123,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
static Optional<int> findPreviousSpillSlot(const Value *Val,
SelectionDAGBuilder &Builder,
int LookUpDepth) {
- // Can not look any futher - give up now
+ // Can not look any further - give up now
if (LookUpDepth <= 0)
return Optional<int>();
@@ -196,7 +200,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
/// Try to find existing copies of the incoming values in stack slots used for
/// statepoint spilling. If we can find a spill slot for the incoming value,
/// mark that slot as allocated, and reuse the same slot for this safepoint.
-/// This helps to avoid series of loads and stores that only serve to resuffle
+/// This helps to avoid series of loads and stores that only serve to reshuffle
/// values on the stack between calls.
static void reservePreviousStackSlotForValue(const Value *IncomingValue,
SelectionDAGBuilder &Builder) {
@@ -255,7 +259,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
SmallVectorImpl<const Value *> &Relocs,
SelectionDAGBuilder &Builder) {
- // This is horribly ineffecient, but I don't care right now
+ // This is horribly inefficient, but I don't care right now
SmallSet<SDValue, 64> Seen;
SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs;
@@ -283,13 +287,29 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
/// call node. Also update NodeMap so that getValue(statepoint) will
/// reference lowered call result
static SDNode *
-lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
+lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB,
SelectionDAGBuilder &Builder,
SmallVectorImpl<SDValue> &PendingExports) {
ImmutableCallSite CS(ISP.getCallSite());
- SDValue ActualCallee = Builder.getValue(ISP.getCalledValue());
+ SDValue ActualCallee;
+
+ if (ISP.getNumPatchBytes() > 0) {
+ // If we've been asked to emit a nop sequence instead of a call instruction
+ // for this statepoint then don't lower the call target, but use a constant
+ // `null` instead. Not lowering the call target lets statepoint clients get
+ // away without providing a physical address for the symbolic call target at
+ // link time.
+
+ const auto &TLI = Builder.DAG.getTargetLoweringInfo();
+ const auto &DL = Builder.DAG.getDataLayout();
+
+ unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace();
+ ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(),
+ TLI.getPointerTy(DL, AS));
+ } else
+ ActualCallee = Builder.getValue(ISP.getCalledValue());
assert(CS.getCallingConv() != CallingConv::AnyReg &&
"anyregcc is not supported on statepoints!");
@@ -300,7 +320,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
SDValue ReturnValue, CallEndVal;
std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands(
ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos,
- ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad,
+ ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB,
false /* IsPatchPoint */);
SDNode *CallEnd = CallEndVal.getNode();
@@ -317,25 +337,33 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
// ch, glue = callseq_end ch, glue
// get_return_value ch, glue
//
- // get_return_value can either be a CopyFromReg to grab the return value from
- // %RAX, or it can be a LOAD to load a value returned by reference via a stack
- // slot.
+ // get_return_value can either be a sequence of CopyFromReg instructions
+ // to grab the return value from the return register(s), or it can be a LOAD
+ // to load a value returned by reference via a stack slot.
- if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg ||
- CallEnd->getOpcode() == ISD::LOAD))
- CallEnd = CallEnd->getOperand(0).getNode();
+ if (HasDef) {
+ if (CallEnd->getOpcode() == ISD::LOAD)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ else
+ while (CallEnd->getOpcode() == ISD::CopyFromReg)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ }
assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!");
- if (HasDef) {
- if (CS.isInvoke()) {
- // Result value will be used in different basic block for invokes
- // so we need to export it now. But statepoint call has a different type
- // than the actuall call. It means that standart exporting mechanism will
- // create register of the wrong type. So instead we need to create
- // register with correct type and save value into it manually.
+ // Export the result value if needed
+ const Instruction *GCResult = ISP.getGCResult();
+ if (HasDef && GCResult) {
+ if (GCResult->getParent() != CS.getParent()) {
+ // Result value will be used in a different basic block so we need to
+ // export it now.
+ // Default exporting mechanism will not work here because statepoint call
+ // has a different type than the actual call. It means that by default
+ // llvm will create export register of the wrong type (always i32 in our
+ // case). So instead we need to create export register with correct type
+ // manually.
// TODO: To eliminate this problem we can remove gc.result intrinsics
- // completelly and make statepoint call to return a tuple.
+ // completely and make statepoint call to return a tuple.
unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType());
RegsForValue RFV(
*Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(),
@@ -347,8 +375,9 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
PendingExports.push_back(Chain);
Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg;
} else {
- // The value of the statepoint itself will be the value of call itself.
- // We'll replace the actually call node shortly. gc_result will grab
+ // Result value will be used in a same basic block. Don't export it or
+ // perform any explicit register copies.
+ // We'll replace the actuall call node shortly. gc_result will grab
// this value.
Builder.setValue(CS.getInstruction(), ReturnValue);
}
@@ -411,7 +440,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// chaining stores one after another, this may allow
// a bit more optimal scheduling for them
Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
- MachinePointerInfo::getFixedStack(Index),
+ MachinePointerInfo::getFixedStack(
+ Builder.DAG.getMachineFunction(), Index),
false, false, 0);
Builder.StatepointLowering.setLocation(Incoming, Loc);
@@ -483,21 +513,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// to the GCStrategy from there (yet).
GCStrategy &S = Builder.GFI->getStrategy();
for (const Value *V : Bases) {
- auto Opt = S.isGCManagedPointer(V);
+ auto Opt = S.isGCManagedPointer(V->getType());
if (Opt.hasValue()) {
assert(Opt.getValue() &&
"non gc managed base pointer found in statepoint");
}
}
for (const Value *V : Ptrs) {
- auto Opt = S.isGCManagedPointer(V);
+ auto Opt = S.isGCManagedPointer(V->getType());
if (Opt.hasValue()) {
assert(Opt.getValue() &&
"non gc managed derived pointer found in statepoint");
}
}
for (const Value *V : Relocations) {
- auto Opt = S.isGCManagedPointer(V);
+ auto Opt = S.isGCManagedPointer(V->getType());
if (Opt.hasValue()) {
assert(Opt.getValue() && "non gc managed pointer relocated");
}
@@ -581,19 +611,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
} else {
// Record value as visited, but not spilled. This is case for allocas
- // and constants. For this values we can avoid emiting spill load while
+ // and constants. For this values we can avoid emitting spill load while
// visiting corresponding gc_relocate.
// Actually we do not need to record them in this map at all.
- // We do this only to check that we are not relocating any unvisited value.
+ // We do this only to check that we are not relocating any unvisited
+ // value.
SpillMap[V] = None;
// Default llvm mechanisms for exporting values which are used in
// different basic blocks does not work for gc relocates.
// Note that it would be incorrect to teach llvm that all relocates are
- // uses of the corresponging values so that it would automatically
+ // uses of the corresponding values so that it would automatically
// export them. Relocates of the spilled values does not use original
// value.
- if (StatepointSite.getCallSite().isInvoke())
+ if (RelocateOpers.getUnderlyingCallSite().getParent() !=
+ StatepointInstr->getParent())
Builder.ExportFromCurrentBlock(V);
}
}
@@ -608,7 +640,7 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
}
void SelectionDAGBuilder::LowerStatepoint(
- ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) {
+ ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) {
// The basic scheme here is that information about both the original call and
// the safepoint is encoded in the CallInst. We create a temporary call and
// lower it, then reverse engineer the calling sequence.
@@ -620,14 +652,12 @@ void SelectionDAGBuilder::LowerStatepoint(
ImmutableCallSite CS(ISP.getCallSite());
#ifndef NDEBUG
- // Consistency check. Don't do this for invokes. It would be too
- // expensive to preserve this information across different basic blocks
- if (!CS.isInvoke()) {
- for (const User *U : CS->users()) {
- const CallInst *Call = cast<CallInst>(U);
- if (isGCRelocate(Call))
- StatepointLowering.scheduleRelocCall(*Call);
- }
+ // Consistency check. Check only relocates in the same basic block as thier
+ // statepoint.
+ for (const User *U : CS->users()) {
+ const CallInst *Call = cast<CallInst>(U);
+ if (isGCRelocate(Call) && Call->getParent() == CS.getParent())
+ StatepointLowering.scheduleRelocCall(*Call);
}
#endif
@@ -648,7 +678,7 @@ void SelectionDAGBuilder::LowerStatepoint(
// Get call node, we will replace it later with statepoint
SDNode *CallNode =
- lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports);
+ lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports);
// Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
// nodes with all the appropriate arguments and return values.
@@ -790,7 +820,7 @@ void SelectionDAGBuilder::LowerStatepoint(
// Replace original call
DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
- // Remove originall call node
+ // Remove original call node
DAG.DeleteNode(CallNode);
// DON'T set the root - under the assumption that it's already set past the
@@ -809,8 +839,9 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
Instruction *I = cast<Instruction>(CI.getArgOperand(0));
assert(isStatepoint(I) && "first argument must be a statepoint token");
- if (isa<InvokeInst>(I)) {
- // For invokes we should have stored call result in a virtual register.
+ if (I->getParent() != CI.getParent()) {
+ // Statepoint is in different basic block so we should have stored call
+ // result in a virtual register.
// We can not use default getValue() functionality to copy value from this
// register because statepoint and actuall call return types can be
// different, and getValue() will use CopyFromReg of the wrong type,
@@ -833,9 +864,10 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
#ifndef NDEBUG
// Consistency check
- // We skip this check for invoke statepoints. It would be too expensive to
- // preserve validation info through different basic blocks.
- if (!RelocateOpers.isTiedToInvoke()) {
+ // We skip this check for relocates not in the same basic block as thier
+ // statepoint. It would be too expensive to preserve validation info through
+ // different basic blocks.
+ if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) {
StatepointLowering.relocCallVisited(CI);
}
#endif
@@ -862,13 +894,14 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
// Be conservative: flush all pending loads
// TODO: Probably we can be less restrictive on this,
- // it may allow more scheduling opprtunities
+ // it may allow more scheduling opportunities.
SDValue Chain = getRoot();
SDValue SpillLoad =
- DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot,
- MachinePointerInfo::getFixedStack(*DerivedPtrLocation),
- false, false, false, 0);
+ DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+ *DerivedPtrLocation),
+ false, false, false, 0);
// Again, be conservative, don't emit pending loads
DAG.setRoot(SpillLoad.getValue(1));
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index fbf651277c7f..c64d882d69a4 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -85,21 +85,22 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG,
RTLIB::Libcall LC, EVT RetVT,
- const SDValue *Ops, unsigned NumOps,
+ ArrayRef<SDValue> Ops,
bool isSigned, SDLoc dl,
bool doesNotReturn,
bool isReturnValueUsed) const {
TargetLowering::ArgListTy Args;
- Args.reserve(NumOps);
+ Args.reserve(Ops.size());
TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0; i != NumOps; ++i) {
- Entry.Node = Ops[i];
+ for (SDValue Op : Ops) {
+ Entry.Node = Op;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
- Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
+ Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
Args.push_back(Entry);
}
+
if (LC == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported library call operation!");
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
@@ -115,9 +116,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
return LowerCallTo(CLI);
}
-
-/// SoftenSetCCOperands - Soften the operands of a comparison. This code is
-/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+/// Soften the operands of a comparison. This code is shared among BR_CC,
+/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
@@ -127,6 +127,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Expand into one or more soft-fp libcall(s).
RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ bool ShouldInvertCC = false;
switch (CCCode) {
case ISD::SETEQ:
case ISD::SETOEQ:
@@ -166,34 +167,38 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
(VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128;
break;
- default:
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ break;
+ default:
+ // Invert CC for unordered comparisons
+ ShouldInvertCC = true;
switch (CCCode) {
- case ISD::SETONE:
- // SETONE = SETOLT | SETOGT
- LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
- // Fallthrough
- case ISD::SETUGT:
- LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
- (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
- break;
- case ISD::SETUGE:
- LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
- (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
- break;
case ISD::SETULT:
- LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
break;
case ISD::SETULE:
- LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
break;
- case ISD::SETUEQ:
- LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
- (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ case ISD::SETUGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
break;
default: llvm_unreachable("Do not know how to soften this setcc!");
}
@@ -201,17 +206,21 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Use the target specific return value for comparions lib calls.
EVT RetVT = getCmpLibcallReturnType();
- SDValue Ops[2] = { NewLHS, NewRHS };
- NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/,
+ SDValue Ops[2] = {NewLHS, NewRHS};
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
dl).first;
NewRHS = DAG.getConstant(0, dl, RetVT);
+
CCCode = getCmpLibcallCC(LC1);
+ if (ShouldInvertCC)
+ CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
+
if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
SDValue Tmp = DAG.getNode(
ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/,
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/,
dl).first;
NewLHS = DAG.getNode(
ISD::SETCC, dl,
@@ -222,9 +231,8 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
}
}
-/// getJumpTableEncoding - Return the entry encoding for a jump table in the
-/// current function. The returned value is a member of the
-/// MachineJumpTableInfo::JTEntryKind enum.
+/// Return the entry encoding for a jump table in the current function. The
+/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
// In non-pic modes, just use the address of a block.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
@@ -250,9 +258,8 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
return Table;
}
-/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
-/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
-/// MCExpr.
+/// This returns the relocation base for the given PIC jumptable, the same as
+/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI,MCContext &Ctx) const{
@@ -279,10 +286,9 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// Optimization Methods
//===----------------------------------------------------------------------===//
-/// ShrinkDemandedConstant - Check to see if the specified operand of the
-/// specified instruction is a constant integer. If so, check to see if there
-/// are any bits set in the constant that are not demanded. If so, shrink the
-/// constant and return true.
+/// Check to see if the specified operand of the specified instruction is a
+/// constant integer. If so, check to see if there are any bits set in the
+/// constant that are not demanded. If so, shrink the constant and return true.
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
const APInt &Demanded) {
SDLoc dl(Op);
@@ -317,10 +323,9 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
return false;
}
-/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
-/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
-/// cast, but it could be generalized for targets with other types of
-/// implicit widening casts.
+/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
+/// generalized for targets with other types of implicit widening casts.
bool
TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
unsigned BitWidth,
@@ -366,13 +371,13 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
return false;
}
-/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
-/// DemandedMask bits of the result of Op are ever used downstream. If we can
-/// use this information to simplify Op, create a new simplified DAG node and
-/// return true, returning the original and new nodes in Old and New. Otherwise,
-/// analyze the expression and return a mask of KnownOne and KnownZero bits for
-/// the expression (used to simplify the caller). The KnownZero/One bits may
-/// only be accurate for those bits in the DemandedMask.
+/// Look at Op. At this point, we know that only the DemandedMask bits of the
+/// result of Op are ever used downstream. If we can use this information to
+/// simplify Op, create a new simplified DAG node and return true, returning the
+/// original and new nodes in Old and New. Otherwise, analyze the expression and
+/// return a mask of KnownOne and KnownZero bits for the expression (used to
+/// simplify the caller). The KnownZero/One bits may only be accurate for those
+/// bits in the DemandedMask.
bool TargetLowering::SimplifyDemandedBits(SDValue Op,
const APInt &DemandedMask,
APInt &KnownZero,
@@ -1061,7 +1066,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
- if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
+ if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() &&
+ Op.getOperand(0).getValueType() != MVT::f128) {
+ // Cannot eliminate/lower SHL for f128 yet.
EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
@@ -1120,9 +1127,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return false;
}
-/// computeKnownBitsForTargetNode - Determine which of the bits specified
-/// in Mask are known to be either zero or one and return them in the
-/// KnownZero/KnownOne bitsets.
+/// Determine which of the bits specified in Mask are known to be either zero or
+/// one and return them in the KnownZero/KnownOne bitsets.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
@@ -1137,9 +1143,8 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
}
-/// ComputeNumSignBitsForTargetNode - This method can be implemented by
-/// targets that want to expose additional information about sign bits to the
-/// DAG Combiner.
+/// This method can be implemented by targets that want to expose additional
+/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
const SelectionDAG &,
unsigned Depth) const {
@@ -1152,10 +1157,8 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
return 1;
}
-/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
-/// one bit set. This differs from computeKnownBits in that it doesn't need to
-/// determine which bit is set.
-///
+/// Test if the given value is known to have exactly one bit set. This differs
+/// from computeKnownBits in that it doesn't need to determine which bit is set.
static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
// A left-shift of a constant one will have exactly one bit set, because
// shifting the bit off the end is undefined.
@@ -1239,8 +1242,8 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
return CN->isNullValue();
}
-/// SimplifySetCC - Try to simplify a setcc built with the specified operands
-/// and cc. If it is unable to simplify it, return a null SDValue.
+/// Try to simplify a setcc built with the specified operands and cc. If it is
+/// unable to simplify it, return a null SDValue.
SDValue
TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, bool foldBooleans,
@@ -1270,7 +1273,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
- if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
@@ -1335,7 +1338,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
PreExt = N0->getOperand(0);
} else if (N0->getOpcode() == ISD::AND) {
// DAGCombine turns costly ZExts into ANDs
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
if ((C->getAPIntValue()+1).isPowerOf2()) {
MinBits = C->getAPIntValue().countTrailingOnes();
PreExt = N0->getOperand(0);
@@ -1345,7 +1348,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
MinBits = N0->getOperand(0).getValueSizeInBits();
PreExt = N0->getOperand(0);
Signed = true;
- } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
// ZEXTLOAD / SEXTLOAD
if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
MinBits = LN0->getMemoryVT().getSizeInBits();
@@ -1697,8 +1700,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
N0.getOpcode() == ISD::AND) {
auto &DL = DAG.getDataLayout();
- if (ConstantSDNode *AndRHS =
- dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
EVT ShiftTy = DCI.isBeforeLegalize()
? getPointerTy(DL)
: getShiftAmountTy(N0.getValueType(), DL);
@@ -1728,8 +1730,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// (X & -256) == 256 -> (X >> 8) == 1
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
- if (ConstantSDNode *AndRHS =
- dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
@@ -1783,7 +1784,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Constant fold or commute setcc.
SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
if (O.getNode()) return O;
- } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
// If the RHS of an FP comparison is a constant, simplify it away in
// some cases.
if (CFP->getValueAPF().isNaN()) {
@@ -1900,8 +1901,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// to be careful about increasing register pressure needlessly.
bool LegalRHSImm = false;
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
- if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
return DAG.getSetCC(dl, VT, N0.getOperand(0),
@@ -1924,7 +1925,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
// Turn (C1-X) == C2 --> X == C1-C2
- if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
return
DAG.getSetCC(dl, VT, N0.getOperand(1),
@@ -2075,12 +2076,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return SDValue();
}
-/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
-/// node is a GlobalAddress + offset.
+/// Returns true (and the GlobalValue and the offset) if the node is a
+/// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
int64_t &Offset) const {
- if (isa<GlobalAddressSDNode>(N)) {
- GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+ if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
GA = GASD->getGlobal();
Offset += GASD->getOffset();
return true;
@@ -2090,14 +2090,12 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
SDValue N1 = N->getOperand(0);
SDValue N2 = N->getOperand(1);
if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
- if (V) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
Offset += V->getSExtValue();
return true;
}
} else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
- if (V) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
Offset += V->getSExtValue();
return true;
}
@@ -2107,9 +2105,8 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
return false;
}
-
-SDValue TargetLowering::
-PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+SDValue TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
// Default implementation: no optimization.
return SDValue();
}
@@ -2159,9 +2156,9 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
return C_Unknown;
}
-/// LowerXConstraint - try to replace an X constraint, which matches anything,
-/// with another that has more specific requirements based on the type of the
-/// corresponding operand.
+/// Try to replace an X constraint, which matches anything, with another that
+/// has more specific requirements based on the type of the corresponding
+/// operand.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
if (ConstraintVT.isInteger())
return "r";
@@ -2170,8 +2167,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
return nullptr;
}
-/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
-/// vector. If it is invalid, don't add anything to Ops.
+/// Lower the specified operand into the Ops vector.
+/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
@@ -2284,31 +2281,30 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
//===----------------------------------------------------------------------===//
// Constraint Selection.
-/// isMatchingInputConstraint - Return true of this is an input operand that is
-/// a matching constraint like "4".
+/// Return true of this is an input operand that is a matching constraint like
+/// "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
assert(!ConstraintCode.empty() && "No known constraint!");
return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
-/// getMatchedOperand - If this is an input matching constraint, this method
-/// returns the output operand it matches.
+/// If this is an input matching constraint, this method returns the output
+/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
assert(!ConstraintCode.empty() && "No known constraint!");
return atoi(ConstraintCode.c_str());
}
-
-/// ParseConstraints - Split up the constraint string from the inline
-/// assembly value into the specific constraints and their prefixes,
-/// and also tie in the associated operand values.
+/// Split up the constraint string from the inline assembly value into the
+/// specific constraints and their prefixes, and also tie in the associated
+/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
const TargetRegisterInfo *TRI,
ImmutableCallSite CS) const {
- /// ConstraintOperands - Information about all of the constraints.
+ /// Information about all of the constraints.
AsmOperandInfoVector ConstraintOperands;
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
unsigned maCount = 0; // Largest number of multiple alternative constraints.
@@ -2483,16 +2479,13 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
" incompatible type!");
}
}
-
}
}
return ConstraintOperands;
}
-
-/// getConstraintGenerality - Return an integer indicating how general CT
-/// is.
+/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
case TargetLowering::C_Other:
@@ -2581,8 +2574,8 @@ TargetLowering::ConstraintWeight
return weight;
}
-/// ChooseConstraint - If there are multiple different constraints that we
-/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// If there are multiple different constraints that we could pick for this
+/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
/// Other -> immediates and magic values
/// Register -> one specific register
@@ -2649,9 +2642,8 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
OpInfo.ConstraintType = BestType;
}
-/// ComputeConstraintToUse - Determines the constraint code and constraint
-/// type to use for the specific AsmOperandInfo, setting
-/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+/// Determines the constraint code and constraint type to use for the specific
+/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
SDValue Op,
SelectionDAG *DAG) const {
@@ -2717,6 +2709,16 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
return Mul;
}
+SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const {
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N,0); // Lower SDIV as SDIV
+ return SDValue();
+}
+
/// \brief Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
@@ -3036,3 +3038,46 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
return true;
}
+
+//===----------------------------------------------------------------------===//
+// Implementation of Emulated TLS Model
+//===----------------------------------------------------------------------===//
+
+SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ // Access to address of TLS varialbe xyz is lowered to a function call:
+ // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
+ SDLoc dl(GA);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
+ Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
+ StringRef EmuTlsVarName(NameString);
+ GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
+ if (!EmuTlsVar)
+ EmuTlsVar = dyn_cast_or_null<GlobalVariable>(
+ VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType));
+ Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
+ Entry.Ty = VoidPtrType;
+ Args.push_back(Entry);
+
+ SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
+ CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
+ // At last for X86 targets, maybe good for other targets too?
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setAdjustsStack(true); // Is this only for X86 target?
+ MFI->setHasCalls(true);
+
+ assert((GA->getOffset() == 0) &&
+ "Emulated TLS must have zero offset in GlobalAddressSDNode");
+ return CallResult.first;
+}