aboutsummaryrefslogtreecommitdiff
path: root/lib/CodeGen/SelectionDAG
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2012-04-14 13:54:10 +0000
committerDimitry Andric <dim@FreeBSD.org>2012-04-14 13:54:10 +0000
commit63faed5b8e4f2755f127fcb8aa440480c0649327 (patch)
tree19c69a04768629f2d440944b71cbe90adae0b615 /lib/CodeGen/SelectionDAG
parentd4c8b5d2e851b0e8a063c6bf8543a4823a26c15a (diff)
downloadsrc-63faed5b8e4f2755f127fcb8aa440480c0649327.tar.gz
src-63faed5b8e4f2755f127fcb8aa440480c0649327.zip
Vendor import of llvm trunk r154661:vendor/llvm/llvm-trunk-r154661
Notes
Notes: svn path=/vendor/llvm/dist/; revision=234285 svn path=/vendor/llvm/llvm-trunk-r154661/; revision=234286; tag=vendor/llvm/llvm-trunk-r154661
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r--lib/CodeGen/SelectionDAG/CMakeLists.txt14
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp1120
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp215
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp67
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp117
-rw-r--r--lib/CodeGen/SelectionDAG/LLVMBuild.txt22
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp1251
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp10
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp134
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp20
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp13
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp140
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp163
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp657
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp30
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp650
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp130
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h40
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp (renamed from lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp)85
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp1050
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp629
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h18
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp631
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp268
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp215
27 files changed, 4721 insertions, 2974 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 2282f0e6eb83..a6bdc3be32e0 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -10,24 +10,16 @@ add_llvm_library(LLVMSelectionDAG
LegalizeTypesGeneric.cpp
LegalizeVectorOps.cpp
LegalizeVectorTypes.cpp
+ ResourcePriorityQueue.cpp
ScheduleDAGFast.cpp
- ScheduleDAGList.cpp
ScheduleDAGRRList.cpp
ScheduleDAGSDNodes.cpp
SelectionDAG.cpp
SelectionDAGBuilder.cpp
+ SelectionDAGDumper.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
+ ScheduleDAGVLIW.cpp
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMSelectionDAG
- LLVMAnalysis
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7b878688df63..d1b998f8d840 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22,7 +22,6 @@
#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -64,7 +63,24 @@ namespace {
bool LegalTypes;
// Worklist of all of the nodes that need to be simplified.
- std::vector<SDNode*> WorkList;
+ //
+ // This has the semantics that when adding to the worklist,
+ // the item added must be next to be processed. It should
+ // also only appear once. The naive approach to this takes
+ // linear time.
+ //
+ // To reduce the insert/remove time to logarithmic, we use
+ // a set and a vector to maintain our worklist.
+ //
+ // The set contains the items on the worklist, but does not
+ // maintain the order they should be visited.
+ //
+ // The vector maintains the order nodes should be visited, but may
+ // contain duplicate or removed nodes. When choosing a node to
+ // visit, we pop off the order stack until we find an item that is
+ // also in the contents set. All operations are O(log N).
+ SmallPtrSet<SDNode*, 64> WorkListContents;
+ SmallVector<SDNode*, 64> WorkListOrder;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
@@ -84,18 +100,17 @@ namespace {
SDValue visit(SDNode *N);
public:
- /// AddToWorkList - Add to the work list making sure it's instance is at the
- /// the back (next to be processed.)
+ /// AddToWorkList - Add to the work list making sure its instance is at the
+ /// back (next to be processed.)
void AddToWorkList(SDNode *N) {
- removeFromWorkList(N);
- WorkList.push_back(N);
+ WorkListContents.insert(N);
+ WorkListOrder.push_back(N);
}
/// removeFromWorkList - remove all instances of N from the worklist.
///
void removeFromWorkList(SDNode *N) {
- WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
- WorkList.end());
+ WorkListContents.erase(N);
}
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
@@ -159,7 +174,9 @@ namespace {
SDValue visitADD(SDNode *N);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
+ SDValue visitSUBC(SDNode *N);
SDValue visitADDE(SDNode *N);
+ SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
@@ -181,7 +198,9 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
@@ -279,7 +298,7 @@ namespace {
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
- : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted),
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
/// Run - runs the dag combiner on all nodes in the work list
@@ -362,6 +381,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// specified expression for the same cost as the expression itself, or 2 if we
/// can compute the negated form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ const TargetLowering &TLI,
+ const TargetOptions *Options,
unsigned Depth = 0) {
// No compile time optimizations on this type.
if (Op.getValueType() == MVT::ppcf128)
@@ -384,34 +405,44 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
return LegalOperations ? 0 : 1;
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
+ return 0;
// fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
case ISD::FMUL:
case ISD::FDIV:
- if (HonorSignDependentRoundingFPMath()) return 0;
+ if (Options->HonorSignDependentRoundingFPMath()) return 0;
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
return V;
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
+ Depth + 1);
}
}
@@ -435,10 +466,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -450,7 +483,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
Op.getOperand(0));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
@@ -463,10 +496,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FMUL:
case ISD::FDIV:
- assert(!HonorSignDependentRoundingFPMath());
+ assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -944,14 +979,13 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
void DAGCombiner::Run(CombineLevel AtLevel) {
// set the instance variables, so that the various visit routines may use it.
Level = AtLevel;
- LegalOperations = Level >= NoIllegalOperations;
- LegalTypes = Level >= NoIllegalTypes;
+ LegalOperations = Level >= AfterLegalizeVectorOps;
+ LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
- WorkList.reserve(DAG.allnodes_size());
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
- WorkList.push_back(I);
+ AddToWorkList(I);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
@@ -962,11 +996,17 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// done. Set it to null to avoid confusion.
DAG.setRoot(SDValue());
- // while the worklist isn't empty, inspect the node on the end of it and
+ // while the worklist isn't empty, find a node and
// try and combine it.
- while (!WorkList.empty()) {
- SDNode *N = WorkList.back();
- WorkList.pop_back();
+ while (!WorkListContents.empty()) {
+ SDNode *N;
+ // The WorkListOrder holds the SDNodes in order, but it may contain duplicates.
+ // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
+ // worklist *should* contain, and check the node we want to visit is should
+ // actually be visited.
+ do {
+ N = WorkListOrder.pop_back_val();
+ } while (!WorkListContents.erase(N));
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
@@ -1050,7 +1090,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
+ case ISD::SUBC: return visitSUBC(N);
case ISD::ADDE: return visitADDE(N);
+ case ISD::SUBE: return visitSUBE(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
@@ -1071,7 +1113,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
@@ -1408,16 +1452,14 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
- (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
}
}
@@ -1486,8 +1528,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
- if (N->hasNUsesOfValue(0, 1))
- return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
@@ -1503,16 +1545,14 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
- (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
@@ -1535,7 +1575,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
// fold (adde x, y, false) -> (addc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
- return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1);
return SDValue();
}
@@ -1645,6 +1685,51 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, VT),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, 0) -> x + no borrow
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (N0C && N0C->isAllOnesValue())
+ return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (sube x, y, false) -> (subc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1756,7 +1841,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
- if (N1C && N1C->getSExtValue() == 1LL)
+ if (N1C && N1C->getAPIntValue() == 1LL)
return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
@@ -1770,17 +1855,15 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
- (isPowerOf2_64(N1C->getSExtValue()) ||
- isPowerOf2_64(-N1C->getSExtValue()))) {
+ if (N1C && !N1C->isNullValue() &&
+ (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
if (TLI.isPow2DivCheap())
return SDValue();
- int64_t pow2 = N1C->getSExtValue();
- int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
- unsigned lg2 = Log2_64(abs2);
+ unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
// Splat the sign bit into the register
SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
@@ -1800,7 +1883,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
- if (pow2 > 0)
+ if (N1C->getAPIntValue().isNonNegative())
return SRA;
AddToWorkList(SRA.getNode());
@@ -1810,8 +1893,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// if integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
- if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
- !TLI.isIntDivCheap()) {
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
SDValue Op = BuildSDIV(N);
if (Op.getNode()) return Op;
}
@@ -2250,6 +2332,67 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
ORNode, N0.getOperand(1));
}
+ // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+ // Only perform this optimization after type legalization and before
+ // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
+ // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+ // we don't want to undo this promotion.
+ // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+ // on scalars.
+ if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ && Level == AfterLegalizeVectorOps) {
+ SDValue In0 = N0.getOperand(0);
+ SDValue In1 = N1.getOperand(0);
+ EVT In0Ty = In0.getValueType();
+ EVT In1Ty = In1.getValueType();
+ // If both incoming values are integers, and the original types are the same.
+ if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
+ SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
+ return BC;
+ }
+ }
+
+ // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+ // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+ // If both shuffles use the same mask, and both shuffle within a single
+ // vector, then it is worthwhile to move the swizzle after the operation.
+ // The type-legalizer generates this pattern when loading illegal
+ // vector types from memory. In many cases this allows additional shuffle
+ // optimizations.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N0.getOperand(1).getOpcode() == ISD::UNDEF &&
+ N1.getOperand(1).getOpcode() == ISD::UNDEF) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
+
+ assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
+ "Inputs to shuffles are not the same type");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Check that both shuffles use the same mask. The masks are known to be of
+ // the same length because the result vector type is the same.
+ bool SameMask = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx0 = SVN0->getMaskElt(i);
+ int Idx1 = SVN1->getMaskElt(i);
+ if (Idx0 != Idx1) {
+ SameMask = false;
+ break;
+ }
+ }
+
+ if (SameMask) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(Op.getNode());
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
+ DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+ }
+ }
+
return SDValue();
}
@@ -2312,6 +2455,88 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+ // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+ // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+ // already be zero by virtue of the width of the base type of the load.
+ //
+ // the 'X' node here can either be nothing or an extract_vector_elt to catch
+ // more cases.
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD) ||
+ N0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
+ N0 : N0.getOperand(0) );
+
+ // Get the constant (if applicable) the zero'th operand is being ANDed with.
+ // This can be a pure constant or a vector splat, in which case we treat the
+ // vector as a scalar and use the splat value.
+ APInt Constant = APInt::getNullValue(1);
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ Constant = C->getAPIntValue();
+ } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+ if (IsSplat) {
+ // Undef bits can contribute to a possible optimisation if set, so
+ // set them.
+ SplatValue |= SplatUndef;
+
+ // The splat value may be something like "0x00FFFFFF", which means 0 for
+ // the first vector value and FF for the rest, repeating. We need a mask
+ // that will apply equally to all members of the vector, so AND all the
+ // lanes of the constant together.
+ EVT VT = Vector->getValueType(0);
+ unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
+ }
+
+ // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+ // actually legal and isn't going to get expanded, else this is a false
+ // optimisation.
+ bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getMemoryVT());
+
+ // Resize the constant to the same size as the original memory access before
+ // extension. If it is still the AllOnesValue then this AND is completely
+ // unneeded.
+ Constant =
+ Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
+
+ bool B;
+ switch (Load->getExtensionType()) {
+ default: B = false; break;
+ case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
+ case ISD::ZEXTLOAD:
+ case ISD::NON_EXTLOAD: B = true; break;
+ }
+
+ if (B && Constant.isAllOnesValue()) {
+ // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+ // preserve semantics once we get rid of the AND.
+ SDValue NewLoad(Load, 0);
+ if (Load->getExtensionType() == ISD::EXTLOAD) {
+ NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
+ Load->getValueType(0), Load->getDebugLoc(),
+ Load->getChain(), Load->getBasePtr(),
+ Load->getOffset(), Load->getMemoryVT(),
+ Load->getMemOperand());
+ // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
+
+ // Fold the AND away, taking care not to fold to the old load node if we
+ // replaced it.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
@@ -3323,7 +3548,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
// (and (srl x, (sub c1, c2), MASK)
- if (N1C && N0.getOpcode() == ISD::SRL &&
+ // Only fold this if the inner shift has no other uses -- if it does, folding
+ // this will increase the total number of instructions.
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
if (c1 < VT.getSizeInBits()) {
@@ -3603,8 +3830,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
@@ -3612,7 +3838,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// If all of the bits input the to ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
- APInt UnknownBits = ~KnownZero & Mask;
+ APInt UnknownBits = ~KnownZero;
if (UnknownBits == 0) return DAG.getConstant(1, VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
@@ -3713,6 +3939,16 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -3723,6 +3959,16 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4108,12 +4354,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) {
EVT N0VT = N0.getOperand(0).getValueType();
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ // On some architectures (such as SSE/NEON/etc) the SETCC result type is
+ // of the same size as the compared operands. Only optimize sext(setcc())
+ // if this is the case.
+ EVT SVT = TLI.getSetCCResultType(N0VT);
+
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
@@ -4127,11 +4378,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MatchingVectorType =
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
}
}
@@ -4162,6 +4415,44 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return SDValue();
}
+// isTruncateOf - If N is a truncate of some other value, return true, record
+// the value being truncated in Op and which of Op's bits are zero in KnownZero.
+// This function computes KnownZero to avoid a duplicated call to
+// ComputeMaskedBits in the caller.
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+ APInt &KnownZero) {
+ APInt KnownOne;
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ Op = N->getOperand(0);
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+ return true;
+ }
+
+ if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
+ cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
+ return false;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType());
+
+ ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
+ ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+ if (COp0 && COp0->isNullValue())
+ Op = Op1;
+ else if (COp1 && COp1->isNullValue())
+ Op = Op0;
+ else
+ return false;
+
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+
+ if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
+ return false;
+
+ return true;
+}
+
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4175,6 +4466,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
N0.getOperand(0));
+ // fold (zext (truncate x)) -> (zext x) or
+ // (zext (truncate x)) -> (truncate x)
+ // This is valid when the truncated bits of x are already zero.
+ // FIXME: We should extend this to work for vectors too.
+ SDValue Op;
+ APInt KnownZero;
+ if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
+ APInt TruncatedBits =
+ (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
+ APInt(Op.getValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getValueSizeInBits(),
+ N0.getValueSizeInBits(),
+ std::min(Op.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (TruncatedBits == (KnownZero & TruncatedBits)) {
+ if (VT.bitsGT(Op.getValueType()))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
+ if (VT.bitsLT(Op.getValueType()))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+
+ return Op;
+ }
+ }
+
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
@@ -4567,6 +4882,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
switch (V.getOpcode()) {
default: break;
+ case ISD::Constant: {
+ const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+ assert(CV != 0 && "Const value should be ConstSDNode.");
+ const APInt &CVal = CV->getAPIntValue();
+ APInt NewVal = CVal & Mask;
+ if (NewVal != CVal) {
+ return DAG.getConstant(NewVal, V.getValueType());
+ }
+ break;
+ }
case ISD::OR:
case ISD::XOR:
// If the LHS or RHS don't contribute bits to the or, drop them.
@@ -4705,7 +5030,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (ExtType == ISD::NON_EXTLOAD)
Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), NewAlign);
else
Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
@@ -4844,6 +5170,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ bool isLE = TLI.isLittleEndian();
// noop truncate
if (N0.getValueType() == N->getValueType(0))
@@ -4871,6 +5198,44 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}
+ // Fold extract-and-trunc into a narrow extract. For example:
+ // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+ // i32 y = TRUNCATE(i64 x)
+ // -- becomes --
+ // v16i8 b = BITCAST (v2i64 val)
+ // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+ //
+ // Note: We only run this optimization after type legalization (which often
+ // creates this pattern) and before operation legalization after which
+ // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LegalTypes && !LegalOperations && N0->hasOneUse()) {
+
+ EVT VecTy = N0.getOperand(0).getValueType();
+ EVT ExTy = N0.getValueType();
+ EVT TrTy = N->getValueType(0);
+
+ unsigned NumElem = VecTy.getVectorNumElements();
+ unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+ SDValue EltNo = N0->getOperand(1);
+ if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+ SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NVT, N0.getOperand(0));
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ N->getDebugLoc(), TrTy, V,
+ DAG.getConstant(Index, MVT::i32));
+ }
+ }
+
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
@@ -4934,7 +5299,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
LD1->getBasePtr(), LD1->getPointerInfo(),
- false, false, Align);
+ false, false, false, Align);
}
return SDValue();
@@ -5004,7 +5369,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
- OrigAlign);
+ LN0->isInvariant(), OrigAlign);
AddToWorkList(N);
CombineTo(N0.getNode(),
DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
@@ -5017,7 +5382,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
// This often reduces constant pool loads.
- if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
+ (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
N0.getOperand(0));
@@ -5247,20 +5613,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
// fold (fadd A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N1CFP->getValueAPF().isZero())
return N0;
// fold (fadd A, (fneg B)) -> (fsub A, B)
- if (isNegatibleForFree(N1, LegalOperations) == 2)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
- if (isNegatibleForFree(N0, LegalOperations) == 2)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
// If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
- N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
@@ -5285,20 +5655,39 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
// fold (fsub A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N0;
// fold (fsub 0, B) -> -B
- if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
- if (isNegatibleForFree(N1, LegalOperations))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations))
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
+ // If 'unsafe math' is enabled, fold
+ // (fsub x, (fadd x, y)) -> (fneg y) &
+ // (fsub x, (fadd y, x)) -> (fneg y)
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N1.getOpcode() == ISD::FADD) {
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+
+ if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N11, DAG, LegalOperations);
+ else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N10, DAG, LegalOperations);
+ }
+ }
+
return SDValue();
}
@@ -5308,6 +5697,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
@@ -5322,10 +5712,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
// fold (fmul A, 0) -> 0
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N1;
// fold (fmul A, 0) -> 0, vector edition.
- if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
@@ -5336,8 +5728,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5348,7 +5742,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
// If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N0.getOpcode() == ISD::FMUL &&
N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
@@ -5363,6 +5758,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
@@ -5374,10 +5770,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
+ if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
+ // Compute the reciprocal 1.0 / c2.
+ APFloat N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (eg NaN, denormal, ...).
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
+ DAG.getConstantFP(Recip, VT));
+ }
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5463,7 +5879,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
// fold (sint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128 &&
// ...but only if the target supports immediate floating-point values
- (Level == llvm::Unrestricted ||
+ (!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
@@ -5488,7 +5904,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
// fold (uint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128 &&
// ...but only if the target supports immediate floating-point values
- (Level == llvm::Unrestricted ||
+ (!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
@@ -5630,12 +6046,13 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (isNegatibleForFree(N0, LegalOperations))
+ if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BITCAST &&
+ if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
!VT.isVector() &&
N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger()) {
@@ -5671,7 +6088,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
+ if (!TLI.isFAbsFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
@@ -5860,6 +6278,47 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
return SDValue();
}
+/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
+/// uses N as its base pointer and that N may be folded in the load / store
+/// addressing mode.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = Use->getValueType(0);
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getValue().getValueType();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+}
+
/// CombineToPreIndexedLoadStore - Try turning a load / store into a
/// pre-indexed load / store when the base pointer is an add or subtract
/// and it has other uses besides the load / store. After the
@@ -5867,7 +6326,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
/// the add / subtract in and all of its other uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
- if (!LegalOperations)
+ if (Level < AfterLegalizeDAG)
return false;
bool isLoad = true;
@@ -5946,10 +6405,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (N->hasPredecessorHelper(Use, Visited, Worklist))
return false;
- if (!((Use->getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
- (Use->getOpcode() == ISD::STORE &&
- cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+ // If Ptr may be folded in addressing mode of other use, then it's
+ // not profitable to do this transformation.
+ if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
RealUse = true;
}
@@ -5999,7 +6457,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
/// load / store effectively and all of its uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
- if (!LegalOperations)
+ if (Level < AfterLegalizeDAG)
return false;
bool isLoad = true;
@@ -6046,7 +6504,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
continue;
// Try turning it into a post-indexed load / store except when
- // 1) All uses are load / store ops that use it as base ptr.
+ // 1) All uses are load / store ops that use it as base ptr (and
+ // it may be folded as addressing mmode).
// 2) Op must be independent of N, i.e. Op is neither a predecessor
// nor a successor of N. Otherwise, if Op is folded that would
// create a cycle.
@@ -6069,10 +6528,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
for (SDNode::use_iterator III = Use->use_begin(),
EEE = Use->use_end(); III != EEE; ++III) {
SDNode *UseUse = *III;
- if (!((UseUse->getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
- (UseUse->getOpcode() == ISD::STORE &&
- cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+ if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
RealUse = true;
}
@@ -6139,7 +6595,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (!LD->isVolatile()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
- if (N->hasNUsesOfValue(0, 0)) {
+ if (!N->hasAnyUseOfValue(0)) {
// It's not safe to use the two value CombineTo variant here. e.g.
// v1, chain2 = load chain1, loc
// v2, chain3 = load chain2, loc
@@ -6164,7 +6620,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
- if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+ if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
@@ -6222,7 +6678,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
BetterChain, Ptr, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ LD->isInvariant(), LD->getAlignment());
} else {
ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
LD->getValueType(0),
@@ -6486,7 +6942,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
- NewAlign);
+ LD->isInvariant(), NewAlign);
SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
@@ -6546,7 +7002,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
LD->getChain(), LD->getBasePtr(),
LD->getPointerInfo(),
- false, false, LDAlign);
+ false, false, false, LDAlign);
SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
NewLD, ST->getBasePtr(),
@@ -6823,13 +7279,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
+ EVT VT = InVec.getValueType();
+ EVT NVT = N->getValueType(0);
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
SDValue InOp = InVec.getOperand(0);
- EVT NVT = N->getValueType(0);
if (InOp.getValueType() != NVT) {
assert(InOp.getValueType().isInteger() && NVT.isInteger());
return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
@@ -6837,6 +7294,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
return InOp;
}
+ SDValue EltNo = N->getOperand(1);
+ bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+
+ // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
+ // We only perform this optimization before the op legalization phase because
+ // we may introduce new vector instructions which are not backed by TD patterns.
+ // For example on AVX, extracting elements from a wide vector without using
+ // extract_subvector.
+ if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
+ && ConstEltNo && !LegalOperations) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int NumElem = VT.getVectorNumElements();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+ // Find the new index to extract from.
+ int OrigElt = SVOp->getMaskElt(Elt);
+
+ // Extracting an undef index is undef.
+ if (OrigElt == -1)
+ return DAG.getUNDEF(NVT);
+
+ // Select the right vector half to extract from.
+ if (OrigElt < NumElem) {
+ InVec = InVec->getOperand(0);
+ } else {
+ InVec = InVec->getOperand(1);
+ OrigElt -= NumElem;
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
+ InVec, DAG.getConstant(OrigElt, MVT::i32));
+ }
+
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations) return SDValue();
@@ -6844,17 +7333,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
- SDValue EltNo = N->getOperand(1);
- if (isa<ConstantSDNode>(EltNo)) {
+ if (ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
- EVT VT = InVec.getValueType();
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
+ // If the result of load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
EVT BCVT = InVec.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
@@ -6872,12 +7368,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
InVec.getOperand(0).getValueType() == ExtVT &&
ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
LN0 = cast<LoadSDNode>(InVec.getOperand(0));
} else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
// =>
// (load $addr+1*size)
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
// If the bit convert changed the number of elements, it is unsafe
// to examine the mask.
if (BCNumEltsChanged)
@@ -6888,14 +7392,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
- if (InVec.getOpcode() == ISD::BITCAST)
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
InVec = InVec.getOperand(0);
+ }
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
}
}
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
return SDValue();
@@ -6929,9 +7440,45 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
DAG.getConstant(PtrOff, PtrType));
}
- return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(), Align);
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractvalue is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (NVT.bitsGT(LVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
+ ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
+ NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+ LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), Align);
+ Chain = Load.getValue(1);
+ if (NVT.bitsLT(LVT))
+ Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load);
+ }
+ WorkListRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes);
+ // Since we're explcitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(N);
+ return SDValue(N, 0);
}
return SDValue();
@@ -6939,11 +7486,122 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+ // Check to see if this is a BUILD_VECTOR of a bunch of values
+ // which come from any_extend or zero_extend nodes. If so, we can create
+ // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+ // optimizations. We do not handle sign-extend because we can't fill the sign
+ // using shuffles.
+ EVT SourceType = MVT::Other;
+ bool AllAnyExt = true;
+ bool AllUndef = true;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ // Ignore undef inputs.
+ if (In.getOpcode() == ISD::UNDEF) continue;
+ AllUndef = false;
+
+ bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
+ bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+ // Abort if the element is not an extension.
+ if (!ZeroExt && !AnyExt) {
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // The input is a ZeroExt or AnyExt. Check the original type.
+ EVT InTy = In.getOperand(0).getValueType();
+
+ // Check that all of the widened source types are the same.
+ if (SourceType == MVT::Other)
+ // First time.
+ SourceType = InTy;
+ else if (InTy != SourceType) {
+ // Multiple income types. Abort.
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // Check if all of the extends are ANY_EXTENDs.
+ AllAnyExt &= AnyExt;
+ }
+
+ if (AllUndef)
+ return DAG.getUNDEF(VT);
+
+ // In order to have valid types, all of the inputs must be extended from the
+ // same source type and all of the inputs must be any or zero extend.
+ // Scalar sizes must be a power of two.
+ EVT OutScalarTy = N->getValueType(0).getScalarType();
+ bool ValidTypes = SourceType != MVT::Other &&
+ isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
+ isPowerOf2_32(SourceType.getSizeInBits());
+
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization before may create bit-casts which
+ // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we
+ // may introduce illegal operations.
+ // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+ // turn into a single shuffle instruction.
+ if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
+ ValidTypes) {
+ bool isLE = TLI.isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, SourceType);
+
+ unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i=0; i < N->getNumOperands(); ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.getOpcode() == ISD::UNDEF)
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT)) return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ VecVT, &Ops[0], Ops.size());
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorkList(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
+ }
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
// at most two distinct vectors, turn this into a shuffle node.
+
+ // May only combine to shuffle after legalize if shuffle is legal.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+ return SDValue();
+
SDValue VecIn1, VecIn2;
for (unsigned i = 0; i != NumInScalars; ++i) {
// Ignore undef inputs.
@@ -6957,15 +7615,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
break;
}
- // If the input vector type disagrees with the result of the build_vector,
- // we can't make a shuffle.
+ // We allow up to two distinct input vectors.
SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
- if (ExtractedFromVec.getValueType() != VT) {
- VecIn1 = VecIn2 = SDValue(0, 0);
- break;
- }
-
- // Otherwise, remember this. We allow up to two distinct input vectors.
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
continue;
@@ -6980,7 +7631,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
}
- // If everything is good, we can make a shuffle operation.
+ // If everything is good, we can make a shuffle operation.
if (VecIn1.getNode()) {
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
@@ -7006,14 +7657,39 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
Mask.push_back(Idx+NumInScalars);
}
- // Add count and size info.
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Attempt to transform a single input vector to the correct type.
+ if ((VT != VecIn1.getValueType())) {
+ // We don't support shuffeling between TWO values of different types.
+ if (VecIn2.getNode() != 0)
+ return SDValue();
+
+ // We only support widening of vectors which are half the size of the
+ // output registers. For example XMM->YMM widening on X86 with AVX.
+ if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
+ return SDValue();
+
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ }
+
+ // If VecIn2 is unused then change it to undef.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+
+ // Check that we were able to transform all incoming values to the same type.
+ if (VecIn2.getValueType() != VecIn1.getValueType() ||
+ VecIn1.getValueType() != VT)
+ return SDValue();
+
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
if (!isTypeLegal(VT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
SDValue Ops[2];
Ops[0] = VecIn1;
- Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ Ops[1] = VecIn2;
return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
}
@@ -7045,19 +7721,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue();
- // Combine:
- // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
- // Into:
- // indicies are equal => V1
- // otherwise => (extract_subvec V1, ExtIdx)
- //
- SDValue InsIdx = N->getOperand(1);
- SDValue ExtIdx = V->getOperand(2);
+ // Only handle cases where both indexes are constants with the same type.
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
- if (InsIdx == ExtIdx)
- return V->getOperand(1);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
- V->getOperand(0), N->getOperand(1));
+ if (InsIdx && ExtIdx &&
+ InsIdx->getValueType(0).getSizeInBits() <= 64 &&
+ ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
+ return V->getOperand(1);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
+ V->getOperand(0), N->getOperand(1));
+ }
}
return SDValue();
@@ -7068,15 +7748,63 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
- assert(N0.getValueType().getVectorNumElements() == NumElts &&
- "Vector shuffle must be normalized in DAG");
+ // Canonicalize shuffle v, v -> v, undef
+ if (N0 == N1) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) Idx -= NumElts;
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N0.getOpcode() == ISD::UNDEF) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= 0) {
+ if (Idx < (int)NumElts)
+ Idx += NumElts;
+ else
+ Idx -= NumElts;
+ }
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
- // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+ // Remove references to rhs if it is undef
+ if (N1.getOpcode() == ISD::UNDEF) {
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) {
+ Idx = -1;
+ Changed = true;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]);
+ }
// If it is a splat, check if the argument vector is another splat or a
// build_vector with all scalar elements the same.
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
@@ -7115,6 +7843,40 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return N0;
}
}
+
+ // If this shuffle node is simply a swizzle of another shuffle node,
+ // and it reverses the swizzle of the previous shuffle then we can
+ // optimize shuffle(shuffle(x, undef), undef) -> x.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N1.getOpcode() == ISD::UNDEF) {
+
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // Shuffle nodes can only reverse shuffles with a single non-undef value.
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ assert(Idx < (int)NumElts && "Index references undef operand");
+ // Next, this index comes from the first value, which is the incoming
+ // shuffle. Adopt the incoming index.
+ if (Idx >= 0)
+ Idx = OtherSV->getMaskElt(Idx);
+
+ // The combined shuffle must map each index to itself.
+ if (Idx >= 0 && (unsigned)Idx != i)
+ return SDValue();
+ }
+
+ return OtherSV->getOperand(0);
+ }
+
return SDValue();
}
@@ -7190,7 +7952,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
SDValue Elt = RHS.getOperand(i);
if (!isa<ConstantSDNode>(Elt))
return SDValue();
- else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+
+ if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
Indices.push_back(i);
else if (cast<ConstantSDNode>(Elt)->isNullValue())
Indices.push_back(NumElts);
@@ -7261,8 +8024,19 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
EVT VT = LHSOp.getValueType();
- assert(RHSOp.getValueType() == VT &&
- "SimplifyVBinOp with different BUILD_VECTOR element types");
+ EVT RVT = RHSOp.getValueType();
+ if (RVT != VT) {
+ // Integer BUILD_VECTOR operands may have types larger than the element
+ // size (e.g., when the element type is not legal). Prior to type
+ // legalization, the types may not match between the two BUILD_VECTORS.
+ // Truncate one of the operands to make them match.
+ if (RVT.getSizeInBits() > VT.getSizeInBits()) {
+ RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp);
+ } else {
+ LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp);
+ VT = RVT;
+ }
+ }
SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
LHSOp, RHSOp);
if (FoldOp.getOpcode() != ISD::UNDEF &&
@@ -7374,8 +8148,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if ((LLD->hasAnyUseOfValue(1) &&
(LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
- (LLD->hasAnyUseOfValue(1) &&
- (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))))
+ (RLD->hasAnyUseOfValue(1) &&
+ (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
return false;
Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
@@ -7393,7 +8167,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// FIXME: Discards pointer info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->isVolatile(), LLD->isNonTemporal(),
- LLD->getAlignment());
+ LLD->isInvariant(), LLD->getAlignment());
} else {
Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
RLD->getExtensionType() : LLD->getExtensionType(),
@@ -7509,7 +8283,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
AddToWorkList(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
- false, Alignment);
+ false, false, Alignment);
}
}
@@ -7517,8 +8291,6 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
- N0.getValueType().isInteger() &&
- N2.getValueType().isInteger() &&
(N1C->isNullValue() || // (a < 0) ? b : 0
(N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
EVT XType = N0.getValueType();
@@ -7720,7 +8492,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
std::vector<SDNode*> Built;
- SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+ SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
ii != ee; ++ii)
@@ -7734,7 +8506,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
std::vector<SDNode*> Built;
- SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+ SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
ii != ee; ++ii)
@@ -7856,30 +8628,20 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
- SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue,
- int &SrcValueOffset,
- unsigned &SrcValueAlign,
- const MDNode *&TBAAInfo) const {
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- Ptr = LD->getBasePtr();
- Size = LD->getMemoryVT().getSizeInBits() >> 3;
- SrcValue = LD->getSrcValue();
- SrcValueOffset = LD->getSrcValueOffset();
- SrcValueAlign = LD->getOriginalAlignment();
- TBAAInfo = LD->getTBAAInfo();
- return true;
- }
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- Ptr = ST->getBasePtr();
- Size = ST->getMemoryVT().getSizeInBits() >> 3;
- SrcValue = ST->getSrcValue();
- SrcValueOffset = ST->getSrcValueOffset();
- SrcValueAlign = ST->getOriginalAlignment();
- TBAAInfo = ST->getTBAAInfo();
- return false;
- }
- llvm_unreachable("FindAliasInfo expected a memory operand");
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
+ LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
+
+ Ptr = LS->getBasePtr();
+ Size = LS->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LS->getSrcValue();
+ SrcValueOffset = LS->getSrcValueOffset();
+ SrcValueAlign = LS->getOriginalAlignment();
+ TBAAInfo = LS->getTBAAInfo();
+ return isa<LoadSDNode>(LS);
}
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index e8f8c73d6883..0c1ac6982d2a 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -39,6 +39,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "isel"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
@@ -58,8 +59,15 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
+STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
+ "target-independent selector");
+STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
+ "target-specific selector");
+STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
+
/// startNewBlock - Set the current block to which generated machine
/// instructions will be appended, and clear the local CSE map.
///
@@ -96,6 +104,11 @@ bool FastISel::hasTrivialKill(const Value *V) const {
!hasTrivialKill(Cast->getOperand(0)))
return false;
+ // GEPs with all zero indices are trivially coalesced by fast-isel.
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
+ return false;
+
// Only instructions with a single use in the same basic block are considered
// to have trivial kills.
return I->hasOneUse() &&
@@ -123,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
return 0;
}
- // Look up the value to see if we already have a register for it. We
- // cache values defined by Instructions across blocks, and other values
- // only locally. This is because Instructions already have the SSA
- // def-dominates-use requirement enforced.
- DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
- if (I != FuncInfo.ValueMap.end())
- return I->second;
-
- unsigned Reg = LocalValueMap[V];
+ // Look up the value to see if we already have a register for it.
+ unsigned Reg = lookUpRegForValue(V);
if (Reg != 0)
return Reg;
@@ -186,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
uint32_t IntBitWidth = IntVT.getSizeInBits();
bool isExact;
(void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
+ APFloat::rmTowardZero, &isExact);
if (isExact) {
APInt IntVal(IntBitWidth, x);
@@ -297,6 +303,18 @@ void FastISel::recomputeInsertPt() {
++FuncInfo.InsertPt;
}
+void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!");
+ while (I != E) {
+ MachineInstr *Dead = &*I;
+ ++I;
+ Dead->eraseFromParent();
+ ++NumFastIselDead;
+ }
+ recomputeInsertPt();
+}
+
FastISel::SavePoint FastISel::enterLocalValueArea() {
MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
DebugLoc OldDL = DL;
@@ -377,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
ISDOpcode = ISD::SRA;
}
+ // Transform "urem x, pow2" -> "and x, pow2-1".
+ if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+ isPowerOf2_64(Imm)) {
+ --Imm;
+ ISDOpcode = ISD::AND;
+ }
+
unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
Op0IsKill, Imm, VT.getSimpleVT());
if (ResultReg == 0) return false;
@@ -427,6 +452,11 @@ bool FastISel::SelectGetElementPtr(const User *I) {
bool NIsKill = hasTrivialKill(I->getOperand(0));
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
+ uint64_t TotalOffs = 0;
+ // FIXME: What's a good SWAG number for MaxOffs?
+ uint64_t MaxOffs = 2048;
Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
@@ -436,14 +466,15 @@ bool FastISel::SelectGetElementPtr(const User *I) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
- uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
- // FIXME: This can be optimized by combining the add with a
- // subsequent one.
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
- NIsKill = true;
+ TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
}
Ty = StTy->getElementType(Field);
} else {
@@ -452,14 +483,26 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
- uint64_t Offs =
+ // N = N + Offset
+ TotalOffs +=
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ continue;
+ }
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
NIsKill = true;
- continue;
+ TotalOffs = 0;
}
// N = N + Idx * ElementSize;
@@ -484,6 +527,12 @@ bool FastISel::SelectGetElementPtr(const User *I) {
return false;
}
}
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
// We successfully emitted code for the given LLVM Instruction.
UpdateValueMap(I, N);
@@ -512,21 +561,32 @@ bool FastISel::SelectCall(const User *I) {
return true;
}
+ MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
+ ComputeUsesVAFloatArgument(*Call, &MMI);
+
const Function *F = Call->getCalledFunction();
if (!F) return false;
// Handle selected intrinsic function calls.
switch (F->getIntrinsicID()) {
default: break;
+ // At -O0 we don't care about the lifetime intrinsics.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
if (!DIVariable(DI->getVariable()).Verify() ||
- !FuncInfo.MF->getMMI().hasDebugInfo())
+ !FuncInfo.MF->getMMI().hasDebugInfo()) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
+ }
const Value *Address = DI->getAddress();
- if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
+ if (!Address || isa<UndefValue>(Address)) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
+ }
unsigned Reg = 0;
unsigned Offset = 0;
@@ -534,16 +594,36 @@ bool FastISel::SelectCall(const User *I) {
// Some arguments' frame index is recorded during argument lowering.
Offset = FuncInfo.getArgumentFrameIndex(Arg);
if (Offset)
- Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
}
if (!Reg)
- Reg = getRegForValue(Address);
+ Reg = lookUpRegForValue(Address);
+
+ // If we have a VLA that has a "use" in a metadata node that's then used
+ // here but it has no other uses, then we have a problem. E.g.,
+ //
+ // int foo (const int *x) {
+ // char a[*x];
+ // return 0;
+ // }
+ //
+ // If we assign 'a' a vreg and fast isel later on has to use the selection
+ // DAG isel, it will want to copy the value to the vreg. However, there are
+ // no uses, which goes counter to what selection DAG isel expects.
+ if (!Reg && !Address->use_empty() && isa<Instruction>(Address) &&
+ (!isa<AllocaInst>(Address) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
+ Reg = FuncInfo.InitializeRegForValue(Address);
if (Reg)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::DBG_VALUE))
.addReg(Reg, RegState::Debug).addImm(Offset)
.addMetadata(DI->getVariable());
+ else
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return true;
}
case Intrinsic::dbg_value: {
@@ -581,60 +661,6 @@ bool FastISel::SelectCall(const User *I) {
}
return true;
}
- case Intrinsic::eh_exception: {
- EVT VT = TLI.getValueType(Call->getType());
- if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
- break;
-
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- unsigned Reg = TLI.getExceptionAddressRegister();
- const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
- UpdateValueMap(Call, ResultReg);
- return true;
- }
- case Intrinsic::eh_selector: {
- EVT VT = TLI.getValueType(Call->getType());
- if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
- break;
- if (FuncInfo.MBB->isLandingPad())
- AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
- else {
-#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(Call);
-#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
- unsigned Reg = TLI.getExceptionSelectorRegister();
- EVT SrcVT = TLI.getPointerTy();
- const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
-
- bool ResultRegIsKill = hasTrivialKill(Call);
-
- // Cast the register to the type of the selector.
- if (SrcVT.bitsGT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
- ResultReg, ResultRegIsKill);
- else if (SrcVT.bitsLT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
- ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
- if (ResultReg == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
-
- UpdateValueMap(Call, ResultReg);
-
- return true;
- }
case Intrinsic::objectsize: {
ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
unsigned long long Res = CI->isZero() ? -1ULL : 0;
@@ -726,8 +752,8 @@ bool FastISel::SelectBitCast(const User *I) {
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
- TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
- TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
// Don't attempt a cross-class copy. It will likely fail.
if (SrcClass == DstClass) {
ResultReg = createResultReg(DstClass);
@@ -758,17 +784,33 @@ FastISel::SelectInstruction(const Instruction *I) {
DL = I->getDebugLoc();
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+
// First, try doing target-independent selection.
if (SelectOperator(I, I->getOpcode())) {
+ ++NumFastIselSuccessIndependent;
DL = DebugLoc();
return true;
}
+ // Remove dead code. However, ignore call instructions since we've flushed
+ // the local value map and recomputed the insert point.
+ if (!isa<CallInst>(I)) {
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ }
// Next, try calling the target to attempt to handle the instruction.
+ SavedInsertPt = FuncInfo.InsertPt;
if (TargetSelectInstruction(I)) {
+ ++NumFastIselSuccessTarget;
DL = DebugLoc();
return true;
}
+ // Check for dead code and remove as necessary.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
DL = DebugLoc();
return false;
@@ -779,8 +821,11 @@ FastISel::SelectInstruction(const Instruction *I) {
/// the CFG.
void
FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
- if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
- // The unconditional fall-through case, which needs no instructions.
+
+ if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ // For more accurate line information if this is the only instruction
+ // in the block then emit it, otherwise we have the unconditional
+ // fall-through case, which needs no instructions.
} else {
// The unconditional branch case.
TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
@@ -1354,8 +1399,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// exactly one register for each non-void instruction.
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
- // Promote MVT::i1.
- if (VT == MVT::i1)
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
else {
FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index b052740a1abe..8dde919079d9 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "function-lowering-info"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -68,7 +69,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
GetReturnInfo(Fn->getReturnType(),
Fn->getAttributes().getRetAttributes(), Outs, TLI);
CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
- Fn->isVarArg(),
+ Fn->isVarArg(),
Outs, Fn->getContext());
// Initialize the mapping of values to registers. This is only set up for
@@ -92,14 +93,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
// candidate. I.e., it would trigger the creation of a stack protector.
bool MayNeedSP =
(AI->isArrayAllocation() ||
- (TySize > 8 && isa<ArrayType>(Ty) &&
+ (TySize >= 8 && isa<ArrayType>(Ty) &&
cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP);
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
+ MayNeedSP);
}
for (; BB != EB; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
if (isUsedOutsideOfDefiningBlock(I))
@@ -355,7 +358,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
/// argument. This overrides previous frame index entry for this argument,
/// if any.
void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
- int FI) {
+ int FI) {
ByValArgFrameIndexMap[A] = FI;
}
@@ -367,10 +370,34 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
ByValArgFrameIndexMap.find(A);
if (I != ByValArgFrameIndexMap.end())
return I->second;
- DEBUG(dbgs() << "Argument does not have assigned frame index!");
+ DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
return 0;
}
+/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
+/// being passed to this variadic function, and set the MachineModuleInfo's
+/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
+/// reference to _fltused on Windows, which will link in MSVCRT's
+/// floating-point support.
+void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
+ MachineModuleInfo *MMI)
+{
+ FunctionType *FT = cast<FunctionType>(
+ I.getCalledValue()->getType()->getContainedType(0));
+ if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (i->isFloatingPointTy()) {
+ MMI->setUsesVAFloatArgument(true);
+ return;
+ }
+ }
+ }
+ }
+}
+
/// AddCatchInfo - Extract the personality and type infos from an eh.selector
/// call, and add them to the specified machine basic block.
void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
@@ -425,34 +452,6 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
}
}
-void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
- MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
- SmallPtrSet<const BasicBlock*, 4> Visited;
-
- // The 'eh.selector' call may not be in the direct successor of a basic block,
- // but could be several successors deeper. If we don't find it, try going one
- // level further. <rdar://problem/8824861>
- while (Visited.insert(SuccBB)) {
- for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end();
- I != E; ++I)
- if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
- // Apply the catch info to LPad.
- AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]);
-#ifndef NDEBUG
- if (!FLI.MBBMap[SuccBB]->isLandingPad())
- FLI.CatchInfoFound.insert(EHSel);
-#endif
- return;
- }
-
- const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator());
- if (Br && Br->isUnconditional())
- SuccBB = Br->getSuccessor(0);
- else
- break;
- }
-}
-
/// AddLandingPadInfo - Extract the exception handling information from the
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 2ff66f8f8715..1467d887789c 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -294,7 +294,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
DstRC = TII->getRegClass(*II, IIOpNum, TRI);
- assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
+ assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
"Don't have operand info for this instruction!");
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
@@ -351,6 +351,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
MI->addOperand(MachineOperand::CreateFPImm(CFP));
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask()));
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
TGA->getTargetFlags()));
@@ -574,14 +576,19 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
for (unsigned i = 1; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
if ((i & 1) == 0) {
- unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
- unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
- const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
- const TargetRegisterClass *SRC =
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
+ // Skip physical registers as they don't have a vreg to get and we'll
+ // insert copies for them in TwoAddressInstructionPass anyway.
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
- if (SRC && SRC != RC) {
- MRI->setRegClass(NewVReg, SRC);
- RC = SRC;
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
}
}
AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
@@ -700,33 +707,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Create the new machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
- // The MachineInstr constructor adds implicit-def operands. Scan through
- // these to determine which are dead.
- if (MI->getNumOperands() != 0 &&
- Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
- // First, collect all used registers.
- SmallVector<unsigned, 8> UsedRegs;
- for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
- if (F->getOpcode() == ISD::CopyFromReg)
- UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
- else {
- // Collect declared implicit uses.
- const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
- UsedRegs.append(MCID.getImplicitUses(),
- MCID.getImplicitUses() + MCID.getNumImplicitUses());
- // In addition to declared implicit uses, we must also check for
- // direct RegisterSDNode operands.
- for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
- if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
- unsigned Reg = R->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- UsedRegs.push_back(Reg);
- }
- }
- // Then mark unused registers as dead.
- MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
- }
-
// Add result register values for things that are defined by this
// instruction.
if (NumResults)
@@ -751,30 +731,63 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// hook knows where in the block to insert the replacement code.
MBB->insert(InsertPos, MI);
+ // The MachineInstr may also define physregs instead of virtregs. These
+ // physreg values can reach other instructions in different ways:
+ //
+ // 1. When there is a use of a Node value beyond the explicitly defined
+ // virtual registers, we emit a CopyFromReg for one of the implicitly
+ // defined physregs. This only happens when HasPhysRegOuts is true.
+ //
+ // 2. A CopyFromReg reading a physreg may be glued to this instruction.
+ //
+ // 3. A glued instruction may implicitly use a physreg.
+ //
+ // 4. A glued instruction may use a RegisterSDNode operand.
+ //
+ // Collect all the used physreg defs, and make sure that any unused physreg
+ // defs are marked as dead.
+ SmallVector<unsigned, 8> UsedRegs;
+
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
- if (Node->hasAnyUseOfValue(i))
- EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
- // If there are no uses, mark the register as dead now, so that
- // MachineLICM/Sink can see that it's dead. Don't do this if the
- // node has a Glue value, for the benefit of targets still using
- // Glue for values in physregs.
- else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
- MI->addRegisterDead(Reg, TRI);
+ if (!Node->hasAnyUseOfValue(i))
+ continue;
+ // This implicitly defined physreg has a use.
+ UsedRegs.push_back(Reg);
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
}
}
- // If the instruction has implicit defs and the node doesn't, mark the
- // implicit def as dead. If the node has any glue outputs, we don't do this
- // because we don't know what implicit defs are being used by glued nodes.
- if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
- if (const unsigned *IDList = II.getImplicitDefs()) {
- for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
- i != e; ++i)
- MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
+ // Scan the glue chain for any used physregs.
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+ if (F->getOpcode() == ISD::CopyFromReg) {
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ continue;
+ } else if (F->getOpcode() == ISD::CopyToReg) {
+ // Skip CopyToReg nodes that are internal to the glue chain.
+ continue;
+ }
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
}
+ }
+
+ // Finally mark unused registers as dead.
+ if (!UsedRegs.empty() || II.getImplicitDefs())
+ MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
#ifdef NDEBUG
@@ -794,10 +807,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
Node->dump();
#endif
llvm_unreachable("This target-independent node should have been selected!");
- break;
case ISD::EntryToken:
llvm_unreachable("EntryToken should have been excluded from the schedule!");
- break;
case ISD::MERGE_VALUES:
case ISD::TokenFactor: // fall thru
break;
diff --git a/lib/CodeGen/SelectionDAG/LLVMBuild.txt b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
new file mode 100644
index 000000000000..81d2e000a2e8
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/CodeGen/SelectionDAG/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SelectionDAG
+parent = CodeGen
+required_libraries = Analysis CodeGen Core MC Support Target TransformUtils
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 63255ae2ebd9..a96a99781f4e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -46,37 +46,18 @@ using namespace llvm;
/// will attempt merge setcc and brc instructions into brcc's.
///
namespace {
-class SelectionDAGLegalize {
+class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
const TargetMachine &TM;
const TargetLowering &TLI;
SelectionDAG &DAG;
- // Libcall insertion helpers.
-
- /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
- /// legalized. We use this to ensure that calls are properly serialized
- /// against each other, including inserted libcalls.
- SDValue LastCALLSEQ_END;
-
- /// IsLegalizingCall - This member is used *only* for purposes of providing
- /// helpful assertions that a libcall isn't created while another call is
- /// being legalized (which could lead to non-serialized call sequences).
- bool IsLegalizingCall;
-
- /// LegalizedNodes - For nodes that are of legal width, and that have more
- /// than one use, this map indicates what regularized operand to use. This
- /// allows us to avoid legalizing the same thing more than once.
- DenseMap<SDValue, SDValue> LegalizedNodes;
+ /// LegalizePosition - The iterator for walking through the node list.
+ SelectionDAG::allnodes_iterator LegalizePosition;
- void AddLegalizedOperand(SDValue From, SDValue To) {
- LegalizedNodes.insert(std::make_pair(From, To));
- // If someone requests legalization of the new node, return itself.
- if (From != To)
- LegalizedNodes.insert(std::make_pair(To, To));
+ /// LegalizedNodes - The set of nodes which have already been legalized.
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
- // Transfer SDDbgValues.
- DAG.TransferDbgValues(From, To);
- }
+ // Libcall insertion helpers.
public:
explicit SelectionDAGLegalize(SelectionDAG &DAG);
@@ -84,9 +65,8 @@ public:
void LegalizeDAG();
private:
- /// LegalizeOp - Return a legal replacement for the given operation, with
- /// all legal operands.
- SDValue LegalizeOp(SDValue O);
+ /// LegalizeOp - Legalizes the given operation.
+ void LegalizeOp(SDNode *Node);
SDValue OptimizeFloatStore(StoreSDNode *ST);
@@ -105,10 +85,7 @@ private:
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
- SmallVectorImpl<int> &Mask) const;
-
- bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
- SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+ ArrayRef<int> Mask) const;
void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
DebugLoc dl);
@@ -150,10 +127,46 @@ private:
SDValue ExpandInsertToVectorThroughStack(SDValue Op);
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+ SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
- void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
- void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandNode(SDNode *Node);
+ void PromoteNode(SDNode *Node);
+
+ void ForgetNode(SDNode *N) {
+ LegalizedNodes.erase(N);
+ if (LegalizePosition == SelectionDAG::allnodes_iterator(N))
+ ++LegalizePosition;
+ }
+
+public:
+ // DAGUpdateListener implementation.
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ ForgetNode(N);
+ }
+ virtual void NodeUpdated(SDNode *N) {}
+
+ // Node replacement helpers
+ void ReplacedNode(SDNode *N) {
+ if (N->use_empty()) {
+ DAG.RemoveDeadNode(N, this);
+ } else {
+ ForgetNode(N);
+ }
+ }
+ void ReplaceNode(SDNode *Old, SDNode *New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old);
+ }
+ void ReplaceNode(SDValue Old, SDValue New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old.getNode());
+ }
+ void ReplaceNode(SDNode *Old, const SDValue *New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old);
+ }
};
}
@@ -164,7 +177,7 @@ private:
SDValue
SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
- SmallVectorImpl<int> &Mask) const {
+ ArrayRef<int> Mask) const {
unsigned NumMaskElts = VT.getVectorNumElements();
unsigned NumDestElts = NVT.getVectorNumElements();
unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
@@ -195,145 +208,37 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
}
void SelectionDAGLegalize::LegalizeDAG() {
- LastCALLSEQ_END = DAG.getEntryNode();
- IsLegalizingCall = false;
-
- // The legalize process is inherently a bottom-up recursive process (users
- // legalize their uses before themselves). Given infinite stack space, we
- // could just start legalizing on the root and traverse the whole graph. In
- // practice however, this causes us to run out of stack space on large basic
- // blocks. To avoid this problem, compute an ordering of the nodes where each
- // node is only legalized after all of its operands are legalized.
DAG.AssignTopologicalOrder();
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
- LegalizeOp(SDValue(I, 0));
- // Finally, it's possible the root changed. Get the new root.
- SDValue OldRoot = DAG.getRoot();
- assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
- DAG.setRoot(LegalizedNodes[OldRoot]);
-
- LegalizedNodes.clear();
-
- // Remove dead nodes now.
- DAG.RemoveDeadNodes();
-}
-
-
-/// FindCallEndFromCallStart - Given a chained node that is part of a call
-/// sequence, find the CALLSEQ_END node that terminates the call sequence.
-static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
- // Nested CALLSEQ_START/END constructs aren't yet legal,
- // but we can DTRT and handle them correctly here.
- if (Node->getOpcode() == ISD::CALLSEQ_START)
- depth++;
- else if (Node->getOpcode() == ISD::CALLSEQ_END) {
- depth--;
- if (depth == 0)
- return Node;
- }
- if (Node->use_empty())
- return 0; // No CallSeqEnd
-
- // The chain is usually at the end.
- SDValue TheChain(Node, Node->getNumValues()-1);
- if (TheChain.getValueType() != MVT::Other) {
- // Sometimes it's at the beginning.
- TheChain = SDValue(Node, 0);
- if (TheChain.getValueType() != MVT::Other) {
- // Otherwise, hunt for it.
- for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
- if (Node->getValueType(i) == MVT::Other) {
- TheChain = SDValue(Node, i);
- break;
- }
-
- // Otherwise, we walked into a node without a chain.
- if (TheChain.getValueType() != MVT::Other)
- return 0;
+ // Visit all the nodes. We start in topological order, so that we see
+ // nodes with their original operands intact. Legalization can produce
+ // new nodes which may themselves need to be legalized. Iterate until all
+ // nodes have been legalized.
+ for (;;) {
+ bool AnyLegalized = false;
+ for (LegalizePosition = DAG.allnodes_end();
+ LegalizePosition != DAG.allnodes_begin(); ) {
+ --LegalizePosition;
+
+ SDNode *N = LegalizePosition;
+ if (LegalizedNodes.insert(N)) {
+ AnyLegalized = true;
+ LegalizeOp(N);
+ }
}
- }
-
- for (SDNode::use_iterator UI = Node->use_begin(),
- E = Node->use_end(); UI != E; ++UI) {
-
- // Make sure to only follow users of our token chain.
- SDNode *User = *UI;
- for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
- if (User->getOperand(i) == TheChain)
- if (SDNode *Result = FindCallEndFromCallStart(User, depth))
- return Result;
- }
- return 0;
-}
-
-/// FindCallStartFromCallEnd - Given a chained node that is part of a call
-/// sequence, find the CALLSEQ_START node that initiates the call sequence.
-static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
- int nested = 0;
- assert(Node && "Didn't find callseq_start for a call??");
- while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
- Node = Node->getOperand(0).getNode();
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- switch (Node->getOpcode()) {
- default:
+ if (!AnyLegalized)
break;
- case ISD::CALLSEQ_START:
- if (!nested)
- return Node;
- nested--;
- break;
- case ISD::CALLSEQ_END:
- nested++;
- break;
- }
- }
- return 0;
-}
-
-/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
-/// see if any uses can reach Dest. If no dest operands can get to dest,
-/// legalize them, legalize ourself, and return false, otherwise, return true.
-///
-/// Keep track of the nodes we fine that actually do lead to Dest in
-/// NodesLeadingTo. This avoids retraversing them exponential number of times.
-///
-bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
- SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
- if (N == Dest) return true; // N certainly leads to Dest :)
-
- // If we've already processed this node and it does lead to Dest, there is no
- // need to reprocess it.
- if (NodesLeadingTo.count(N)) return true;
-
- // If the first result of this node has been already legalized, then it cannot
- // reach N.
- if (LegalizedNodes.count(SDValue(N, 0))) return false;
-
- // Okay, this node has not already been legalized. Check and legalize all
- // operands. If none lead to Dest, then we can legalize this node.
- bool OperandsLeadToDest = false;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- OperandsLeadToDest |= // If an operand leads to Dest, so do we.
- LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
- NodesLeadingTo);
- if (OperandsLeadToDest) {
- NodesLeadingTo.insert(N);
- return true;
}
- // Okay, this node looks safe, legalize it and return false.
- LegalizeOp(SDValue(N, 0));
- return false;
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
}
/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
/// a load from the constant pool.
-static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
- SelectionDAG &DAG, const TargetLowering &TLI) {
+SDValue
+SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
bool Extend = false;
DebugLoc dl = CFP->getDebugLoc();
@@ -369,20 +274,27 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- if (Extend)
- return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
- DAG.getEntryNode(),
- CPIdx, MachinePointerInfo::getConstantPool(),
- VT, false, false, Alignment);
- return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(), false, false,
- Alignment);
+ if (Extend) {
+ SDValue Result =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
+ DAG.getEntryNode(),
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
+ return Result;
+ }
+ SDValue Result =
+ DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false, false, false,
+ Alignment);
+ return Result;
}
/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
-static
-SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SelectionDAGLegalize *DAGLegalize) {
+ assert(ST->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed stores not implemented!");
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
SDValue Val = ST->getValue();
@@ -397,8 +309,10 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// same size, then a (misaligned) int store.
// FIXME: Does not handle truncating floating point stores!
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
- return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
- ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
}
// Do a (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
@@ -427,7 +341,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Load one integer register's worth from the stack slot.
SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset),
@@ -458,8 +372,11 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
// The order of the stores doesn't matter - say it with a TokenFactor.
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
}
assert(ST->getMemoryVT().isInteger() &&
!ST->getMemoryVT().isVector() &&
@@ -488,13 +405,18 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
Alignment);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
}
/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
-static
-SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+static void
+ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SDValue &ValResult, SDValue &ChainResult) {
+ assert(LD->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed loads not implemented!");
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0);
@@ -507,13 +429,15 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (VT.isFloatingPoint() && LoadedVT != VT)
Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
- SDValue Ops[] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Result;
+ ChainResult = Chain;
+ return;
}
// Copy the value to a (aligned) stack slot using (unaligned) integer
@@ -537,6 +461,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(),
MinAlign(LD->getAlignment(), Offset));
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
@@ -572,8 +497,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
MachinePointerInfo(), LoadedVT, false, false, 0);
// Callers expect a MERGE_VALUES node.
- SDValue Ops[] = { Load, TF };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Load;
+ ChainResult = TF;
+ return;
}
assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
"Unaligned load of unsupported type.");
@@ -626,8 +552,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- SDValue Ops[] = { Result, TF };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Result;
+ ChainResult = TF;
}
/// PerformInsertVectorEltInMemory - Some target cannot handle a variable
@@ -672,7 +598,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
false, false, 0);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI), false, false, 0);
+ MachinePointerInfo::getFixedStack(SPFI), false, false,
+ false, 0);
}
@@ -763,11 +690,10 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
/// LegalizeOp - Return a legal replacement for the given operation, with
/// all legal operands.
-SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
- if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
- return Op;
+void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return;
- SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
@@ -782,13 +708,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
- // Note that LegalizeOp may be reentered even from single-use nodes, which
- // means that we always must cache transformed nodes.
- DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
- if (I != LegalizedNodes.end()) return I->second;
-
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
- SDValue Result = Op;
bool isCustom = false;
// Figure out the correct action; the way to query this varies by opcode
@@ -798,10 +718,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
- case ISD::VAARG:
case ISD::STACKSAVE:
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::VAARG:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ if (Action != TargetLowering::Promote)
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
@@ -865,7 +790,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -882,17 +806,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
- case ISD::BUILD_VECTOR:
- // A weird case: legalization for BUILD_VECTOR never legalizes the
- // operands!
- // FIXME: This really sucks... changing it isn't semantically incorrect,
- // but it massively pessimizes the code for floating-point BUILD_VECTORs
- // because ConstantFP operands get legalized into constant pool loads
- // before the BUILD_VECTOR code can see them. It doesn't usually bite,
- // though, because BUILD_VECTORS usually get lowered into other nodes
- // which get legalized properly.
- SimpleFinishLegalizing = false;
- break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -903,22 +816,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
if (SimpleFinishLegalizing) {
- SmallVector<SDValue, 8> Ops, ResultVals;
+ SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
- Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ Ops.push_back(Node->getOperand(i));
switch (Node->getOpcode()) {
default: break;
- case ISD::BR:
- case ISD::BRIND:
- case ISD::BR_JT:
- case ISD::BR_CC:
- case ISD::BRCOND:
- // Branches tweak the chain to include LastCALLSEQ_END
- Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
- LastCALLSEQ_END);
- Ops[0] = LegalizeOp(Ops[0]);
- LastCALLSEQ_END = DAG.getEntryNode();
- break;
case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
@@ -926,57 +828,66 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::ROTR:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[1].getValueType().isVector())
- Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
- Ops[1]));
+ if (!Ops[1].getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]);
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ Ops[1] = Handle.getValue();
+ }
break;
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
case ISD::SHL_PARTS:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[2].getValueType().isVector())
- Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
- Ops[2]));
+ if (!Ops[2].getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]);
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ Ops[2] = Handle.getValue();
+ }
break;
}
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(),
- Ops.size()), 0);
+ SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+ if (NewNode != Node) {
+ DAG.ReplaceAllUsesWith(Node, NewNode, this);
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
+ ReplacedNode(Node);
+ Node = NewNode;
+ }
switch (Action) {
case TargetLowering::Legal:
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- ResultVals.push_back(Result.getValue(i));
- break;
+ return;
case TargetLowering::Custom:
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
- Tmp1 = TLI.LowerOperation(Result, DAG);
+ Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Tmp1.getNode()) {
+ SmallVector<SDValue, 8> ResultVals;
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
if (e == 1)
ResultVals.push_back(Tmp1);
else
ResultVals.push_back(Tmp1.getValue(i));
}
- break;
+ if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) {
+ DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this);
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
+ ReplacedNode(Node);
+ }
+ return;
}
// FALL THROUGH
case TargetLowering::Expand:
- ExpandNode(Result.getNode(), ResultVals);
- break;
+ ExpandNode(Node);
+ return;
case TargetLowering::Promote:
- PromoteNode(Result.getNode(), ResultVals);
- break;
- }
- if (!ResultVals.empty()) {
- for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
- if (ResultVals[i] != SDValue(Node, i))
- ResultVals[i] = LegalizeOp(ResultVals[i]);
- AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
- }
- return ResultVals[Op.getResNo()];
+ PromoteNode(Node);
+ return;
}
}
@@ -987,160 +898,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Node->dump( &DAG);
dbgs() << "\n";
#endif
- assert(0 && "Do not know how to legalize this operator!");
+ llvm_unreachable("Do not know how to legalize this operator!");
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL: {
- // Scalarize vector SRA/SRL/SHL.
- EVT VT = Node->getValueType(0);
- assert(VT.isVector() && "Unable to legalize non-vector shift");
- assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal");
- unsigned NumElem = VT.getVectorNumElements();
-
- SmallVector<SDValue, 8> Scalars;
- for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getScalarType(),
- Node->getOperand(0), DAG.getIntPtrConstant(Idx));
- SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getScalarType(),
- Node->getOperand(1), DAG.getIntPtrConstant(Idx));
- Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
- VT.getScalarType(), Ex, Sh));
- }
- Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
- &Scalars[0], Scalars.size());
- break;
- }
-
- case ISD::BUILD_VECTOR:
- switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
- default: assert(0 && "This action is not supported yet!");
- case TargetLowering::Custom:
- Tmp3 = TLI.LowerOperation(Result, DAG);
- if (Tmp3.getNode()) {
- Result = Tmp3;
- break;
- }
- // FALLTHROUGH
- case TargetLowering::Expand:
- Result = ExpandBUILD_VECTOR(Result.getNode());
- break;
- }
- break;
- case ISD::CALLSEQ_START: {
- SDNode *CallEnd = FindCallEndFromCallStart(Node);
-
- // Recursively Legalize all of the inputs of the call end that do not lead
- // to this call start. This ensures that any libcalls that need be inserted
- // are inserted *before* the CALLSEQ_START.
- {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
- for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
- LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
- NodesLeadingTo);
- }
-
- // Now that we have legalized all of the inputs (which may have inserted
- // libcalls), create the new CALLSEQ_START node.
- Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
-
- // Merge in the last call to ensure that this call starts after the last
- // call ended.
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
- Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Tmp1, LastCALLSEQ_END);
- Tmp1 = LegalizeOp(Tmp1);
- }
-
- // Do not try to legalize the target-specific arguments (#1+).
- if (Tmp1 != Node->getOperand(0)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0],
- Ops.size()), Result.getResNo());
- }
-
- // Remember that the CALLSEQ_START is legalized.
- AddLegalizedOperand(Op.getValue(0), Result);
- if (Node->getNumValues() == 2) // If this has a flag result, remember it.
- AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
-
- // Now that the callseq_start and all of the non-call nodes above this call
- // sequence have been legalized, legalize the call itself. During this
- // process, no libcalls can/will be inserted, guaranteeing that no calls
- // can overlap.
- assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
- // Note that we are selecting this call!
- LastCALLSEQ_END = SDValue(CallEnd, 0);
- IsLegalizingCall = true;
-
- // Legalize the call, starting from the CALLSEQ_END.
- LegalizeOp(LastCALLSEQ_END);
- assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
- return Result;
- }
+ case ISD::CALLSEQ_START:
case ISD::CALLSEQ_END:
- // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
- // will cause this node to be legalized as well as handling libcalls right.
- if (LastCALLSEQ_END.getNode() != Node) {
- LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
- DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
- assert(I != LegalizedNodes.end() &&
- "Legalizing the call start should have legalized this node!");
- return I->second;
- }
-
- // Otherwise, the call start has been legalized and everything is going
- // according to plan. Just legalize ourselves normally here.
- Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
- // Do not try to legalize the target-specific arguments (#1+), except for
- // an optional flag input.
- if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
- if (Tmp1 != Node->getOperand(0)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- &Ops[0], Ops.size()),
- Result.getResNo());
- }
- } else {
- Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
- if (Tmp1 != Node->getOperand(0) ||
- Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Ops.back() = Tmp2;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- &Ops[0], Ops.size()),
- Result.getResNo());
- }
- }
- assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
- // This finishes up call legalization.
- IsLegalizingCall = false;
-
- // If the CALLSEQ_END node has a flag, remember that we legalized it.
- AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
- if (Node->getNumValues() == 2)
- AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
- return Result.getValue(Op.getResNo());
+ break;
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Node);
- Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain.
- Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+ Tmp1 = LD->getChain(); // Legalize the chain.
+ Tmp2 = LD->getBasePtr(); // Legalize the base pointer.
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
EVT VT = Node->getValueType(0);
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp3 = Result.getValue(0);
- Tmp4 = Result.getValue(1);
+ Tmp3 = SDValue(Node, 0);
+ Tmp4 = SDValue(Node, 1);
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned load and the target doesn't support it,
// expand it.
@@ -1148,20 +923,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp3 = Result.getOperand(0);
- Tmp4 = Result.getOperand(1);
- Tmp3 = LegalizeOp(Tmp3);
- Tmp4 = LegalizeOp(Tmp4);
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Tmp3, Tmp4);
}
}
break;
case TargetLowering::Custom:
Tmp1 = TLI.LowerOperation(Tmp3, DAG);
if (Tmp1.getNode()) {
- Tmp3 = LegalizeOp(Tmp1);
- Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ Tmp3 = Tmp1;
+ Tmp4 = Tmp1.getValue(1);
}
break;
case TargetLowering::Promote: {
@@ -1172,17 +943,19 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
- Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ LD->isInvariant(), LD->getAlignment());
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1);
+ Tmp4 = Tmp1.getValue(1);
break;
}
}
- // Since loads produce two values, make sure to remember that we
- // legalized both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp3);
- AddLegalizedOperand(SDValue(Node, 1), Tmp4);
- return Op.getResNo() ? Tmp4 : Tmp3;
+ if (Tmp4.getNode() != Node) {
+ assert(Tmp3.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4);
+ ReplacedNode(Node);
+ }
+ return;
}
EVT SrcVT = LD->getMemoryVT();
@@ -1213,9 +986,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
ISD::LoadExtType NewExtType =
ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
Ch = Result.getValue(1); // The chain.
@@ -1230,8 +1004,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result.getValueType(), Result,
DAG.getValueType(SrcVT));
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
+ Tmp1 = Result;
+ Tmp2 = Ch;
} else if (SrcWidth & (SrcWidth - 1)) {
// If not loading a power-of-2 number of bits, expand as two loads.
assert(!SrcVT.isVector() && "Unsupported extload!");
@@ -1274,7 +1048,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
} else {
// Big endian - avoid unaligned loads.
// EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
@@ -1304,29 +1078,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
}
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
+ Tmp2 = Ch;
} else {
switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
// FALLTHROUGH
case TargetLowering::Legal:
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp1 = Result.getValue(0);
- Tmp2 = Result.getValue(1);
+ Tmp1 = SDValue(Node, 0);
+ Tmp2 = SDValue(Node, 1);
if (isCustom) {
- Tmp3 = TLI.LowerOperation(Result, DAG);
+ Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Tmp3.getNode()) {
- Tmp1 = LegalizeOp(Tmp3);
- Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ Tmp1 = Tmp3;
+ Tmp2 = Tmp3.getValue(1);
}
} else {
// If this is an unaligned load and the target doesn't support it,
@@ -1337,12 +1107,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
unsigned ABIAlignment =
TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp1 = Result.getOperand(0);
- Tmp2 = Result.getOperand(1);
- Tmp1 = LegalizeOp(Tmp1);
- Tmp2 = LegalizeOp(Tmp2);
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Tmp1, Tmp2);
}
}
}
@@ -1352,7 +1118,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ LD->isInvariant(), LD->getAlignment());
unsigned ExtendOp;
switch (ExtType) {
case ISD::EXTLOAD:
@@ -1363,95 +1129,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
default: llvm_unreachable("Unexpected extend load type!");
}
- Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Load.getValue(1));
+ Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Tmp2 = Load.getValue(1);
break;
}
- // If this is a promoted vector load, and the vector element types are
- // legal, then scalarize it.
- if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
- TLI.isTypeLegal(Node->getValueType(0).getScalarType())) {
- SmallVector<SDValue, 8> LoadVals;
- SmallVector<SDValue, 8> LoadChains;
- unsigned NumElem = SrcVT.getVectorNumElements();
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
-
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
- SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
- Node->getValueType(0).getScalarType(),
- Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
-
- LoadVals.push_back(ScalarLoad.getValue(0));
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &LoadChains[0], LoadChains.size());
- SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Node->getValueType(0), &LoadVals[0], LoadVals.size());
-
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
- break;
- }
-
- // If this is a promoted vector load, and the vector element types are
- // illegal, create the promoted vector from bitcasted segments.
- if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
- EVT MemElemTy = Node->getValueType(0).getScalarType();
- EVT SrcSclrTy = SrcVT.getScalarType();
- unsigned SizeRatio =
- (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
-
- SmallVector<SDValue, 8> LoadVals;
- SmallVector<SDValue, 8> LoadChains;
- unsigned NumElem = SrcVT.getVectorNumElements();
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
-
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
- SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
- SrcVT.getScalarType(),
- Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- if (TLI.isBigEndian()) {
- // MSB (which is garbage, comes first)
- LoadVals.push_back(ScalarLoad.getValue(0));
- for (unsigned i = 0; i<SizeRatio-1; ++i)
- LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
- } else {
- // LSB (which is data, comes first)
- for (unsigned i = 0; i<SizeRatio-1; ++i)
- LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
- LoadVals.push_back(ScalarLoad.getValue(0));
- }
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
-
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &LoadChains[0], LoadChains.size());
- EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
- SrcVT.getScalarType(), NumElem*SizeRatio);
- SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
- TempWideVector, &LoadVals[0], LoadVals.size());
-
- // Cast to the correct type
- ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
-
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
- break;
-
- }
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
// FIXME: This does not work for vectors on most targets. Sign- and
// zero-extend operations are currently folded into extending loads,
@@ -1461,10 +1145,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
"EXTLOAD should always be supported!");
// Turn the unsupported load into an EXTLOAD followed by an explicit
// zero/sign extend inreg.
- Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
SDValue ValRes;
if (ExtType == ISD::SEXTLOAD)
ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
@@ -1472,42 +1156,41 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result, DAG.getValueType(SrcVT));
else
ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ Tmp1 = ValRes;
+ Tmp2 = Result.getValue(1);
break;
}
}
// Since loads produce two values, make sure to remember that we legalized
// both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp1);
- AddLegalizedOperand(SDValue(Node, 1), Tmp2);
- return Op.getResNo() ? Tmp2 : Tmp1;
+ if (Tmp2.getNode() != Node) {
+ assert(Tmp1.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2);
+ ReplacedNode(Node);
+ }
+ break;
}
case ISD::STORE: {
StoreSDNode *ST = cast<StoreSDNode>(Node);
- Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
- Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
+ Tmp1 = ST->getChain();
+ Tmp2 = ST->getBasePtr();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
if (!ST->isTruncatingStore()) {
if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
- Result = SDValue(OptStore, 0);
+ ReplaceNode(ST, OptStore);
break;
}
{
- Tmp3 = LegalizeOp(ST->getValue());
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp3, Tmp2,
- ST->getOffset()),
- Result.getResNo());
-
+ Tmp3 = ST->getValue();
EVT VT = Tmp3.getValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1515,27 +1198,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
- Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
- DAG, TLI);
+ ExpandUnalignedStore(cast<StoreSDNode>(Node),
+ DAG, TLI, this);
}
break;
case TargetLowering::Custom:
- Tmp1 = TLI.LowerOperation(Result, DAG);
- if (Tmp1.getNode()) Result = Tmp1;
+ Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Tmp1.getNode())
+ ReplaceNode(SDValue(Node, 0), Tmp1);
break;
- case TargetLowering::Promote:
+ case TargetLowering::Promote: {
assert(VT.isVector() && "Unknown legal promote case!");
Tmp3 = DAG.getNode(ISD::BITCAST, dl,
TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
break;
}
+ }
break;
}
} else {
- Tmp3 = LegalizeOp(ST->getValue());
+ Tmp3 = ST->getValue();
EVT StVT = ST->getMemoryVT();
unsigned StWidth = StVT.getSizeInBits();
@@ -1547,8 +1234,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
StVT.getStoreSizeInBits());
Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
- Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
@@ -1602,17 +1291,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
// The order of the stores doesn't matter.
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
} else {
- if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
- Tmp2 != ST->getBasePtr())
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp3, Tmp2,
- ST->getOffset()),
- Result.getResNo());
-
switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1620,120 +1303,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
- Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
- DAG, TLI);
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
break;
case TargetLowering::Custom:
- Result = TLI.LowerOperation(Result, DAG);
+ ReplaceNode(SDValue(Node, 0),
+ TLI.LowerOperation(SDValue(Node, 0), DAG));
break;
case TargetLowering::Expand:
-
- EVT WideScalarVT = Tmp3.getValueType().getScalarType();
- EVT NarrowScalarVT = StVT.getScalarType();
-
- if (StVT.isVector()) {
- unsigned NumElem = StVT.getVectorNumElements();
- // The type of the data we want to save
- EVT RegVT = Tmp3.getValueType();
- EVT RegSclVT = RegVT.getScalarType();
- // The type of data as saved in memory.
- EVT MemSclVT = StVT.getScalarType();
-
- bool RegScalarLegal = TLI.isTypeLegal(RegSclVT);
- bool MemScalarLegal = TLI.isTypeLegal(MemSclVT);
-
- // We need to expand this store. If the register element type
- // is legal then we can scalarize the vector and use
- // truncating stores.
- if (RegScalarLegal) {
- // Cast floats into integers
- unsigned ScalarSize = MemSclVT.getSizeInBits();
- EVT EltVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
-
- // Round odd types to the next pow of two.
- if (!isPowerOf2_32(ScalarSize))
- ScalarSize = NextPowerOf2(ScalarSize);
-
- // Store Stride in bytes
- unsigned Stride = ScalarSize/8;
- // Extract each of the elements from the original vector
- // and save them into memory individually.
- SmallVector<SDValue, 8> Stores;
- for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx));
-
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
-
- // This scalar TruncStore may be illegal, but we lehalize it
- // later.
- SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2,
- ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, Alignment);
-
- Stores.push_back(Store);
- }
-
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
- break;
- }
-
- // The scalar register type is illegal.
- // For example saving <2 x i64> -> <2 x i32> on a x86.
- // In here we bitcast the value into a vector of smaller parts and
- // save it using smaller scalars.
- if (!RegScalarLegal && MemScalarLegal) {
- // Store Stride in bytes
- unsigned Stride = MemSclVT.getSizeInBits()/8;
-
- unsigned SizeRatio =
- (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits());
-
- EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(),
- MemSclVT,
- SizeRatio * NumElem);
-
- // Cast the wide elem vector to wider vec with smaller elem type.
- // Example <2 x i64> -> <4 x i32>
- Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
-
- SmallVector<SDValue, 8> Stores;
- for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) {
- // Extract the Ith element.
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
- // Bump pointer.
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
-
- // Store if, this element is:
- // - First element on big endian, or
- // - Last element on little endian
- if (( TLI.isBigEndian() && (Idx % SizeRatio == 0)) ||
- ((!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1)))) {
- SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
- ST->getPointerInfo().getWithOffset(Idx*Stride),
- isVolatile, isNonTemporal, Alignment);
- Stores.push_back(Store);
- }
- }
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
- break;
- }
-
- assert(false && "Unable to legalize the vector trunc store!");
- }// is vector
-
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
// TRUNCSTORE:i16 i32 -> STORE i16
assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
break;
}
}
@@ -1741,17 +1328,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
}
}
- assert(Result.getValueType() == Op.getValueType() &&
- "Bad legalization!");
-
- // Make sure that the generated code is itself legal.
- if (Result != Op)
- Result = LegalizeOp(Result);
-
- // Note that LegalizeOp may be reentered even from single-use nodes, which
- // means that we always must cache transformed nodes.
- AddLegalizedOperand(Op, Result);
- return Result;
}
SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
@@ -1778,7 +1354,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
if (Op.getValueType().isVector())
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
MachinePointerInfo(),
Vec.getValueType().getVectorElementType(),
@@ -1826,7 +1402,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
// Finally, load the updated vector.
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
- false, false, 0);
+ false, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1876,7 +1452,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
StoreChain = DAG.getEntryNode();
// Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo,
+ false, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
@@ -1905,7 +1482,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
assert(FloatVT.isByteSized() && "Unsupported floating point type!");
// Load out a legal integer with the same sign bit as the float.
SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
} else { // Little endian
SDValue LoadPtr = StackPtr;
// The float may be wider than the integer we are going to load. Advance
@@ -1916,7 +1493,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
LoadPtr, DAG.getIntPtrConstant(ByteOffset));
// Load a legal integer containing the sign bit.
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Move the sign bit to the top bit of the loaded integer.
unsigned BitShift = LoadTy.getSizeInBits() -
(FloatVT.getSizeInBits() - 8 * ByteOffset);
@@ -1984,7 +1561,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
EVT OpVT = LHS.getValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
- default: assert(0 && "Unknown condition code action!");
+ default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
@@ -1992,7 +1569,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
- default: assert(0 && "Don't know how to expand this condition!");
+ default: llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
@@ -2058,7 +1635,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
// Result is a load from the stack slot.
if (SlotSize == DestSize)
return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
- false, false, DestAlign);
+ false, false, false, DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
@@ -2081,7 +1658,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
false, false, 0);
return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
MachinePointerInfo::getFixedStack(SPFI),
- false, false, 0);
+ false, false, false, 0);
}
@@ -2127,7 +1704,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// If all elements are constants, create a load from the constant pool.
if (isConstant) {
- std::vector<Constant*> CV;
+ SmallVector<Constant*, 16> CV;
for (unsigned i = 0, e = NumElems; i != e; ++i) {
if (ConstantFPSDNode *V =
dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
@@ -2155,7 +1732,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, Alignment);
+ false, false, false, Alignment);
}
if (!MoreThanTwoValues) {
@@ -2190,12 +1767,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// and leave the Hi part unset.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
- // The input chain to this libcall is the entry node of the function.
- // Legalizing the call will automatically add the previous call to the
- // dependence.
- SDValue InChain = DAG.getEntryNode();
-
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
@@ -2209,26 +1780,31 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // TLI.isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
// isTailCall may be true since the callee does not reference caller stack
// frame. Check if it's in the right position.
- bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
+ SDValue TCChain = InChain;
+ bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI);
+ if (isTailCall)
+ InChain = TCChain;
+
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), isTailCall,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
if (!CallInfo.second.getNode())
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
return CallInfo.first;
}
@@ -2254,15 +1830,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
-
return CallInfo.first;
}
@@ -2272,7 +1843,6 @@ std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
SDValue InChain = Node->getOperand(0);
TargetLowering::ArgListTy Args;
@@ -2289,18 +1859,13 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
return CallInfo;
}
@@ -2311,7 +1876,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_PPCF128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
case MVT::f80: LC = Call_F80; break;
@@ -2328,7 +1893,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
case MVT::i32: LC = Call_I32; break;
@@ -2343,7 +1908,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2388,7 +1953,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2426,21 +1991,16 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
DebugLoc dl = Node->getDebugLoc();
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
-
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
// Remainder is loaded back from the stack frame.
- SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr,
+ MachinePointerInfo(), false, false, false, 0);
Results.push_back(CallInfo.first);
Results.push_back(Rem);
}
@@ -2489,7 +2049,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
false, false, 0);
// load the constructed double
SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
@@ -2611,7 +2171,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
// offset depending on the data type.
uint64_t FF;
switch (Op0.getValueType().getSimpleVT().SimpleTy) {
- default: assert(0 && "Unsupported integer type!");
+ default: llvm_unreachable("Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
@@ -2629,13 +2189,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, Alignment);
+ false, false, false, Alignment);
else {
- FudgeInReg =
- LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
- DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, Alignment));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, Alignment);
+ HandleSDNode Handle(Load);
+ LegalizeOp(Load.getNode());
+ FudgeInReg = Handle.getValue();
}
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
@@ -2731,7 +2293,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
EVT SHVT = TLI.getShiftAmountTy(VT);
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(0 && "Unhandled Expand type in BSWAP!");
+ default: llvm_unreachable("Unhandled Expand type in BSWAP!");
case MVT::i16:
Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
@@ -2788,7 +2350,7 @@ static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
DebugLoc dl) {
switch (Opc) {
- default: assert(0 && "Cannot expand this yet!");
+ default: llvm_unreachable("Cannot expand this yet!");
case ISD::CTPOP: {
EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy(VT);
@@ -2831,6 +2393,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
return Op;
}
+ case ISD::CTLZ_ZERO_UNDEF:
+ // This trivially expands to CTLZ.
+ return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
case ISD::CTLZ: {
// for now, we do this:
// x = x | (x >> 1);
@@ -2852,6 +2417,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
Op = DAG.getNOT(dl, Op, VT);
return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
+ case ISD::CTTZ_ZERO_UNDEF:
+ // This trivially expands to CTTZ.
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
case ISD::CTTZ: {
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
@@ -2881,7 +2449,6 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic intrinsic Expand!");
- break;
case ISD::ATOMIC_SWAP:
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type for atomic!");
@@ -2959,14 +2526,16 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
return ExpandChainLibCall(LC, Node, false);
}
-void SelectionDAGLegalize::ExpandNode(SDNode *Node,
- SmallVectorImpl<SDValue> &Results) {
+void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
DebugLoc dl = Node->getDebugLoc();
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
switch (Node->getOpcode()) {
case ISD::CTPOP:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
Results.push_back(Tmp1);
break;
@@ -2986,7 +2555,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::PREFETCH:
case ISD::VAEND:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// If the target didn't expand these, there's nothing to do, so just
// preserve the chain and be done.
Results.push_back(Node->getOperand(0));
@@ -3006,7 +2574,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
Args, DAG, dl);
@@ -3083,7 +2651,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
Results.push_back(CallResult.second);
@@ -3166,7 +2734,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned Align = Node->getConstantOperandVal(3);
SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
- MachinePointerInfo(V), false, false, 0);
+ MachinePointerInfo(V),
+ false, false, false, 0);
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
@@ -3191,7 +2760,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
- false, false, 0));
+ false, false, false, 0));
Results.push_back(Results[0].getValue(1));
break;
}
@@ -3202,7 +2771,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
Node->getOperand(2), MachinePointerInfo(VS),
- false, false, 0);
+ false, false, false, 0);
Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
MachinePointerInfo(VD), false, false, 0);
Results.push_back(Tmp1);
@@ -3236,15 +2805,57 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Node->getOperand(2), dl));
break;
case ISD::VECTOR_SHUFFLE: {
- SmallVector<int, 8> Mask;
- cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+ SmallVector<int, 32> NewMask;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
- if (!TLI.isTypeLegal(EltVT))
- EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!TLI.isTypeLegal(EltVT)) {
+
+ EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+ // BUILD_VECTOR operands are allowed to be wider than the element type.
+ // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept it
+ if (NewEltVT.bitsLT(EltVT)) {
+
+ // Convert shuffle node.
+ // If original node was v4i64 and the new EltVT is i32,
+ // cast operands to v8i32 and re-build the mask.
+
+ // Calculate new VT, the size of the new VT should be equal to original.
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+ VT.getSizeInBits()/NewEltVT.getSizeInBits());
+ assert(NewVT.bitsEq(VT));
+
+ // cast operands to new VT
+ Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+ // Convert the shuffle mask
+ unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements();
+
+ // EltVT gets smaller
+ assert(factor > 0);
+
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0) {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]);
+ }
+ else {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]*factor+fi);
+ }
+ }
+ Mask = NewMask;
+ VT = NewVT;
+ }
+ EltVT = NewEltVT;
+ }
unsigned NumElems = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Ops;
+ SmallVector<SDValue, 16> Ops;
for (unsigned i = 0; i != NumElems; ++i) {
if (Mask[i] < 0) {
Ops.push_back(DAG.getUNDEF(EltVT));
@@ -3253,14 +2864,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned Idx = Mask[i];
if (Idx < NumElems)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Node->getOperand(0),
+ Op0,
DAG.getIntPtrConstant(Idx)));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Node->getOperand(1),
+ Op1,
DAG.getIntPtrConstant(Idx - NumElems)));
}
+
Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
Results.push_back(Tmp1);
break;
}
@@ -3408,10 +3022,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
// If this is a legal constant, turn it into a TargetConstantFP node.
- if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
- Results.push_back(SDValue(Node, 0));
- else
- Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ Results.push_back(ExpandConstantFP(CFP, true));
break;
}
case ISD::EHSELECTION: {
@@ -3423,13 +3035,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
}
case ISD::EXCEPTIONADDR: {
- unsigned Reg = TLI.getExceptionAddressRegister();
+ unsigned Reg = TLI.getExceptionPointerRegister();
assert(Reg && "Can't expand to unknown register!");
Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
Node->getValueType(0)));
Results.push_back(Results[0].getValue(1));
break;
}
+ case ISD::FSUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ "Don't know how to expand this FP subtraction!");
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
case ISD::SUB: {
EVT VT = Node->getValueType(0);
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
@@ -3657,6 +3279,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
DAG.getIntPtrConstant(0));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
DAG.getIntPtrConstant(1));
+ // Ret is a node with an illegal type. Because such things are not
+ // generally permitted during this phase of legalization, delete the
+ // node. The above EXTRACT_ELEMENT nodes should have been folded.
+ DAG.DeleteNode(Ret.getNode());
}
if (isSigned) {
@@ -3797,7 +3423,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
Tmp2, Tmp3, Tmp4, dl);
- LastCALLSEQ_END = DAG.getEntryNode();
assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
@@ -3807,6 +3432,35 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(Tmp1);
break;
}
+ case ISD::BUILD_VECTOR:
+ Results.push_back(ExpandBUILD_VECTOR(Node));
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: {
+ // Scalarize vector SRA/SRL/SHL.
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Unable to legalize non-vector shift");
+ assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal");
+ unsigned NumElem = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Scalars;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(0), DAG.getIntPtrConstant(Idx));
+ SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(1), DAG.getIntPtrConstant(Idx));
+ Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
+ VT.getScalarType(), Ex, Sh));
+ }
+ SDValue Result =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Scalars[0], Scalars.size());
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
@@ -3817,13 +3471,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
// FIXME: Custom lowering for these operations shouldn't return null!
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- Results.push_back(SDValue(Node, i));
break;
}
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
}
-void SelectionDAGLegalize::PromoteNode(SDNode *Node,
- SmallVectorImpl<SDValue> &Results) {
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
EVT OVT = Node->getValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
Node->getOpcode() == ISD::SINT_TO_FP ||
@@ -3835,20 +3492,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
SDValue Tmp1, Tmp2, Tmp3;
switch (Node->getOpcode()) {
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
// Zero extend the argument.
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
- // Perform the larger operation.
+ // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
+ // already the correct result.
Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
if (Node->getOpcode() == ISD::CTTZ) {
- //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+ // FIXME: This should set a bit in the zero extended value instead.
Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
ISD::SETEQ);
Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
- } else if (Node->getOpcode() == ISD::CTLZ) {
+ } else if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
@@ -3877,6 +3538,33 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
Node->getOpcode() == ISD::SINT_TO_FP, dl);
Results.push_back(Tmp1);
break;
+ case ISD::VAARG: {
+ SDValue Chain = Node->getOperand(0); // Get the chain.
+ SDValue Ptr = Node->getOperand(1); // Get the pointer.
+
+ unsigned TruncOp;
+ if (OVT.isVector()) {
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger()
+ && "VAARG promotion is supported only for vectors or integer types");
+ TruncOp = ISD::TRUNCATE;
+ }
+
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2),
+ Node->getConstantOperandVal(3));
+ Chain = Tmp1.getValue(1);
+
+ Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1);
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ ReplacedNode(Node);
+ break;
+ }
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
@@ -3924,8 +3612,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
break;
}
case ISD::VECTOR_SHUFFLE: {
- SmallVector<int, 8> Mask;
- cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
// Cast the two input vectors.
Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
@@ -3950,7 +3637,31 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
Tmp1, Tmp2, Node->getOperand(2)));
break;
}
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FPOW: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0)));
+ break;
}
+ case ISD::FLOG2:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FEXP: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp2, DAG.getIntPtrConstant(0)));
+ break;
+ }
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
}
// SelectionDAG::Legalize - This is the entry point for the file.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 7c1cc69d6a2f..e3938968b205 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -479,8 +479,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getPointerInfo(), NVT,
- L->isVolatile(), L->isNonTemporal(), L->getAlignment());
+ L->getPointerInfo(), NVT, L->isVolatile(),
+ L->isNonTemporal(), false, L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -492,7 +492,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getMemoryVT(), dl, L->getChain(),
L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
- L->isNonTemporal(), L->getAlignment());
+ L->isNonTemporal(), false, L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -672,7 +672,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
case ISD::SETUEQ:
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
break;
- default: assert(false && "Do not know how to soften this setcc!");
+ default: llvm_unreachable("Do not know how to soften this setcc!");
}
}
@@ -1212,7 +1212,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
switch (SrcVT.getSimpleVT().SimpleTy) {
default:
- assert(false && "Unsupported UINT_TO_FP!");
+ llvm_unreachable("Unsupported UINT_TO_FP!");
case MVT::i32:
Parts = TwoE32;
break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a5c4c2ded4c5..95ddb1e0f6fb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -20,7 +20,6 @@
#include "LegalizeTypes.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -57,8 +56,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
case ISD::CONVERT_RNDSAT:
Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
@@ -211,13 +212,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
- break;
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
- if (NOutVT.bitsEq(NInVT))
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
// The input promotes to the same size. Convert the promoted value.
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
@@ -251,9 +249,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
case TargetLowering::TypeWidenVector:
- if (OutVT.bitsEq(NInVT))
- // The input is widened to the same size. Convert to the widened value.
- return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
+ // The input is widened to the same size. Convert to the widened value.
+ // Make sure that the outgoing value is not a vector, because this would
+ // make us bitcast between two vectors which are legalized in different ways.
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
@@ -312,7 +312,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
- Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+ Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(ISD::SUB, dl, NVT, Op,
DAG.getConstant(NVT.getSizeInBits() -
@@ -330,13 +330,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
- // The count is the same in the promoted type except if the original
- // value was zero. This can be handled by setting the bit just off
- // the top of the original type.
- APInt TopBit(NVT.getSizeInBits(), 0);
- TopBit.setBit(OVT.getSizeInBits());
- Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
- return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+ if (N->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.setBit(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -486,7 +488,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
- SDValue Mask = GetPromotedInteger(N->getOperand(0));
+ SDValue Mask = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ // Promote all the way up to the canonical SetCC type.
+ Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy));
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, N->getDebugLoc(),
@@ -1098,8 +1104,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
@@ -1171,7 +1179,6 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic intrinsic Expand!");
- break;
case ISD::ATOMIC_SWAP:
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type for atomic!");
@@ -1355,7 +1362,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne);
// If we don't know anything about the high bits, exit.
if (((KnownZero|KnownOne) & HighBitMask) == 0)
@@ -1390,15 +1397,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
}
}
-#if 0
- // FIXME: This code is broken for shifts with a zero amount!
// If we know that all of the high bits of the shift amount are zero, then we
// can do this as a couple of simple shifts.
if ((KnownZero & HighBitMask) == HighBitMask) {
- // Compute 32-amt.
- SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy,
- DAG.getConstant(NVTBits, ShTy),
- Amt);
+ // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined
+ // shift if x is zero. We can use XOR here because x is known to be smaller
+ // than 32.
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
+ DAG.getConstant(NVTBits-1, ShTy));
+
unsigned Op1, Op2;
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
@@ -1407,13 +1414,23 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
}
- Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt);
- Hi = DAG.getNode(ISD::OR, NVT,
- DAG.getNode(Op1, NVT, InH, Amt),
- DAG.getNode(Op2, NVT, InL, Amt2));
+ // When shifting right the arithmetic for Lo and Hi is swapped.
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(InL, InH);
+
+ // Use a little trick to get the bits that move from Lo to Hi. First
+ // shift by one bit.
+ SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy));
+ // Then compute the remaining shift with amount-1.
+ SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
+
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(Hi, Lo);
return true;
}
-#endif
return false;
}
@@ -1493,8 +1510,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
return true;
}
-
- return false;
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
@@ -1702,8 +1717,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
@@ -1732,8 +1747,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
@@ -1778,6 +1793,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
+ bool isInvariant = N->isInvariant();
DebugLoc dl = N->getDebugLoc();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -1808,7 +1824,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2305,12 +2321,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
- true, Func, Args, DAG, dl);
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Func, Args, DAG, dl);
SplitInteger(CallInfo.first, Lo, Hi);
SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
DAG.getConstant(0, PtrVT),
ISD::SETNE);
@@ -2781,7 +2799,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
else if (SrcVT == MVT::i128)
FF = APInt(32, F32TwoE128);
else
- assert(false && "Unsupported UINT_TO_FP!");
+ llvm_unreachable("Unsupported UINT_TO_FP!");
// Check whether the sign bit is set.
SDValue Lo, Hi;
@@ -2926,38 +2944,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
- SDValue Op0 = N->getOperand(1);
- SDValue Op1 = N->getOperand(1);
- assert(Op0.getValueType() == Op1.getValueType() &&
- "Invalid input vector types");
-
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT InElemTy = OutVT.getVectorElementType();
EVT OutElemTy = NOutVT.getVectorElementType();
- unsigned NumElem0 = Op0.getValueType().getVectorNumElements();
- unsigned NumElem1 = Op1.getValueType().getVectorNumElements();
+ unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
unsigned NumOutElem = NOutVT.getVectorNumElements();
- assert(NumElem0 + NumElem1 == NumOutElem &&
- "Invalid number of incoming elements");
+ unsigned NumOperands = N->getNumOperands();
+ assert(NumElem * NumOperands == NumOutElem &&
+ "Unexpected number of elements");
// Take the elements from the first vector.
SmallVector<SDValue, 8> Ops(NumOutElem);
- for (unsigned i = 0; i < NumElem0; ++i) {
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- Op0.getValueType().getScalarType(), Op0,
- DAG.getIntPtrConstant(i));
- Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
- }
-
- // Take the elements from the second vector
- for (unsigned i = 0; i < NumElem1; ++i) {
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- Op1.getValueType().getScalarType(), Op1,
- DAG.getIntPtrConstant(i));
- Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ SDValue Op = N->getOperand(i);
+ for (unsigned j = 0; j < NumElem; ++j) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InElemTy, Op, DAG.getIntPtrConstant(j));
+ Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ }
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index a4bb577433cc..439aa4de5cf5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -222,8 +222,6 @@ bool DAGTypeLegalizer::run() {
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
EVT ResultVT = N->getValueType(i);
switch (getTypeAction(ResultVT)) {
- default:
- assert(false && "Unknown action!");
case TargetLowering::TypeLegal:
break;
// The following calls must take care of *all* of the node's results,
@@ -275,8 +273,6 @@ ScanOperands:
EVT OpVT = N->getOperand(i).getValueType();
switch (getTypeAction(OpVT)) {
- default:
- assert(false && "Unknown action!");
case TargetLowering::TypeLegal:
continue;
// The following calls must either replace all of the node's results
@@ -752,7 +748,11 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
- assert(Result.getValueType() == Op.getValueType().getVectorElementType() &&
+ // Note that in some cases vector operation operands may be greater than
+ // the vector element type. For example BUILD_VECTOR of type <1 x i1> with
+ // a constant i8 operand.
+ assert(Result.getValueType().getSizeInBits() >=
+ Op.getValueType().getVectorElementType().getSizeInBits() &&
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
@@ -889,7 +889,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
MachinePointerInfo(), false, false, 0);
// Result is a load from the stack slot.
return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
/// CustomLowerNode - Replace the node's results with custom code provided
@@ -1056,8 +1056,9 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ false, 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
return CallInfo.first;
}
@@ -1084,12 +1085,11 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
return CallInfo;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index abacdac686bc..e8664458e9a6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -521,6 +521,7 @@ private:
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
SDValue ScalarizeVecRes_SETCC(SDNode *N);
@@ -633,6 +634,7 @@ private:
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 8e7e4985e4d0..a8ff7c65abde 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -21,7 +21,6 @@
#include "LegalizeTypes.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -46,8 +45,6 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Handle some special cases efficiently.
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
break;
@@ -130,7 +127,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
false, false, 0);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo,
+ false, false, false, 0);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -140,7 +138,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
PtrInfo.getWithOffset(IncrementSize), false,
- false, MinAlign(Alignment, IncrementSize));
+ false, false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
if (TLI.isBigEndian())
@@ -212,11 +210,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -224,7 +223,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- isVolatile, isNonTemporal,
+ isVolatile, isNonTemporal, isInvariant,
MinAlign(Alignment, IncrementSize));
// Build a factor node to remember that this load is independent of the
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f815b00db5d6..3ae8345bd198 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -64,6 +64,8 @@ class VectorLegalizer {
// Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target.
SDValue ExpandVSELECT(SDValue Op);
+ SDValue ExpandLoad(SDValue Op);
+ SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
// Implements vector promotion; this is essentially just bitcasting the
// operands to a different type and bitcasting the result back to the
@@ -124,6 +126,33 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
SDValue Result =
SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
+ if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT()))
+ return TranslateLegalizeResults(Op, Result);
+ Changed = true;
+ return LegalizeOp(ExpandLoad(Op));
+ }
+ } else if (Op.getOpcode() == ISD::STORE) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ST->getValue().getValueType();
+ if (StVT.isVector() && ST->isTruncatingStore())
+ switch (TLI.getTruncStoreAction(ValVT, StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ return TranslateLegalizeResults(Op, Result);
+ case TargetLowering::Custom:
+ Changed = true;
+ return LegalizeOp(TLI.LowerOperation(Result, DAG));
+ case TargetLowering::Expand:
+ Changed = true;
+ return LegalizeOp(ExpandStore(Op));
+ }
+ }
+
bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
J != E;
@@ -156,8 +185,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
- case ISD::CTTZ:
case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::SELECT:
case ISD::VSELECT:
@@ -262,6 +293,97 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
+
+SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ EVT SrcVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
+ Op.getNode()->getValueType(0).getScalarType(),
+ Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size());
+
+ AddLegalizedOperand(Op.getValue(0), Value);
+ AddLegalizedOperand(Op.getValue(1), NewChain);
+
+ return (Op.getResNo() ? NewChain : Value);
+}
+
+SDValue VectorLegalizer::ExpandStore(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ SDValue Chain = ST->getChain();
+ SDValue BasePTR = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ unsigned NumElem = StVT.getVectorNumElements();
+ // The type of the data we want to save
+ EVT RegVT = Value.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ // Cast floats into integers
+ unsigned ScalarSize = MemSclVT.getSizeInBits();
+
+ // Round odd types to the next pow of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+
+ // Store Stride in bytes
+ unsigned Stride = ScalarSize/8;
+ // Extract each of the elements from the original vector
+ // and save them into memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ RegSclVT, Value, DAG.getIntPtrConstant(Idx));
+
+ // This scalar TruncStore may be illegal, but we legalize it later.
+ SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
+ ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ Stores.push_back(Store);
+ }
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ AddLegalizedOperand(Op, TF);
+ return TF;
+}
+
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
@@ -274,10 +396,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// If we can't even use the basic vector operations of
// AND,OR,XOR, we will have to scalarize the op.
- if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::XOR, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::OR, VT))
- return DAG.UnrollVectorOp(Op.getNode());
+ // Notice that the operation may be 'promoted' which means that it is
+ // 'bitcasted' to another type which is handled.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits()
&& "Invalid mask size");
@@ -301,9 +425,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
DebugLoc DL = Op.getDebugLoc();
// Make sure that the SINT_TO_FP and SRL instructions are available.
- if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
- return DAG.UnrollVectorOp(Op.getNode());
+ if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
EVT SVT = VT.getScalarType();
assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 107a42b2951c..5f23f01dafb4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -21,7 +21,6 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -59,6 +58,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
@@ -194,7 +194,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
- N->getOriginalAlignment());
+ N->isInvariant(), N->getOriginalAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -227,6 +227,37 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
return InOp;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
+ SDValue Cond = GetScalarizedVector(N->getOperand(0));
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false);
+ TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true);
+ if (ScalarBool != VecBool) {
+ EVT CondVT = Cond.getValueType();
+ switch (ScalarBool) {
+ case TargetLowering::UndefinedBooleanContent:
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
+ // Vector read from all ones, scalar expects a single 1 so mask.
+ Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT,
+ Cond, DAG.getConstant(1, CondVT));
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrOneBooleanContent);
+ // Vector reads from a one, scalar from all ones so sign extend.
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT,
+ Cond, DAG.getValueType(MVT::i1));
+ break;
+ }
+ }
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), Cond, LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
@@ -405,6 +436,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
switch (N->getOpcode()) {
default:
@@ -442,8 +477,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
- case ISD::CTPOP:
case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -677,7 +714,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Load the Lo part from the stack slot.
Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
@@ -686,7 +723,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, MinAlign(Alignment, IncrementSize));
+ false, false, false, MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -713,20 +750,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
unsigned Alignment = LD->getOriginalAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
EVT LoMemVT, HiMemVT;
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
- Alignment);
+ isInvariant, Alignment);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVolatile, isNonTemporal, Alignment);
+ HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -773,46 +811,18 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
DebugLoc dl = N->getDebugLoc();
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
- // Split the input.
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
EVT InVT = N->getOperand(0).getValueType();
- switch (getTypeAction(InVT)) {
- default: llvm_unreachable("Unexpected type action!");
- case TargetLowering::TypeLegal: {
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ } else {
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
DAG.getIntPtrConstant(0));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- case TargetLowering::TypePromoteInteger: {
- SDValue InOp = GetPromotedInteger(N->getOperand(0));
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(),
- InOp.getValueType().getVectorElementType(),
- LoVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- case TargetLowering::TypeSplitVector:
- GetSplitVector(N->getOperand(0), Lo, Hi);
- break;
- case TargetLowering::TypeWidenVector: {
- // If the result needs to be split and the input needs to be widened,
- // the two types must have different lengths. Use the widened result
- // and extract from it to do the split.
- SDValue InOp = GetWidenedVector(N->getOperand(0));
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
}
if (N->getOpcode() == ISD::FP_ROUND) {
@@ -1239,6 +1249,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::VSELECT:
case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
@@ -1590,12 +1601,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
- break;
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
+ // If the incoming type is a vector that is being promoted, then
+ // we know that the elements are arranged differently and that we
+ // must perform the conversion using a stack slot.
+ if (InVT.isVector())
+ break;
+
// If the InOp is promoted to the same size, convert it. Otherwise,
// fall out of the switch and widen the promoted input.
InOp = GetPromotedInteger(InOp);
@@ -1928,7 +1942,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
- return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
WidenVT, Cond1, InOp1, InOp2);
}
@@ -2032,6 +2046,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
@@ -2165,6 +2180,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
MVT::Other,&StChain[0],StChain.size());
}
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+ SDValue InOp0 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ // WARNING: In this code we widen the compare instruction with garbage.
+ // This garbage may contain denormal floats which may be slow. Is this a real
+ // concern ? Should we zero the unused lanes if this is a float compare ?
+
+ // Get a new SETCC node to compare the newly widened operands.
+ // Only some of the compared elements are legal.
+ EVT SVT = TLI.getSetCCResultType(InOp0.getValueType());
+ SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SVT, InOp0, InOp1, N->getOperand(2));
+
+ // Extract the needed results from the result vector.
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+ SVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ ResVT, WideSETCC, DAG.getIntPtrConstant(0));
+
+ return PromoteTargetBoolean(CC, N->getValueType(0));
+}
+
+
//===----------------------------------------------------------------------===//
// Vector Widening Utilities
//===----------------------------------------------------------------------===//
@@ -2276,6 +2317,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2285,7 +2327,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, Align);
+ isVolatile, isNonTemporal, isInvariant, Align);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction
@@ -2323,18 +2365,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getIntPtrConstant(Increment));
+ SDValue L;
if (LdWidth < NewVTWidth) {
// Our current type we are using is too large, find a better size
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVTWidth = NewVT.getSizeInBits();
- }
-
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
isVolatile,
- isNonTemporal, MinAlign(Align, Increment));
- LdChain.push_back(LdOp.getValue(1));
- LdOps.push_back(LdOp);
+ isNonTemporal, isInvariant,
+ MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ if (L->getValueType(0).isVector()) {
+ SmallVector<SDValue, 16> Loads;
+ Loads.push_back(L);
+ unsigned size = L->getValueSizeInBits(0);
+ while (size < LdOp->getValueSizeInBits(0)) {
+ Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+ size += L->getValueSizeInBits(0);
+ }
+ L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0),
+ &Loads[0], Loads.size());
+ }
+ } else {
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ }
+
+ LdOps.push_back(L);
+
LdWidth -= NewVTWidth;
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
new file mode 100644
index 000000000000..ff0136e08cd9
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -0,0 +1,657 @@
+//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that prioritizes instructions using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+// The scheduler is basically a top-down adaptable list scheduler with DFA
+// resource tracking added to the cost function.
+// DFA is queried as a state machine to model "packets/bundles" during
+// schedule. Currently packets/bundles are discarded at the end of
+// scheduling, affecting only order of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable use of DFA during scheduling"));
+
+static cl::opt<signed> RegPressureThreshold(
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
+
+
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
+ Picker(this),
+ InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData())
+{
+ TII = IS->getTargetLowering().getTargetMachine().getInstrInfo();
+ TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo();
+ TLI = &IS->getTargetLowering();
+
+ const TargetMachine &tm = (*IS->MF).getTarget();
+ ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
+ // This hard requirment could be relaxed, but for now
+ // do not let it procede.
+ assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
+
+ ParallelLiveRanges = 0;
+ HorizontalVerticalBalance = 0;
+}
+
+unsigned
+ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *PredSU = I->getSUnit();
+ const SDNode *ScegN = PredSU->getNode();
+
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: NumberDeps++; break;
+ case ISD::CopyToReg: break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ EVT VT = ScegN->getValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
+ unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *SuccSU = I->getSUnit();
+ const SDNode *ScegN = SuccSU->getNode();
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: break;
+ case ISD::CopyToReg: NumberDeps++; break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+static unsigned numberCtrlDepsInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+static unsigned numberCtrlPredInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+///
+/// Initialize nodes.
+///
+void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ initNumRegDefsLeft(SU);
+ SU->NodeQueueId = 0;
+ }
+}
+
+/// This heuristic is used if DFA scheduling is not desired
+/// for some VLIW platform.
+bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+void ResourcePriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+ Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getNode())
+ return false;
+
+ // If this is a compound instruction,
+ // it is likely to be a call. Do not delay it.
+ if (SU->getNode()->getGluedNode())
+ return true;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ if (SU->getNode()->isMachineOpcode())
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode())))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+
+ // Now see if there are no other dependencies
+ // to instructions alredy in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+ // Since we do not add pseudos to packets, might as well
+ // ignor order deps.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+
+ return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ ResourcesModel->reserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode()));
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+ Packet.push_back(SU);
+ }
+ // Forcefully end packet for PseudoOps.
+ else {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+}
+
+signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ // Gen estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+ EVT VT = SU->getNode()->getValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance += numberRCValSuccInSU(SU, RCId);
+ }
+ // Kill estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = SU->getNode()->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (isa<ConstantSDNode>(Op.getNode()))
+ continue;
+
+ if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance -= numberRCValPredInSU(SU, RCId);
+ }
+ return RegBalance;
+}
+
+/// Estimates change in reg pressure from this SU.
+/// It is acheived by trivial tracking of defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function to ignore
+/// existing reg file sizes, and report raw def/use
+/// balance.
+signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ if (RawPressure) {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+ else {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ if ((RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) > 0) &&
+ (RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+
+ return RegBalance;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 15;
+static const unsigned PriorityFive = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns single number reflecting benefit of scheduling SU
+/// in the current cycle.
+signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+ // Initial trivial priority.
+ signed ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+ // Adaptable scheduling
+ // A small, but very parallel
+ // region, where reg pressure is an issue.
+ if (HorizontalVerticalBalance > RegPressureThreshold) {
+ // Critical path first
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ // Consider change to reg pressure from scheduling
+ // this SU.
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+ }
+ // Default heuristic, greeady and
+ // critical path driven.
+ else {
+ // Critical path first.
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // Now see how many instructions is blocked by this SU.
+ ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ ResCount -= (regPressureDelta(SU) * ScaleTwo);
+ }
+
+ // These are platform specific things.
+ // Will need to go into the back end
+ // and accessed from here via a hook.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.isCall())
+ ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+ }
+ else
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ ResCount += PriorityFive;
+ break;
+
+ case ISD::INLINEASM:
+ ResCount += PriorityFour;
+ break;
+ }
+ }
+ return ResCount;
+}
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+ // Use NULL entry as an event marker to reset
+ // the DFA state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ return;
+ }
+
+ const SDNode *ScegN = SU->getNode();
+ // Update reg pressure tracking.
+ // First update current node.
+ if (ScegN->isMachineOpcode()) {
+ // Estimate generated regs.
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ EVT VT = ScegN->getValueType(i);
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC)
+ RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+ }
+ }
+ // Estimate killed regs.
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC) {
+ if (RegPressure[RC->getID()] >
+ (numberRCValPredInSU(SU, RC->getID())))
+ RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+ else RegPressure[RC->getID()] = 0;
+ }
+ }
+ }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+ continue;
+ --I->getSUnit()->NumRegDefsLeft;
+ }
+ }
+
+ // Reserve resources for this SU.
+ reserveResources(SU);
+
+ // Adjust number of parallel live ranges.
+ // Heuristic is simple - node with no data successors reduces
+ // number of live ranges. All others, increase it.
+ unsigned NumberNonControlDeps = 0;
+
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ adjustPriorityOfUnscheduledPreds(I->getSUnit());
+ if (!I->isCtrl())
+ NumberNonControlDeps++;
+ }
+
+ if (!NumberNonControlDeps) {
+ if (ParallelLiveRanges >= SU->NumPreds)
+ ParallelLiveRanges -= SU->NumPreds;
+ else
+ ParallelLiveRanges = 0;
+
+ }
+ else
+ ParallelLiveRanges += SU->NumRegDefsLeft;
+
+ // Track parallel live chains.
+ HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
+ HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
+}
+
+void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
+ unsigned NodeNumDefs = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ // No register need be allocated for this.
+ if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ NodeNumDefs = 0;
+ break;
+ }
+ NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
+ }
+ else
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::CopyFromReg:
+ NodeNumDefs++;
+ break;
+ case ISD::INLINEASM:
+ NodeNumDefs++;
+ break;
+ }
+
+ SU->NumRegDefsLeft = NodeNumDefs;
+}
+
+/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable)
+ return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+
+/// Main access point - returns next instructions
+/// to be placed in scheduling sequence.
+SUnit *ResourcePriorityQueue::pop() {
+ if (empty())
+ return 0;
+
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ if (!DisableDFASched) {
+ signed BestCost = SUSchedulingCost(*Best);
+ for (std::vector<SUnit *>::iterator I = Queue.begin(),
+ E = Queue.end(); I != E; ++I) {
+ if (*I == *Best)
+ continue;
+
+ if (SUSchedulingCost(*I) > BestCost) {
+ BestCost = SUSchedulingCost(*I);
+ Best = I;
+ }
+ }
+ }
+ // Use default TD scheduling mechanism.
+ else {
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ }
+
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+
+ Queue.pop_back();
+
+ return V;
+}
+
+
+void ResourcePriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+
+ Queue.pop_back();
+}
+
+
+#ifdef NDEBUG
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
+ ResourcePriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index b275c6321ae4..24da432a47a1 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -43,7 +43,7 @@ namespace {
SmallVector<SUnit *, 16> Queue;
bool empty() const { return Queue.empty(); }
-
+
void push(SUnit *U) {
Queue.push_back(U);
}
@@ -101,8 +101,8 @@ private:
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleBottomUp();
- /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
- bool ForceUnitLatencies() const { return true; }
+ /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool forceUnitLatencies() const { return true; }
};
} // end anonymous namespace
@@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() {
DEBUG(dbgs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
LiveRegCycles.resize(TRI->getNumRegs(), 0);
// Build the scheduling graph.
@@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
ReleasePred(SU, &*I);
if (I->isAssignedRegDep()) {
// This is a physical register dependency and it's impossible or
- // expensive to copy the register. Make sure nothing that can
+ // expensive to copy the register. Make sure nothing that can
// clobber the register is scheduled between the predecessor and
// this node.
if (!LiveRegDefs[I->getReg()]) {
@@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
SDValue(LoadNode, 1));
- SUnit *NewSU = NewSUnit(N);
+ SUnit *NewSU = newSUnit(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
-
+
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
@@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
LoadSU = &SUnits[LoadNode->getNodeId()];
isNewLoad = false;
} else {
- LoadSU = NewSUnit(LoadNode);
+ LoadSU = newSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
}
@@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
D.setSUnit(LoadSU);
AddPred(SuccDep, D);
}
- }
+ }
if (isNewLoad) {
AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
}
@@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC,
SmallVector<SUnit*, 2> &Copies) {
- SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL));
CopyFromSU->CopySrcRC = SrcRC;
CopyFromSU->CopyDstRC = DestRC;
- SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL));
CopyToSU->CopySrcRC = DestRC;
CopyToSU->CopyDstRC = SrcRC;
@@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
unsigned NumRes = MCID.getNumDefs();
- for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -447,7 +447,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
Added = true;
}
}
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
if (RegAdded.insert(*Alias)) {
LRegs.push_back(*Alias);
@@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) {
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
@@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/true);
+ VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index e757defd3895..2cb5d37d689e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -45,10 +45,6 @@ static RegisterScheduler
"Bottom-up register reduction list scheduling",
createBURRListDAGScheduler);
static RegisterScheduler
- tdrListrDAGScheduler("list-tdrr",
- "Top-down register reduction list scheduling",
- createTDRRListDAGScheduler);
-static RegisterScheduler
sourceListDAGScheduler("source",
"Similar to list-burr but schedules in source "
"order when possible",
@@ -93,6 +89,9 @@ static cl::opt<bool> DisableSchedCriticalPath(
static cl::opt<bool> DisableSchedHeight(
"disable-sched-height", cl::Hidden, cl::init(false),
cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+ "disable-2addr-hack", cl::Hidden, cl::init(true),
+ cl::desc("Disable scheduler's two-address hack"));
static cl::opt<int> MaxReorderWindow(
"max-sched-reorder", cl::Hidden, cl::init(6),
@@ -103,17 +102,6 @@ static cl::opt<unsigned> AvgIPC(
"sched-avg-ipc", cl::Hidden, cl::init(1),
cl::desc("Average inst/cycle whan no target itinerary exists."));
-#ifndef NDEBUG
-namespace {
- // For sched=list-ilp, Count the number of times each factor comes into play.
- enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
- FactStatic, FactOther, NumFactors };
-}
-static const char *FactorName[NumFactors] =
-{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
-static int FactorCount[NumFactors];
-#endif //!NDEBUG
-
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -121,10 +109,6 @@ namespace {
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
- /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
- /// it is top-down.
- bool isBottomUp;
-
/// NeedLatency - True if the scheduler will make use of latency information.
///
bool NeedLatency;
@@ -162,11 +146,15 @@ private:
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
+ // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+ // DAG crawling.
+ DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
public:
ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
SchedulingPriorityQueue *availqueue,
CodeGenOpt::Level OptLevel)
- : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+ : ScheduleDAGSDNodes(mf),
NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
Topo(SUnits) {
@@ -221,8 +209,6 @@ private:
void ReleasePred(SUnit *SU, const SDep *PredEdge);
void ReleasePredecessors(SUnit *SU);
- void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
- void ReleaseSuccessors(SUnit *SU);
void ReleasePending();
void AdvanceToCycle(unsigned NextCycle);
void AdvancePastStalls(SUnit *SU);
@@ -242,15 +228,11 @@ private:
SUnit *PickNodeToScheduleBottomUp();
void ListScheduleBottomUp();
- void ScheduleNodeTopDown(SUnit*);
- void ListScheduleTopDown();
-
-
/// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
/// Updates the topological ordering if required.
SUnit *CreateNewSUnit(SDNode *N) {
unsigned NumSUnits = SUnits.size();
- SUnit *NewNode = NewSUnit(N);
+ SUnit *NewNode = newSUnit(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
Topo.InitDAGTopologicalSorting();
@@ -268,9 +250,9 @@ private:
return NewNode;
}
- /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
/// need actual latency information but the hybrid scheduler does.
- bool ForceUnitLatencies() const {
+ bool forceUnitLatencies() const {
return !NeedLatency;
}
};
@@ -278,7 +260,7 @@ private:
/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
-/// but for untyped values (MVT::untyped) it means inspecting the node's
+/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
const TargetLowering *TLI,
@@ -289,7 +271,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
// Special handling for untyped values. These values can only come from
// the expansion of custom DAG-to-DAG patterns.
- if (VT == MVT::untyped) {
+ if (VT == MVT::Untyped) {
const SDNode *Node = RegDefPos.GetNode();
unsigned Opcode = Node->getMachineOpcode();
@@ -319,18 +301,16 @@ void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs()
<< "********** List Scheduling BB#" << BB->getNumber()
<< " '" << BB->getName() << "' **********\n");
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- FactorCount[i] = 0;
- }
-#endif //!NDEBUG
CurCycle = 0;
IssueCount = 0;
MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
- LiveRegGens.resize(TRI->getNumRegs(), NULL);
+ // Allocate slots for each physical register, plus one for a special register
+ // to track the virtual resource of a calling sequence.
+ LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
+ LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
+ CallSeqEndForStart.clear();
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -343,18 +323,16 @@ void ScheduleDAGRRList::Schedule() {
HazardRec->Reset();
- // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
- if (isBottomUp)
- ListScheduleBottomUp();
- else
- ListScheduleTopDown();
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
- }
-#endif // !NDEBUG
AvailableQueue->releaseState();
+
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
}
//===----------------------------------------------------------------------===//
@@ -376,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
#endif
--PredSU->NumSuccsLeft;
- if (!ForceUnitLatencies()) {
+ if (!forceUnitLatencies()) {
// Updating predecessor's height. This is now the cycle when the
// predecessor can be scheduled without causing a pipeline stall.
PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
@@ -403,6 +381,109 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
}
}
+/// IsChainDependent - Test if Outer is reachable from Inner through
+/// chain dependencies.
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
+ unsigned NestLevel,
+ const TargetInstrInfo *TII) {
+ SDNode *N = Outer;
+ for (;;) {
+ if (N == Inner)
+ return true;
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
+ return true;
+ return false;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ if (NestLevel == 0)
+ return false;
+ --NestLevel;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return false;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return false;
+ }
+}
+
+/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// the corresponding (lowered) CALLSEQ_BEGIN node.
+///
+/// NestLevel and MaxNested are used in recursion to indcate the current level
+/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
+/// level seen so far.
+///
+/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
+/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
+static SDNode *
+FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
+ const TargetInstrInfo *TII) {
+ for (;;) {
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ SDNode *Best = 0;
+ unsigned BestMaxNest = MaxNest;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ unsigned MyNestLevel = NestLevel;
+ unsigned MyMaxNest = MaxNest;
+ if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
+ MyNestLevel, MyMaxNest, TII))
+ if (!Best || (MyMaxNest > BestMaxNest)) {
+ Best = New;
+ BestMaxNest = MyMaxNest;
+ }
+ }
+ assert(Best);
+ MaxNest = BestMaxNest;
+ return Best;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ MaxNest = std::max(MaxNest, NestLevel);
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NestLevel != 0);
+ --NestLevel;
+ if (NestLevel == 0)
+ return N;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return 0;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return 0;
+ }
+}
+
/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively create one large live range
@@ -440,6 +521,27 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
}
}
}
+
+ // If we're scheduling a lowered CALLSEQ_END, find the corresponding
+ // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
+ // these nodes, to prevent other calls from being interscheduled with them.
+ unsigned CallResource = TRI->getNumRegs();
+ if (!LiveRegDefs[CallResource])
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ unsigned NestLevel = 0;
+ unsigned MaxNest = 0;
+ SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+
+ SUnit *Def = &SUnits[N->getNodeId()];
+ CallSeqEndForStart[Def] = SU;
+
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = Def;
+ LiveRegGens[CallResource] = SU;
+ break;
+ }
}
/// Check to see if any of the pending instructions are ready to issue. If
@@ -457,8 +559,7 @@ void ScheduleDAGRRList::ReleasePending() {
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
- unsigned ReadyCycle =
- isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
+ unsigned ReadyCycle = PendingQueue[i]->getHeight();
if (ReadyCycle < MinAvailableCycle)
MinAvailableCycle = ReadyCycle;
@@ -487,10 +588,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
}
else {
for (; CurCycle != NextCycle; ++CurCycle) {
- if (isBottomUp)
- HazardRec->RecedeCycle();
- else
- HazardRec->AdvanceCycle();
+ HazardRec->RecedeCycle();
}
}
// FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
@@ -511,7 +609,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
// currently need to treat these nodes like real instructions.
// if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
- unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
+ unsigned ReadyCycle = SU->getHeight();
// Bump CurCycle to account for latency. We assume the latency of other
// available instructions may be hidden by the stall (not a full pipe stall).
@@ -522,7 +620,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
// Calls are scheduled in their preceding cycle, so don't conflict with
// hazards from instructions after the call. EmitNode will reset the
// scoreboard state before emitting the call.
- if (isBottomUp && SU->isCall)
+ if (SU->isCall)
return;
// FIXME: For resource conflicts in very long non-pipelined stages, we
@@ -530,7 +628,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
int Stalls = 0;
while (true) {
ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+ HazardRec->getHazardType(SU, -Stalls);
if (HT == ScheduleHazardRecognizer::NoHazard)
break;
@@ -568,17 +666,13 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
HazardRec->Reset();
return;
}
- if (isBottomUp && SU->isCall) {
+ if (SU->isCall) {
// Calls are scheduled with their preceding instructions. For bottom-up
// scheduling, clear the pipeline state before emitting.
HazardRec->Reset();
}
HazardRec->EmitInstruction(SU);
-
- if (!isBottomUp && SU->isCall) {
- HazardRec->Reset();
- }
}
static void resetVRegCycle(SUnit *SU);
@@ -607,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
Sequence.push_back(SU);
- AvailableQueue->ScheduledNode(SU);
+ AvailableQueue->scheduledNode(SU);
// If HazardRec is disabled, and each inst counts as one cycle, then
// advance CurCycle before ReleasePredecessors to avoid useless pushes to
@@ -630,6 +724,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
LiveRegGens[I->getReg()] = NULL;
}
}
+ // Release the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ }
+ }
resetVRegCycle(SU);
@@ -686,15 +794,41 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
}
}
+ // Reclaim the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = SU;
+ LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+ }
+ }
+
+ // Release the special call resource dependence, if this is the end
+ // of a call.
+ if (LiveRegGens[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ }
+ }
+
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
+ if (!LiveRegDefs[I->getReg()])
+ ++NumLiveRegs;
// This becomes the nearest def. Note that an earlier def may still be
// pending if this is a two-address node.
LiveRegDefs[I->getReg()] = SU;
- if (!LiveRegDefs[I->getReg()]) {
- ++NumLiveRegs;
- }
if (LiveRegGens[I->getReg()] == NULL ||
I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
LiveRegGens[I->getReg()] = I->getSUnit();
@@ -714,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
else {
AvailableQueue->push(SU);
}
- AvailableQueue->UnscheduledNode(SU);
+ AvailableQueue->unscheduledNode(SU);
}
/// After backtracking, the hazard checker needs to be restored to a state
@@ -805,6 +939,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return NULL;
+ // unfolding an x86 DEC64m operation results in store, dec, load which
+ // can't be handled here so quit
+ if (NewNodes.size() == 3)
+ return NULL;
+
DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
@@ -830,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
LoadNode->setNodeId(LoadSU->NodeNum);
InitNumRegDefsLeft(LoadSU);
- ComputeLatency(LoadSU);
+ computeLatency(LoadSU);
}
SUnit *NewSU = CreateNewSUnit(N);
@@ -848,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
NewSU->isCommutable = true;
InitNumRegDefsLeft(NewSU);
- ComputeLatency(NewSU);
+ computeLatency(NewSU);
// Record all the edges to and from the old SU, by category.
SmallVector<SDep, 4> ChainPreds;
@@ -1027,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
unsigned NumRes = MCID.getNumDefs();
- for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -1042,7 +1181,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
- for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
// Check if Ref is live.
if (!LiveRegDefs[*AliasI]) continue;
@@ -1057,6 +1196,31 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
}
}
+/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
+/// by RegMask, and add them to LRegs.
+static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs) {
+ // Look at all live registers. Skip Reg0 and the special CallResource.
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
+ if (!LiveRegDefs[i]) continue;
+ if (LiveRegDefs[i] == SU) continue;
+ if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
+ if (RegAdded.insert(i))
+ LRegs.push_back(i);
+ }
+}
+
+/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
+static const uint32_t *getNodeRegMask(const SDNode *N) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (const RegisterMaskSDNode *Op =
+ dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
+ return Op->getRegMask();
+ return NULL;
+}
+
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
@@ -1108,10 +1272,27 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
if (!Node->isMachineOpcode())
continue;
+ // If we're in the middle of scheduling a call, don't begin scheduling
+ // another call. Also, don't allow any physical registers to be live across
+ // the call.
+ if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ // Check the special calling-sequence resource.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource]) {
+ SDNode *Gen = LiveRegGens[CallResource]->getNode();
+ while (SDNode *Glued = Gen->getGluedNode())
+ Gen = Glued;
+ if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
+ LRegs.push_back(CallResource);
+ }
+ }
+ if (const uint32_t *RegMask = getNodeRegMask(Node))
+ CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
+
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
@@ -1300,99 +1481,10 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
- VerifySchedule(isBottomUp);
-#endif
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Down Scheduling
-//===----------------------------------------------------------------------===//
-
-/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
-/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
-void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
- SUnit *SuccSU = SuccEdge->getSUnit();
-
-#ifndef NDEBUG
- if (SuccSU->NumPredsLeft == 0) {
- dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
- dbgs() << " has been released too many times!\n";
- llvm_unreachable(0);
- }
-#endif
- --SuccSU->NumPredsLeft;
-
- // If all the node's predecessors are scheduled, this node is ready
- // to be scheduled. Ignore the special ExitSU node.
- if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
- SuccSU->isAvailable = true;
- AvailableQueue->push(SuccSU);
- }
-}
-
-void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
- // Top down: release successors
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- assert(!I->isAssignedRegDep() &&
- "The list-tdrr scheduler doesn't yet support physreg dependencies!");
-
- ReleaseSucc(SU, &*I);
- }
-}
-
-/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
-/// count of its successors. If a successor pending count is zero, add it to
-/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
-
- assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
- SU->setDepthToAtLeast(CurCycle);
- Sequence.push_back(SU);
-
- ReleaseSuccessors(SU);
- SU->isScheduled = true;
- AvailableQueue->ScheduledNode(SU);
-}
-
-/// ListScheduleTopDown - The main loop of list scheduling for top-down
-/// schedulers.
-void ScheduleDAGRRList::ListScheduleTopDown() {
- AvailableQueue->setCurCycle(CurCycle);
-
- // Release any successors of the special Entry node.
- ReleaseSuccessors(&EntrySU);
-
- // All leaves to Available queue.
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- // It is available if it has no predecessors.
- if (SUnits[i].Preds.empty()) {
- AvailableQueue->push(&SUnits[i]);
- SUnits[i].isAvailable = true;
- }
- }
-
- // While Available queue is not empty, grab the node with the highest
- // priority. If it is not ready put it back. Schedule the node.
- Sequence.reserve(SUnits.size());
- while (!AvailableQueue->empty()) {
- SUnit *CurSU = AvailableQueue->pop();
-
- if (CurSU)
- ScheduleNodeTopDown(CurSU);
- ++CurCycle;
- AvailableQueue->setCurCycle(CurCycle);
- }
-
-#ifndef NDEBUG
- VerifySchedule(isBottomUp);
+ VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
-
//===----------------------------------------------------------------------===//
// RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
@@ -1437,21 +1529,6 @@ struct bu_ls_rr_sort : public queue_sort {
bool operator()(SUnit* left, SUnit* right) const;
};
-// td_ls_rr_sort - Priority function for top down register pressure reduction
-// scheduler.
-struct td_ls_rr_sort : public queue_sort {
- enum {
- IsBottomUp = false,
- HasReadyFilter = false
- };
-
- RegReductionPQBase *SPQ;
- td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
- td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
-};
-
// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
enum {
@@ -1510,6 +1587,7 @@ protected:
std::vector<SUnit*> Queue;
unsigned CurQueueId;
bool TracksRegPressure;
+ bool SrcOrder;
// SUnits - The SUnits for the current graph.
std::vector<SUnit> *SUnits;
@@ -1535,11 +1613,12 @@ public:
RegReductionPQBase(MachineFunction &mf,
bool hasReadyFilter,
bool tracksrp,
+ bool srcorder,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
const TargetLowering *tli)
: SchedulingPriorityQueue(hasReadyFilter),
- CurQueueId(0), TracksRegPressure(tracksrp),
+ CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
if (TracksRegPressure) {
unsigned NumRC = TRI->getNumRegClasses();
@@ -1610,9 +1689,9 @@ public:
int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
- void ScheduledNode(SUnit *SU);
+ void scheduledNode(SUnit *SU);
- void UnscheduledNode(SUnit *SU);
+ void unscheduledNode(SUnit *SU);
protected:
bool canClobber(const SUnit *SU, const SUnit *Op);
@@ -1654,10 +1733,12 @@ class RegReductionPriorityQueue : public RegReductionPQBase {
public:
RegReductionPriorityQueue(MachineFunction &mf,
bool tracksrp,
+ bool srcorder,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
const TargetLowering *tli)
- : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
+ tii, tri, tli),
Picker(this) {}
bool isBottomUp() const { return SF::IsBottomUp; }
@@ -1680,10 +1761,7 @@ public:
SF DumpPicker = Picker;
while (!DumpQueue.empty()) {
SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
- if (isBottomUp())
- dbgs() << "Height " << SU->getHeight() << ": ";
- else
- dbgs() << "Depth " << SU->getDepth() << ": ";
+ dbgs() << "Height " << SU->getHeight() << ": ";
SU->dump(DAG);
}
}
@@ -1692,9 +1770,6 @@ public:
typedef RegReductionPriorityQueue<bu_ls_rr_sort>
BURegReductionPriorityQueue;
-typedef RegReductionPriorityQueue<td_ls_rr_sort>
-TDRegReductionPriorityQueue;
-
typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;
@@ -1919,7 +1994,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
return PDiff;
}
-void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
@@ -1988,7 +2063,7 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
dumpRegPressure();
}
-void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
@@ -2235,37 +2310,29 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LHeight = (int)left->getHeight() + LPenalty;
int RHeight = (int)right->getHeight() + RPenalty;
- bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
BUHasStall(left, LHeight, SPQ);
- bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
BUHasStall(right, RHeight, SPQ);
// If scheduling one of the node will cause a pipeline stall, delay it.
// If scheduling either one of the node will cause a pipeline stall, sort
// them according to their height.
if (LStall) {
- if (!RStall) {
- DEBUG(++FactorCount[FactStall]);
+ if (!RStall)
return 1;
- }
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactStall]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
- } else if (RStall) {
- DEBUG(++FactorCount[FactStall]);
+ } else if (RStall)
return -1;
- }
// If either node is scheduling for latency, sort them by height/depth
// and latency.
- if (!checkPref || (left->SchedulingPref == Sched::Latency ||
- right->SchedulingPref == Sched::Latency)) {
+ if (!checkPref || (left->SchedulingPref == Sched::ILP ||
+ right->SchedulingPref == Sched::ILP)) {
if (DisableSchedCycles) {
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
}
else {
// If neither instruction stalls (!LStall && !RStall) then
@@ -2274,17 +2341,14 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LDepth = left->getDepth() - LPenalty;
int RDepth = right->getDepth() - RPenalty;
if (LDepth != RDepth) {
- DEBUG(++FactorCount[FactDepth]);
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
<< ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
}
- if (left->Latency != right->Latency) {
- DEBUG(++FactorCount[FactOther]);
+ if (left->Latency != right->Latency)
return left->Latency > right->Latency ? 1 : -1;
- }
}
return 0;
}
@@ -2298,7 +2362,6 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
bool LHasPhysReg = left->hasPhysRegDefs;
bool RHasPhysReg = right->hasPhysRegDefs;
if (LHasPhysReg != RHasPhysReg) {
- DEBUG(++FactorCount[FactRegUses]);
#ifndef NDEBUG
const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
#endif
@@ -2324,10 +2387,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
}
- if (LPriority != RPriority) {
- DEBUG(++FactorCount[FactStatic]);
+ if (LPriority != RPriority)
return LPriority > RPriority;
- }
// One or both of the nodes are calls and their sethi-ullman numbers are the
// same, then keep source order.
@@ -2360,18 +2421,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// This creates more short live intervals.
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
- if (LDist != RDist) {
- DEBUG(++FactorCount[FactOther]);
+ if (LDist != RDist)
return LDist < RDist;
- }
// How many registers becomes live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
- if (LScratch != RScratch) {
- DEBUG(++FactorCount[FactOther]);
+ if (LScratch != RScratch)
return LScratch > RScratch;
- }
// Comparing latency against a call makes little sense unless the node
// is register pressure-neutral.
@@ -2386,20 +2443,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
return result > 0;
}
else {
- if (left->getHeight() != right->getHeight()) {
- DEBUG(++FactorCount[FactHeight]);
+ if (left->getHeight() != right->getHeight())
return left->getHeight() > right->getHeight();
- }
- if (left->getDepth() != right->getDepth()) {
- DEBUG(++FactorCount[FactDepth]);
+ if (left->getDepth() != right->getDepth())
return left->getDepth() < right->getDepth();
- }
}
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
- DEBUG(++FactorCount[FactOther]);
return (left->NodeQueueId > right->NodeQueueId);
}
@@ -2459,13 +2511,11 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
<< right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
<< left->NodeNum << ")\n");
return false;
@@ -2529,7 +2579,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
}
if (!DisableSchedRegPressure && LPDiff != RPDiff) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
<< " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
return LPDiff > RPDiff;
@@ -2538,7 +2587,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
bool LReduce = canEnableCoalescing(left);
bool RReduce = canEnableCoalescing(right);
- DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
if (LReduce && !RReduce) return false;
if (RReduce && !LReduce) return true;
}
@@ -2546,17 +2594,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
- DEBUG(++FactorCount[FactRegUses]);
return LLiveUses < RLiveUses;
}
if (!DisableSchedStalls) {
bool LStall = BUHasStall(left, left->getHeight(), SPQ);
bool RStall = BUHasStall(right, right->getHeight(), SPQ);
- if (LStall != RStall) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LStall != RStall)
return left->getHeight() > right->getHeight();
- }
}
if (!DisableSchedCriticalPath) {
@@ -2565,17 +2610,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
<< left->getDepth() << " != SU(" << right->NodeNum << "): "
<< right->getDepth() << "\n");
- DEBUG(++FactorCount[FactDepth]);
return left->getDepth() < right->getDepth();
}
}
if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
int spread = (int)left->getHeight() - (int)right->getHeight();
- if (std::abs(spread) > MaxReorderWindow) {
- DEBUG(++FactorCount[FactHeight]);
+ if (std::abs(spread) > MaxReorderWindow)
return left->getHeight() > right->getHeight();
- }
}
return BURRSort(left, right, SPQ);
@@ -2584,9 +2626,10 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
// Add pseudo dependency edges for two-address nodes.
- AddPseudoTwoAddrDeps();
+ if (!Disable2AddrHack)
+ AddPseudoTwoAddrDeps();
// Reroute edges to nodes with multiple uses.
- if (!TracksRegPressure)
+ if (!TracksRegPressure && !SrcOrder)
PrescheduleNodesWithMultipleUses();
// Calculate node priorities.
CalculateSethiUllmanNumbers();
@@ -2628,9 +2671,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
ScheduleDAGRRList *scheduleDAG,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- const unsigned *ImpDefs
+ const uint16_t *ImpDefs
= TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
- if(!ImpDefs)
+ const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+ if(!ImpDefs && !RegMask)
return false;
for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
@@ -2641,14 +2685,18 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
if (!PI->isAssignedRegDep())
continue;
- for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) {
- // Return true if SU clobbers this physical register use and the
- // definition of the register reaches from DepSU. IsReachable queries a
- // topological forward sort of the DAG (following the successors).
- if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
- scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
- return true;
- }
+ if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
+
+ if (ImpDefs)
+ for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
}
}
return false;
@@ -2661,16 +2709,17 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const TargetRegisterInfo *TRI) {
SDNode *N = SuccSU->getNode();
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
- const unsigned *SUImpDefs =
+ const uint16_t *SUImpDefs =
TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
- if (!SUImpDefs)
- return false;
+ const uint32_t *SURegMask = getNodeRegMask(SUNode);
+ if (!SUImpDefs && !SURegMask)
+ continue;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
@@ -2678,6 +2727,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
if (!N->hasAnyUseOfValue(i))
continue;
unsigned Reg = ImpDefs[i - NumDefs];
+ if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
+ return true;
+ if (!SUImpDefs)
+ continue;
for (;*SUImpDefs; ++SUImpDefs) {
unsigned SUReg = *SUImpDefs;
if (TRI->regsOverlap(Reg, SUReg))
@@ -2887,69 +2940,6 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
}
}
-/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
-/// predecessors of the successors of the SUnit SU. Stop when the provided
-/// limit is exceeded.
-static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
- unsigned Limit) {
- unsigned Sum = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- const SUnit *SuccSU = I->getSUnit();
- for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
- EE = SuccSU->Preds.end(); II != EE; ++II) {
- SUnit *PredSU = II->getSUnit();
- if (!PredSU->isScheduled)
- if (++Sum > Limit)
- return Sum;
- }
- }
- return Sum;
-}
-
-
-// Top down
-bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
- if (int res = checkSpecialNodes(left, right))
- return res < 0;
-
- unsigned LPriority = SPQ->getNodePriority(left);
- unsigned RPriority = SPQ->getNodePriority(right);
- bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
- bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
- bool LIsFloater = LIsTarget && left->NumPreds == 0;
- bool RIsFloater = RIsTarget && right->NumPreds == 0;
- unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
- unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
-
- if (left->NumSuccs == 0 && right->NumSuccs != 0)
- return false;
- else if (left->NumSuccs != 0 && right->NumSuccs == 0)
- return true;
-
- if (LIsFloater)
- LBonus -= 2;
- if (RIsFloater)
- RBonus -= 2;
- if (left->NumSuccs == 1)
- LBonus += 2;
- if (right->NumSuccs == 1)
- RBonus += 2;
-
- if (LPriority+LBonus != RPriority+RBonus)
- return LPriority+LBonus < RPriority+RBonus;
-
- if (left->getDepth() != right->getDepth())
- return left->getDepth() < right->getDepth();
-
- if (left->NumSuccsLeft != right->NumSuccsLeft)
- return left->NumSuccsLeft > right->NumSuccsLeft;
-
- assert(left->NodeQueueId && right->NodeQueueId &&
- "NodeQueueId cannot be zero");
- return (left->NodeQueueId > right->NodeQueueId);
-}
-
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
@@ -2962,21 +2952,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
BURegReductionPriorityQueue *PQ =
- new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
- PQ->setScheduleDAG(SD);
- return SD;
-}
-
-llvm::ScheduleDAGSDNodes *
-llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
- TDRegReductionPriorityQueue *PQ =
- new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
@@ -2990,7 +2966,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
SrcRegReductionPriorityQueue *PQ =
- new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
@@ -3005,7 +2981,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
const TargetLowering *TLI = &IS->getTargetLowering();
HybridBURRPriorityQueue *PQ =
- new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
@@ -3021,7 +2997,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
const TargetLowering *TLI = &IS->getTargetLowering();
ILPBURRPriorityQueue *PQ =
- new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 71f07d6fa47a..69dd813b24e0 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -17,6 +17,8 @@
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -44,20 +46,26 @@ static cl::opt<int> HighLatencyCycles(
"instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf),
+ : ScheduleDAG(mf), BB(0), DAG(0),
InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
-void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos) {
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+ BB = bb;
DAG = dag;
- ScheduleDAG::Run(bb, insertPos);
+
+ // Clear the scheduler's SUnit DAG.
+ ScheduleDAG::clearDAG();
+ Sequence.clear();
+
+ // Invoke the target's selection of scheduler.
+ Schedule();
}
/// NewSUnit - Creates a new SUnit and return a ptr to it.
///
-SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
#ifndef NDEBUG
const SUnit *Addr = 0;
if (!SUnits.empty())
@@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
}
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
- SUnit *SU = NewSUnit(Old->getNode());
+ SUnit *SU = newSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->Latency = Old->Latency;
SU->isVRegCycle = Old->isVRegCycle;
@@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// If this node has already been processed, stop now.
if (NI->getNodeId() != -1) continue;
- SUnit *NodeSUnit = NewSUnit(NI);
+ SUnit *NodeSUnit = newSUnit(NI);
// See if anything is glued to this node, if so, add them to glued
// nodes. Nodes can have at most one glue input and one glue output. Glue
@@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
InitNumRegDefsLeft(NodeSUnit);
// Assign the Latency field of NodeSUnit using target-provided information.
- ComputeLatency(NodeSUnit);
+ computeLatency(NodeSUnit);
}
// Find all call operands.
@@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
// Check to see if the scheduler cares about latencies.
- bool UnitLatencies = ForceUnitLatencies();
+ bool UnitLatencies = forceUnitLatencies();
// Pass 2: add the preds, succs, etc.
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
@@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
- ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
+ computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
@@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
}
}
-void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
SDNode *N = SU->getNode();
// TokenFactor operands are considered zero latency, and some schedulers
@@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
}
// Check to see if the scheduler cares about latencies.
- if (ForceUnitLatencies()) {
+ if (forceUnitLatencies()) {
SU->Latency = 1;
return;
}
@@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
SU->Latency += TII->getInstrLatency(InstrItins, N);
}
-void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const{
// Check to see if the scheduler cares about latencies.
- if (ForceUnitLatencies())
+ if (forceUnitLatencies())
return;
if (dep.getKind() != SDep::Data)
@@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
}
}
+void ScheduleDAGSDNodes::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
+#ifndef NDEBUG
+/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
+/// their state is consistent with the nodes listed in Sequence.
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+ unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
namespace {
struct OrderSorter {
bool operator()(const std::pair<unsigned, MachineInstr*> &A,
@@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->isCtrl()) continue; // ignore chain preds
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
+ }
+ break;
+ }
+}
-/// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
DenseMap<SUnit*, unsigned> CopyVRBaseMap;
@@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
SUnit *SU = Sequence[i];
if (!SU) {
// Null SUnit* is a noop.
- EmitNoop();
+ TII->insertNoop(*Emitter.getBlock(), InsertPos);
continue;
}
@@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// SDNode and any glued SDNodes and append them to the block.
if (!SU->getNode()) {
// Emit a copy.
- EmitPhysRegCopy(SU, CopyVRBaseMap);
+ EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
continue;
}
@@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
}
// Add trailing DbgValue's before the terminator. FIXME: May want to add
// some of them before one or more conditional branches?
+ SmallVector<MachineInstr*, 8> DbgMIs;
while (DI != DE) {
- MachineBasicBlock *InsertBB = Emitter.getBlock();
- MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
- if (!(*DI)->isInvalidated()) {
- MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap);
- if (DbgMI)
- InsertBB->insert(Pos, DbgMI);
- }
+ if (!(*DI)->isInvalidated())
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
++DI;
}
+
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
+ InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
}
- BB = Emitter.getBlock();
InsertPos = Emitter.getInsertPos();
- return BB;
+ return Emitter.getBlock();
+}
+
+/// Return the basic block label.
+std::string ScheduleDAGSDNodes::getDAGName() const {
+ return "sunit-dag." + BB->getFullName();
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 9c27b2ea02ec..75940ec33ddc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -35,17 +35,20 @@ namespace llvm {
///
class ScheduleDAGSDNodes : public ScheduleDAG {
public:
+ MachineBasicBlock *BB;
SelectionDAG *DAG; // DAG of the current basic block
const InstrItineraryData *InstrItins;
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
explicit ScheduleDAGSDNodes(MachineFunction &mf);
virtual ~ScheduleDAGSDNodes() {}
/// Run - perform scheduling.
///
- void Run(SelectionDAG *dag, MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos);
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb);
/// isPassiveNode - Return true if the node is a non-scheduled leaf.
///
@@ -53,6 +56,7 @@ namespace llvm {
if (isa<ConstantSDNode>(Node)) return true;
if (isa<ConstantFPSDNode>(Node)) return true;
if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<RegisterMaskSDNode>(Node)) return true;
if (isa<GlobalAddressSDNode>(Node)) return true;
if (isa<BasicBlockSDNode>(Node)) return true;
if (isa<FrameIndexSDNode>(Node)) return true;
@@ -67,7 +71,7 @@ namespace llvm {
/// NewSUnit - Creates a new SUnit and return a ptr to it.
///
- SUnit *NewSUnit(SDNode *N);
+ SUnit *newSUnit(SDNode *N);
/// Clone - Creates a clone of the specified SUnit. It does not copy the
/// predecessors / successors info nor the temporary scheduling states.
@@ -78,7 +82,7 @@ namespace llvm {
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
- virtual void BuildSchedGraph(AliasAnalysis *AA);
+ void BuildSchedGraph(AliasAnalysis *AA);
/// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
/// CopyToReg and its only active data operands are CopyFromReg within a
@@ -90,30 +94,41 @@ namespace llvm {
///
void InitNumRegDefsLeft(SUnit *SU);
- /// ComputeLatency - Compute node latency.
+ /// computeLatency - Compute node latency.
///
- virtual void ComputeLatency(SUnit *SU);
+ virtual void computeLatency(SUnit *SU);
- /// ComputeOperandLatency - Override dependence edge latency using
+ /// computeOperandLatency - Override dependence edge latency using
/// operand use/def information
///
- virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const { }
- virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const;
- virtual MachineBasicBlock *EmitSchedule();
-
/// Schedule - Order nodes according to selected style, filling
/// in the Sequence member.
///
virtual void Schedule() = 0;
+ /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+ /// consistent with the Sequence of scheduled instructions.
+ void VerifyScheduledSequence(bool isBottomUp);
+
+ /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+ /// according to the order specified in Sequence.
+ ///
+ MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
virtual void dumpNode(const SUnit *SU) const;
+ void dumpSchedule() const;
+
virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+ virtual std::string getDAGName() const;
+
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
/// RegDefIter - In place iteration over the values defined by an
@@ -159,6 +174,9 @@ namespace llvm {
/// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
void BuildSchedUnits();
void AddSchedEdges();
+
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos);
};
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 430283d5eff9..c8512914c1e2 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -1,4 +1,4 @@
-//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -31,6 +31,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include <climits>
using namespace llvm;
@@ -38,15 +39,15 @@ STATISTIC(NumNoops , "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
static RegisterScheduler
- tdListDAGScheduler("list-td", "Top-down list scheduler",
- createTDListDAGScheduler);
+ VLIWScheduler("vliw-td", "VLIW scheduler",
+ createVLIWDAGScheduler);
namespace {
//===----------------------------------------------------------------------===//
-/// ScheduleDAGList - The actual list scheduler implementation. This supports
-/// top-down scheduling.
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports / top-down scheduling.
///
-class ScheduleDAGList : public ScheduleDAGSDNodes {
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
private:
/// AvailableQueue - The priority queue to use for the available SUnits.
///
@@ -61,16 +62,20 @@ private:
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
public:
- ScheduleDAGList(MachineFunction &mf,
+ ScheduleDAGVLIW(MachineFunction &mf,
+ AliasAnalysis *aa,
SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) {
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
const TargetMachine &tm = mf.getTarget();
HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
}
- ~ScheduleDAGList() {
+ ~ScheduleDAGVLIW() {
delete HazardRec;
delete AvailableQueue;
}
@@ -78,23 +83,25 @@ public:
void Schedule();
private:
- void ReleaseSucc(SUnit *SU, const SDep &D);
- void ReleaseSuccessors(SUnit *SU);
- void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
- void ListScheduleTopDown();
+ void releaseSucc(SUnit *SU, const SDep &D);
+ void releaseSuccessors(SUnit *SU);
+ void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void listScheduleTopDown();
};
} // end anonymous namespace
/// Schedule - Schedule the DAG using list scheduling.
-void ScheduleDAGList::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling **********\n");
+void ScheduleDAGVLIW::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
// Build the scheduling graph.
- BuildSchedGraph(NULL);
+ BuildSchedGraph(AA);
AvailableQueue->initNodes(SUnits);
- ListScheduleTopDown();
+ listScheduleTopDown();
AvailableQueue->releaseState();
}
@@ -103,9 +110,9 @@ void ScheduleDAGList::Schedule() {
// Top-Down Scheduling
//===----------------------------------------------------------------------===//
-/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
-void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
+void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
SUnit *SuccSU = D.getSUnit();
#ifndef NDEBUG
@@ -122,25 +129,26 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
- if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
PendingQueue.push_back(SuccSU);
+ }
}
-void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
+void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
// Top down: release successors.
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
assert(!I->isAssignedRegDep() &&
"The list-td scheduler doesn't yet support physreg dependencies!");
- ReleaseSucc(SU, *I);
+ releaseSucc(SU, *I);
}
}
-/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
-void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
@@ -148,20 +156,20 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
- ReleaseSuccessors(SU);
+ releaseSuccessors(SU);
SU->isScheduled = true;
- AvailableQueue->ScheduledNode(SU);
+ AvailableQueue->scheduledNode(SU);
}
-/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// listScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
-void ScheduleDAGList::ListScheduleTopDown() {
+void ScheduleDAGVLIW::listScheduleTopDown() {
unsigned CurCycle = 0;
// Release any successors of the special Entry node.
- ReleaseSuccessors(&EntrySU);
+ releaseSuccessors(&EntrySU);
- // All leaves to Available queue.
+ // All leaves to AvailableQueue.
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
// It is available if it has no predecessors.
if (SUnits[i].Preds.empty()) {
@@ -170,7 +178,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
}
}
- // While Available queue is not empty, grab the node with the highest
+ // While AvailableQueue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
std::vector<SUnit*> NotReady;
Sequence.reserve(SUnits.size());
@@ -184,7 +192,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
PendingQueue[i] = PendingQueue.back();
PendingQueue.pop_back();
--i; --e;
- } else {
+ }
+ else {
assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
}
}
@@ -192,6 +201,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
// If there are no instructions available, don't try to issue anything, and
// don't advance the hazard recognizer.
if (AvailableQueue->empty()) {
+ // Reset DFA state.
+ AvailableQueue->scheduledNode(0);
++CurCycle;
continue;
}
@@ -223,7 +234,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
// If we found a node to schedule, do it now.
if (FoundSUnit) {
- ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ scheduleNodeTopDown(FoundSUnit, CurCycle);
HazardRec->EmitInstruction(FoundSUnit);
// If this is a pseudo-op node, we don't want to increment the current
@@ -250,7 +261,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
}
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/false);
+ VerifyScheduledSequence(/*isBottomUp=*/false);
#endif
}
@@ -258,8 +269,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-/// createTDListDAGScheduler - This creates a top-down list scheduler.
+/// createVLIWDAGScheduler - This creates a top-down list scheduler.
ScheduleDAGSDNodes *
-llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
- return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());
+llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 20bea8e4c9e9..92671d1678c6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -63,6 +62,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP format");
+ case MVT::f16: return &APFloat::IEEEhalf;
case MVT::f32: return &APFloat::IEEEsingle;
case MVT::f64: return &APFloat::IEEEdouble;
case MVT::f80: return &APFloat::x87DoubleExtended;
@@ -125,20 +125,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
if (i == e) return false;
// Do not accept build_vectors that aren't all constants or which have non-~0
- // elements.
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target and
+ // a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all ones, not whether the individual
+ // constants are.
SDValue NotZero = N->getOperand(i);
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
if (isa<ConstantSDNode>(NotZero)) {
- if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+ if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() <
+ EltSize)
return false;
} else if (isa<ConstantFPSDNode>(NotZero)) {
- if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
- bitcastToAPInt().isAllOnesValue())
+ if (cast<ConstantFPSDNode>(NotZero)->getValueAPF()
+ .bitcastToAPInt().countTrailingOnes() < EltSize)
return false;
} else
return false;
// Okay, we have at least one ~0 value, check to see if the rest match or are
- // undefs.
+ // undefs. Even with the above element type twiddling, this should be OK, as
+ // the same type legalization should have applied to all the elements.
for (++i; i != e; ++i)
if (N->getOperand(i) != NotZero &&
N->getOperand(i).getOpcode() != ISD::UNDEF)
@@ -384,7 +393,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::Register:
ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
break;
-
+ case ISD::RegisterMask:
+ ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
+ break;
case ISD::SRCVALUE:
ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
break;
@@ -475,7 +486,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
///
static inline unsigned
encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
- bool isNonTemporal) {
+ bool isNonTemporal, bool isInvariant) {
assert((ConvType & 3) == ConvType &&
"ConvType may not require more than 2 bits!");
assert((AM & 7) == AM &&
@@ -483,7 +494,8 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
return ConvType |
(AM << 2) |
(isVolatile << 5) |
- (isNonTemporal << 6);
+ (isNonTemporal << 6) |
+ (isInvariant << 7);
}
//===----------------------------------------------------------------------===//
@@ -564,6 +576,12 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
SmallVector<SDNode*, 16> DeadNodes(1, N);
+
+ // Create a dummy node that adds a reference to the root node, preventing
+ // it from being deleted. (This matters if the root is an operand of the
+ // dead node.)
+ HandleSDNode Dummy(getRoot());
+
RemoveDeadNodes(DeadNodes, UpdateListener);
}
@@ -834,9 +852,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm)
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
- EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+ OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
Ordering = new SDNodeOrdering();
@@ -1025,16 +1043,14 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
return getConstantFP(APFloat((float)Val), VT, isTarget);
else if (EltVT==MVT::f64)
return getConstantFP(APFloat(Val), VT, isTarget);
- else if (EltVT==MVT::f80 || EltVT==MVT::f128) {
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) {
bool ignored;
APFloat apf = APFloat(Val);
apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&ignored);
return getConstantFP(apf, VT, isTarget);
- } else {
- assert(0 && "Unsupported type in getConstantFP");
- return SDValue();
- }
+ } else
+ llvm_unreachable("Unsupported type in getConstantFP");
}
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
@@ -1369,6 +1385,20 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
return SDValue(N, 0);
}
+SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0);
+ ID.AddPointer(RegMask);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
FoldingSetNodeID ID;
SDValue Ops[] = { Root };
@@ -1598,7 +1628,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
unsigned Depth) const {
APInt KnownZero, KnownOne;
- ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
return (KnownZero & Mask) == Mask;
}
@@ -1607,15 +1637,12 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
/// processing.
-void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
- APInt &KnownZero, APInt &KnownOne,
- unsigned Depth) const {
- unsigned BitWidth = Mask.getBitWidth();
- assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() &&
- "Mask size mismatches value type size!");
+void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth) const {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
- if (Depth == 6 || Mask == 0)
+ if (Depth == 6)
return; // Limit search depth.
APInt KnownZero2, KnownOne2;
@@ -1623,14 +1650,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
switch (Op.getOpcode()) {
case ISD::Constant:
// We know all of the bits for a constant!
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
- KnownZero = ~KnownOne & Mask;
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
return;
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
- KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1640,9 +1666,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZero |= KnownZero2;
return;
case ISD::OR:
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
- KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1652,8 +1677,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownOne |= KnownOne2;
return;
case ISD::XOR: {
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1665,9 +1690,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::MUL: {
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1686,33 +1710,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
LeadZ = std::min(LeadZ, BitWidth);
KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
APInt::getHighBitsSet(BitWidth, LeadZ);
- KnownZero &= Mask;
return;
}
case ISD::UDIV: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(0),
- AllOnes, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- ComputeMaskedBits(Op.getOperand(1),
- AllOnes, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
return;
}
case ISD::SELECT:
- ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1721,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZero &= KnownZero2;
return;
case ISD::SELECT_CC:
- ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1754,8 +1774,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero <<= ShAmt;
KnownOne <<= ShAmt;
@@ -1772,13 +1791,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
KnownZero |= HighBits; // High bits known zero.
}
return;
@@ -1790,15 +1808,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- APInt InDemandedMask = (Mask << ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
- if (HighBits.getBoolValue())
- InDemandedMask |= APInt::getSignBit(BitWidth);
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
- ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
@@ -1820,10 +1834,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
APInt InSignBit = APInt::getSignBit(EBits);
- APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+ APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
@@ -1831,8 +1845,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
- ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownOne &= InputDemandedBits;
+ KnownZero &= InputDemandedBits;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
@@ -1850,7 +1865,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP: {
unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
@@ -1858,22 +1875,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
if (ISD::isZEXTLoad(Op.getNode())) {
- LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarType().getSizeInBits();
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ } else if (const MDNode *Ranges = LD->getRanges()) {
+ computeMaskedBitsLoad(*Ranges, KnownZero);
}
return;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask.trunc(InBits);
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
@@ -1883,17 +1901,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InSignBit = APInt::getSignBit(InBits);
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask.trunc(InBits);
-
- // If any of the sign extended bits are demanded, we know that the sign
- // bit is demanded. Temporarily set this bit in the mask for our callee.
- if (NewBits.getBoolValue())
- InMask |= InSignBit;
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
// Note if the sign bit is known to be zero or one.
bool SignBitKnownZero = KnownZero.isNegative();
@@ -1901,13 +1913,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
assert(!(SignBitKnownZero && SignBitKnownOne) &&
"Sign bit can't be known to be both zero and one!");
- // If the sign bit wasn't actually demanded by our caller, we don't
- // want it set in the KnownZero and KnownOne result values. Reset the
- // mask and reapply it to the result values.
- InMask = Mask.trunc(InBits);
- KnownZero &= InMask;
- KnownOne &= InMask;
-
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -1921,10 +1926,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask.trunc(InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
return;
@@ -1932,10 +1936,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask.zext(InBits);
KnownZero = KnownZero.zext(InBits);
KnownOne = KnownOne.zext(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.trunc(BitWidth);
KnownOne = KnownOne.trunc(BitWidth);
@@ -1944,9 +1947,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
- ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
- KnownOne, Depth+1);
- KnownZero |= (~InMask) & Mask;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero |= (~InMask);
return;
}
case ISD::FGETSIGN:
@@ -1963,8 +1965,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -1972,7 +1973,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if ((KnownZero2 & MaskV) == MaskV) {
unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
// Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
}
}
}
@@ -1983,13 +1984,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
- APInt Mask2 = APInt::getLowBitsSet(BitWidth,
- BitWidth - Mask.countLeadingZeros());
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
- ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
KnownZeroOut = std::min(KnownZeroOut,
KnownZero2.countTrailingOnes());
@@ -2013,7 +2012,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -2028,10 +2027,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// the upper bits are all one.
if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
KnownOne |= ~LowBits;
-
- KnownZero &= Mask;
- KnownOne &= Mask;
-
assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
}
}
@@ -2041,9 +2036,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- APInt Mask2 = LowBits & Mask;
- KnownZero |= ~LowBits & Mask;
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
+ KnownZero |= ~LowBits;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1);
assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
break;
}
@@ -2051,16 +2045,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
- Depth+1);
- ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
KnownOne.clearAllBits();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
return;
}
case ISD::FrameIndex:
@@ -2080,8 +2071,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
// Allow the target to implement this method for its nodes.
- TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
- Depth);
+ TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
return;
}
}
@@ -2205,12 +2195,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
return VTBits;
// If we are subtracting one from a positive number, there is no carry
@@ -2221,8 +2210,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp2 == 1) return 1;
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
case ISD::SUB:
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
@@ -2232,11 +2220,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
return VTBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -2251,8 +2238,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
case ISD::TRUNCATE:
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
@@ -2286,9 +2272,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ APInt Mask;
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
} else if (KnownOne.isNegative()) { // sign bit is 1;
@@ -2328,7 +2314,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
- if (NoNaNsFPMath)
+ if (getTarget().Options.NoNaNsFPMath)
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
@@ -2423,8 +2409,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::CTPOP:
return getConstant(Val.countPopulation(), VT);
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
return getConstant(Val.countLeadingZeros(), VT);
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), VT);
}
}
@@ -2440,7 +2428,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::FABS:
V.clearSign();
return getConstantFP(V, VT);
- case ISD::FP_ROUND:
case ISD::FP_EXTEND: {
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
@@ -2561,17 +2548,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
"Vector element count mismatch!");
if (OpOpcode == ISD::TRUNCATE)
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
- OpOpcode == ISD::ANY_EXTEND) {
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
.bitsLT(VT.getScalarType()))
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
- else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- else
- return Operand.getNode()->getOperand(0);
+ return Operand.getNode()->getOperand(0);
}
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
break;
case ISD::BITCAST:
// Basic sanity checking.
@@ -2601,7 +2589,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
break;
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+ if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
Operand.getNode()->getOperand(0));
if (OpOpcode == ISD::FNEG) // --X -> X
@@ -2736,7 +2724,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath) {
+ if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
// 0+x --> x
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
@@ -3005,6 +2993,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
default: break;
}
}
+
+ if (Opcode == ISD::FP_ROUND) {
+ APFloat V = N1CFP->getValueAPF(); // make copy
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(*EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
}
// Canonicalize an UNDEF to the RHS, even over a constant.
@@ -3059,7 +3057,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath)
+ if (getTarget().Options.UnsafeFPMath)
return N2;
break;
case ISD::MUL:
@@ -3133,16 +3131,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::SELECT:
if (N1C) {
if (N1C->getZExtValue())
- return N2; // select true, X, Y -> X
- else
- return N3; // select false, X, Y -> Y
+ return N2; // select true, X, Y -> X
+ return N3; // select false, X, Y -> Y
}
if (N2 == N3) return N2; // select C, X, X -> X
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
- break;
case ISD::INSERT_SUBVECTOR: {
SDValue Index = N3;
if (VT.isSimple() && N1.getValueType().isSimple()
@@ -3275,8 +3271,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
/// used when a memcpy is turned into a memset when the source is a constant
/// string ptr.
static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
- const TargetLowering &TLI,
- std::string &Str, unsigned Offset) {
+ const TargetLowering &TLI, StringRef Str) {
// Handle vector with all elements zero.
if (Str.empty()) {
if (VT.isInteger())
@@ -3294,15 +3289,18 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
}
assert(!VT.isVector() && "Can't handle vector type here!");
- unsigned NumBits = VT.getSizeInBits();
- unsigned MSB = NumBits / 8;
+ unsigned NumVTBytes = VT.getSizeInBits() / 8;
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
+
uint64_t Val = 0;
- if (TLI.isLittleEndian())
- Offset = Offset + MSB - 1;
- for (unsigned i = 0; i != MSB; ++i) {
- Val = (Val << 8) | (unsigned char)Str[Offset];
- Offset += TLI.isLittleEndian() ? -1 : 1;
+ if (TLI.isLittleEndian()) {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << i*8;
+ } else {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
}
+
return DAG.getConstant(Val, VT);
}
@@ -3317,7 +3315,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
/// isMemSrcFromString - Returns true if memcpy source is a string constant.
///
-static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
unsigned SrcDelta = 0;
GlobalAddressSDNode *G = NULL;
if (Src.getOpcode() == ISD::GlobalAddress)
@@ -3331,11 +3329,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
if (!G)
return false;
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
- if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
- return true;
-
- return false;
+ return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false);
}
/// FindOptimalMemOpLowering - Determines the optimial series memory ops
@@ -3345,7 +3339,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe,
+ bool IsZeroVal,
bool MemcpyStrSrc,
SelectionDAG &DAG,
const TargetLowering &TLI) {
@@ -3359,7 +3353,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
- NonScalarIntSafe, MemcpyStrSrc,
+ IsZeroVal, MemcpyStrSrc,
DAG.getMachineFunction());
if (VT == MVT::Other) {
@@ -3438,7 +3432,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- std::string Str;
+ StringRef Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
@@ -3475,7 +3469,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// We only handle zero vectors here.
// FIXME: Handle other cases where store of vector immediate is done in
// a single instruction.
- Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
DstPtrInfo.getWithOffset(DstOff), isVol,
@@ -3562,7 +3556,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
SrcPtrInfo.getWithOffset(SrcOff), isVol,
- false, SrcAlign);
+ false, false, SrcAlign);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -3606,11 +3600,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
- bool NonScalarIntSafe =
+ bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
- NonScalarIntSafe, false, DAG, TLI))
+ IsZeroVal, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3717,8 +3711,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMCPY), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
TLI.getPointerTy()),
Args, *this, dl);
@@ -3769,8 +3764,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
TLI.getPointerTy()),
Args, *this, dl);
@@ -3829,8 +3825,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMSET), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET),
+ /*isTailCall=*/false,
+ /*doesNotReturn*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()),
Args, *this, dl);
@@ -4138,8 +4135,9 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
MachinePointerInfo PtrInfo, EVT MemVT,
- bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ bool isVolatile, bool isNonTemporal, bool isInvariant,
+ unsigned Alignment, const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
@@ -4150,6 +4148,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+ if (isInvariant)
+ Flags |= MachineMemOperand::MOInvariant;
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
@@ -4159,7 +4159,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
- TBAAInfo);
+ TBAAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
@@ -4196,7 +4196,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -4213,10 +4214,13 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ bool isInvariant, unsigned Alignment,
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
- PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo, Ranges);
}
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
@@ -4226,7 +4230,7 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
- PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+ PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment,
TBAAInfo);
}
@@ -4239,8 +4243,8 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
"Load is already a indexed load!");
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
- LD->getMemoryVT(),
- LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
+ LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(),
+ false, LD->getAlignment());
}
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
@@ -4282,7 +4286,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(), MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4349,7 +4353,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(SVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(), MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4903,6 +4907,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
return N;
}
+/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the information that operation is associated with multiple lines.
+/// This will make the debugger working better at -O0, were there is a higher
+/// probability having other instructions associated with that line.
+///
+SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) {
+ DebugLoc NLoc = N->getDebugLoc();
+ if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) {
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+}
+
/// MorphNodeTo - This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
///
@@ -4924,7 +4942,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
- return ON;
+ return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc());
}
if (!RemoveNodeFromCSEMaps(N))
@@ -5128,8 +5146,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
- return cast<MachineSDNode>(E);
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL));
+ }
}
// Allocate a new MachineSDNode.
@@ -5290,6 +5309,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (FromN == getRoot())
+ setRoot(To);
}
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5335,6 +5358,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To, getRoot().getResNo()));
}
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5373,6 +5400,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To[getRoot().getResNo()]));
}
/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
@@ -5431,6 +5462,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot())
+ setRoot(To);
}
namespace {
@@ -5657,7 +5692,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal());
+ MMO->isNonTemporal(), MMO->isInvariant());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(isNonTemporal() == MMO->isNonTemporal() &&
"Non-temporal encoding error!");
@@ -5670,7 +5705,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
: SDNode(Opc, dl, VTs, Ops, NumOps),
MemoryVT(memvt), MMO(mmo) {
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal());
+ MMO->isNonTemporal(), MMO->isInvariant());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
}
@@ -5846,565 +5881,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
}
-std::string SDNode::getOperationName(const SelectionDAG *G) const {
- switch (getOpcode()) {
- default:
- if (getOpcode() < ISD::BUILTIN_OP_END)
- return "<<Unknown DAG Node>>";
- if (isMachineOpcode()) {
- if (G)
- if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
- if (getMachineOpcode() < TII->getNumOpcodes())
- return TII->get(getMachineOpcode()).getName();
- return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
- }
- if (G) {
- const TargetLowering &TLI = G->getTargetLoweringInfo();
- const char *Name = TLI.getTargetNodeName(getOpcode());
- if (Name) return Name;
- return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
- }
- return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
-
-#ifndef NDEBUG
- case ISD::DELETED_NODE:
- return "<<Deleted Node!>>";
-#endif
- case ISD::PREFETCH: return "Prefetch";
- case ISD::MEMBARRIER: return "MemBarrier";
- case ISD::ATOMIC_FENCE: return "AtomicFence";
- case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
- case ISD::ATOMIC_SWAP: return "AtomicSwap";
- case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
- case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
- case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
- case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
- case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
- case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
- case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
- case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
- case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
- case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
- case ISD::ATOMIC_LOAD: return "AtomicLoad";
- case ISD::ATOMIC_STORE: return "AtomicStore";
- case ISD::PCMARKER: return "PCMarker";
- case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
- case ISD::SRCVALUE: return "SrcValue";
- case ISD::MDNODE_SDNODE: return "MDNode";
- case ISD::EntryToken: return "EntryToken";
- case ISD::TokenFactor: return "TokenFactor";
- case ISD::AssertSext: return "AssertSext";
- case ISD::AssertZext: return "AssertZext";
-
- case ISD::BasicBlock: return "BasicBlock";
- case ISD::VALUETYPE: return "ValueType";
- case ISD::Register: return "Register";
-
- case ISD::Constant: return "Constant";
- case ISD::ConstantFP: return "ConstantFP";
- case ISD::GlobalAddress: return "GlobalAddress";
- case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
- case ISD::FrameIndex: return "FrameIndex";
- case ISD::JumpTable: return "JumpTable";
- case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
- case ISD::RETURNADDR: return "RETURNADDR";
- case ISD::FRAMEADDR: return "FRAMEADDR";
- case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
- case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
- case ISD::LSDAADDR: return "LSDAADDR";
- case ISD::EHSELECTION: return "EHSELECTION";
- case ISD::EH_RETURN: return "EH_RETURN";
- case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
- case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
- case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
- case ISD::ConstantPool: return "ConstantPool";
- case ISD::ExternalSymbol: return "ExternalSymbol";
- case ISD::BlockAddress: return "BlockAddress";
- case ISD::INTRINSIC_WO_CHAIN:
- case ISD::INTRINSIC_VOID:
- case ISD::INTRINSIC_W_CHAIN: {
- unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
- unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
- if (IID < Intrinsic::num_intrinsics)
- return Intrinsic::getName((Intrinsic::ID)IID);
- else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
- return TII->getName(IID);
- llvm_unreachable("Invalid intrinsic ID");
- }
-
- case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
- case ISD::TargetConstant: return "TargetConstant";
- case ISD::TargetConstantFP:return "TargetConstantFP";
- case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
- case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
- case ISD::TargetFrameIndex: return "TargetFrameIndex";
- case ISD::TargetJumpTable: return "TargetJumpTable";
- case ISD::TargetConstantPool: return "TargetConstantPool";
- case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
- case ISD::TargetBlockAddress: return "TargetBlockAddress";
-
- case ISD::CopyToReg: return "CopyToReg";
- case ISD::CopyFromReg: return "CopyFromReg";
- case ISD::UNDEF: return "undef";
- case ISD::MERGE_VALUES: return "merge_values";
- case ISD::INLINEASM: return "inlineasm";
- case ISD::EH_LABEL: return "eh_label";
- case ISD::HANDLENODE: return "handlenode";
-
- // Unary operators
- case ISD::FABS: return "fabs";
- case ISD::FNEG: return "fneg";
- case ISD::FSQRT: return "fsqrt";
- case ISD::FSIN: return "fsin";
- case ISD::FCOS: return "fcos";
- case ISD::FTRUNC: return "ftrunc";
- case ISD::FFLOOR: return "ffloor";
- case ISD::FCEIL: return "fceil";
- case ISD::FRINT: return "frint";
- case ISD::FNEARBYINT: return "fnearbyint";
- case ISD::FEXP: return "fexp";
- case ISD::FEXP2: return "fexp2";
- case ISD::FLOG: return "flog";
- case ISD::FLOG2: return "flog2";
- case ISD::FLOG10: return "flog10";
-
- // Binary operators
- case ISD::ADD: return "add";
- case ISD::SUB: return "sub";
- case ISD::MUL: return "mul";
- case ISD::MULHU: return "mulhu";
- case ISD::MULHS: return "mulhs";
- case ISD::SDIV: return "sdiv";
- case ISD::UDIV: return "udiv";
- case ISD::SREM: return "srem";
- case ISD::UREM: return "urem";
- case ISD::SMUL_LOHI: return "smul_lohi";
- case ISD::UMUL_LOHI: return "umul_lohi";
- case ISD::SDIVREM: return "sdivrem";
- case ISD::UDIVREM: return "udivrem";
- case ISD::AND: return "and";
- case ISD::OR: return "or";
- case ISD::XOR: return "xor";
- case ISD::SHL: return "shl";
- case ISD::SRA: return "sra";
- case ISD::SRL: return "srl";
- case ISD::ROTL: return "rotl";
- case ISD::ROTR: return "rotr";
- case ISD::FADD: return "fadd";
- case ISD::FSUB: return "fsub";
- case ISD::FMUL: return "fmul";
- case ISD::FDIV: return "fdiv";
- case ISD::FMA: return "fma";
- case ISD::FREM: return "frem";
- case ISD::FCOPYSIGN: return "fcopysign";
- case ISD::FGETSIGN: return "fgetsign";
- case ISD::FPOW: return "fpow";
-
- case ISD::FPOWI: return "fpowi";
- case ISD::SETCC: return "setcc";
- case ISD::SELECT: return "select";
- case ISD::VSELECT: return "vselect";
- case ISD::SELECT_CC: return "select_cc";
- case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
- case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
- case ISD::CONCAT_VECTORS: return "concat_vectors";
- case ISD::INSERT_SUBVECTOR: return "insert_subvector";
- case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
- case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
- case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
- case ISD::CARRY_FALSE: return "carry_false";
- case ISD::ADDC: return "addc";
- case ISD::ADDE: return "adde";
- case ISD::SADDO: return "saddo";
- case ISD::UADDO: return "uaddo";
- case ISD::SSUBO: return "ssubo";
- case ISD::USUBO: return "usubo";
- case ISD::SMULO: return "smulo";
- case ISD::UMULO: return "umulo";
- case ISD::SUBC: return "subc";
- case ISD::SUBE: return "sube";
- case ISD::SHL_PARTS: return "shl_parts";
- case ISD::SRA_PARTS: return "sra_parts";
- case ISD::SRL_PARTS: return "srl_parts";
-
- // Conversion operators.
- case ISD::SIGN_EXTEND: return "sign_extend";
- case ISD::ZERO_EXTEND: return "zero_extend";
- case ISD::ANY_EXTEND: return "any_extend";
- case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
- case ISD::TRUNCATE: return "truncate";
- case ISD::FP_ROUND: return "fp_round";
- case ISD::FLT_ROUNDS_: return "flt_rounds";
- case ISD::FP_ROUND_INREG: return "fp_round_inreg";
- case ISD::FP_EXTEND: return "fp_extend";
-
- case ISD::SINT_TO_FP: return "sint_to_fp";
- case ISD::UINT_TO_FP: return "uint_to_fp";
- case ISD::FP_TO_SINT: return "fp_to_sint";
- case ISD::FP_TO_UINT: return "fp_to_uint";
- case ISD::BITCAST: return "bitcast";
- case ISD::FP16_TO_FP32: return "fp16_to_fp32";
- case ISD::FP32_TO_FP16: return "fp32_to_fp16";
-
- case ISD::CONVERT_RNDSAT: {
- switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
- default: llvm_unreachable("Unknown cvt code!");
- case ISD::CVT_FF: return "cvt_ff";
- case ISD::CVT_FS: return "cvt_fs";
- case ISD::CVT_FU: return "cvt_fu";
- case ISD::CVT_SF: return "cvt_sf";
- case ISD::CVT_UF: return "cvt_uf";
- case ISD::CVT_SS: return "cvt_ss";
- case ISD::CVT_SU: return "cvt_su";
- case ISD::CVT_US: return "cvt_us";
- case ISD::CVT_UU: return "cvt_uu";
- }
- }
-
- // Control flow instructions
- case ISD::BR: return "br";
- case ISD::BRIND: return "brind";
- case ISD::BR_JT: return "br_jt";
- case ISD::BRCOND: return "brcond";
- case ISD::BR_CC: return "br_cc";
- case ISD::CALLSEQ_START: return "callseq_start";
- case ISD::CALLSEQ_END: return "callseq_end";
-
- // Other operators
- case ISD::LOAD: return "load";
- case ISD::STORE: return "store";
- case ISD::VAARG: return "vaarg";
- case ISD::VACOPY: return "vacopy";
- case ISD::VAEND: return "vaend";
- case ISD::VASTART: return "vastart";
- case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
- case ISD::EXTRACT_ELEMENT: return "extract_element";
- case ISD::BUILD_PAIR: return "build_pair";
- case ISD::STACKSAVE: return "stacksave";
- case ISD::STACKRESTORE: return "stackrestore";
- case ISD::TRAP: return "trap";
-
- // Bit manipulation
- case ISD::BSWAP: return "bswap";
- case ISD::CTPOP: return "ctpop";
- case ISD::CTTZ: return "cttz";
- case ISD::CTLZ: return "ctlz";
-
- // Trampolines
- case ISD::INIT_TRAMPOLINE: return "init_trampoline";
- case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
-
- case ISD::CONDCODE:
- switch (cast<CondCodeSDNode>(this)->get()) {
- default: llvm_unreachable("Unknown setcc condition!");
- case ISD::SETOEQ: return "setoeq";
- case ISD::SETOGT: return "setogt";
- case ISD::SETOGE: return "setoge";
- case ISD::SETOLT: return "setolt";
- case ISD::SETOLE: return "setole";
- case ISD::SETONE: return "setone";
-
- case ISD::SETO: return "seto";
- case ISD::SETUO: return "setuo";
- case ISD::SETUEQ: return "setue";
- case ISD::SETUGT: return "setugt";
- case ISD::SETUGE: return "setuge";
- case ISD::SETULT: return "setult";
- case ISD::SETULE: return "setule";
- case ISD::SETUNE: return "setune";
-
- case ISD::SETEQ: return "seteq";
- case ISD::SETGT: return "setgt";
- case ISD::SETGE: return "setge";
- case ISD::SETLT: return "setlt";
- case ISD::SETLE: return "setle";
- case ISD::SETNE: return "setne";
- }
- }
-}
-
-const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
- switch (AM) {
- default:
- return "";
- case ISD::PRE_INC:
- return "<pre-inc>";
- case ISD::PRE_DEC:
- return "<pre-dec>";
- case ISD::POST_INC:
- return "<post-inc>";
- case ISD::POST_DEC:
- return "<post-dec>";
- }
-}
-
-std::string ISD::ArgFlagsTy::getArgFlagsString() {
- std::string S = "< ";
-
- if (isZExt())
- S += "zext ";
- if (isSExt())
- S += "sext ";
- if (isInReg())
- S += "inreg ";
- if (isSRet())
- S += "sret ";
- if (isByVal())
- S += "byval ";
- if (isNest())
- S += "nest ";
- if (getByValAlign())
- S += "byval-align:" + utostr(getByValAlign()) + " ";
- if (getOrigAlign())
- S += "orig-align:" + utostr(getOrigAlign()) + " ";
- if (getByValSize())
- S += "byval-size:" + utostr(getByValSize()) + " ";
- return S + ">";
-}
-
-void SDNode::dump() const { dump(0); }
-void SDNode::dump(const SelectionDAG *G) const {
- print(dbgs(), G);
- dbgs() << '\n';
-}
-
-void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
- OS << (void*)this << ": ";
-
- for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
- if (i) OS << ",";
- if (getValueType(i) == MVT::Other)
- OS << "ch";
- else
- OS << getValueType(i).getEVTString();
- }
- OS << " = " << getOperationName(G);
-}
-
-void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
- if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
- if (!MN->memoperands_empty()) {
- OS << "<";
- OS << "Mem:";
- for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
- e = MN->memoperands_end(); i != e; ++i) {
- OS << **i;
- if (llvm::next(i) != e)
- OS << " ";
- }
- OS << ">";
- }
- } else if (const ShuffleVectorSDNode *SVN =
- dyn_cast<ShuffleVectorSDNode>(this)) {
- OS << "<";
- for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
- int Idx = SVN->getMaskElt(i);
- if (i) OS << ",";
- if (Idx < 0)
- OS << "u";
- else
- OS << Idx;
- }
- OS << ">";
- } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
- OS << '<' << CSDN->getAPIntValue() << '>';
- } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
- if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
- OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
- else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
- OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
- else {
- OS << "<APFloat(";
- CSDN->getValueAPF().bitcastToAPInt().dump();
- OS << ")>";
- }
- } else if (const GlobalAddressSDNode *GADN =
- dyn_cast<GlobalAddressSDNode>(this)) {
- int64_t offset = GADN->getOffset();
- OS << '<';
- WriteAsOperand(OS, GADN->getGlobal());
- OS << '>';
- if (offset > 0)
- OS << " + " << offset;
- else
- OS << " " << offset;
- if (unsigned int TF = GADN->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
- OS << "<" << FIDN->getIndex() << ">";
- } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
- OS << "<" << JTDN->getIndex() << ">";
- if (unsigned int TF = JTDN->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
- int offset = CP->getOffset();
- if (CP->isMachineConstantPoolEntry())
- OS << "<" << *CP->getMachineCPVal() << ">";
- else
- OS << "<" << *CP->getConstVal() << ">";
- if (offset > 0)
- OS << " + " << offset;
- else
- OS << " " << offset;
- if (unsigned int TF = CP->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
- OS << "<";
- const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
- if (LBB)
- OS << LBB->getName() << " ";
- OS << (const void*)BBDN->getBasicBlock() << ">";
- } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
- } else if (const ExternalSymbolSDNode *ES =
- dyn_cast<ExternalSymbolSDNode>(this)) {
- OS << "'" << ES->getSymbol() << "'";
- if (unsigned int TF = ES->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
- if (M->getValue())
- OS << "<" << M->getValue() << ">";
- else
- OS << "<null>";
- } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
- if (MD->getMD())
- OS << "<" << MD->getMD() << ">";
- else
- OS << "<null>";
- } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
- OS << ":" << N->getVT().getEVTString();
- }
- else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
- OS << "<" << *LD->getMemOperand();
-
- bool doExt = true;
- switch (LD->getExtensionType()) {
- default: doExt = false; break;
- case ISD::EXTLOAD: OS << ", anyext"; break;
- case ISD::SEXTLOAD: OS << ", sext"; break;
- case ISD::ZEXTLOAD: OS << ", zext"; break;
- }
- if (doExt)
- OS << " from " << LD->getMemoryVT().getEVTString();
-
- const char *AM = getIndexedModeName(LD->getAddressingMode());
- if (*AM)
- OS << ", " << AM;
-
- OS << ">";
- } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
- OS << "<" << *ST->getMemOperand();
-
- if (ST->isTruncatingStore())
- OS << ", trunc to " << ST->getMemoryVT().getEVTString();
-
- const char *AM = getIndexedModeName(ST->getAddressingMode());
- if (*AM)
- OS << ", " << AM;
-
- OS << ">";
- } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
- OS << "<" << *M->getMemOperand() << ">";
- } else if (const BlockAddressSDNode *BA =
- dyn_cast<BlockAddressSDNode>(this)) {
- OS << "<";
- WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
- OS << ", ";
- WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
- OS << ">";
- if (unsigned int TF = BA->getTargetFlags())
- OS << " [TF=" << TF << ']';
- }
-
- if (G)
- if (unsigned Order = G->GetOrdering(this))
- OS << " [ORD=" << Order << ']';
-
- if (getNodeId() != -1)
- OS << " [ID=" << getNodeId() << ']';
-
- DebugLoc dl = getDebugLoc();
- if (G && !dl.isUnknown()) {
- DIScope
- Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
- OS << " dbg:";
- // Omit the directory, since it's usually long and uninteresting.
- if (Scope.Verify())
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << dl.getLine();
- if (dl.getCol() != 0)
- OS << ':' << dl.getCol();
- }
-}
-
-void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (i) OS << ", "; else OS << " ";
- OS << (void*)getOperand(i).getNode();
- if (unsigned RN = getOperand(i).getResNo())
- OS << ":" << RN;
- }
- print_details(OS, G);
-}
-
-static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
- const SelectionDAG *G, unsigned depth,
- unsigned indent) {
- if (depth == 0)
- return;
-
- OS.indent(indent);
-
- N->print(OS, G);
-
- if (depth < 1)
- return;
-
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- // Don't follow chain operands.
- if (N->getOperand(i).getValueType() == MVT::Other)
- continue;
- OS << '\n';
- printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
- }
-}
-
-void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
- unsigned depth) const {
- printrWithDepthHelper(OS, this, G, depth, 0);
-}
-
-void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
- // Don't print impossibly deep things.
- printrWithDepth(OS, G, 10);
-}
-
-void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
- printrWithDepth(dbgs(), G, depth);
-}
-
-void SDNode::dumprFull(const SelectionDAG *G) const {
- // Don't print impossibly deep things.
- dumprWithDepth(G, 10);
-}
-
-static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- if (N->getOperand(i).getNode()->hasOneUse())
- DumpNodes(N->getOperand(i).getNode(), indent+2, G);
- else
- dbgs() << "\n" << std::string(indent+2, ' ')
- << (void*)N->getOperand(i).getNode() << ": <multiple use>";
-
-
- dbgs() << "\n";
- dbgs().indent(indent);
- N->dump(G);
-}
-
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -6527,20 +6003,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- // If GV has specified alignment, then use it. Otherwise, use the preferred
- // alignment.
- unsigned Align = GV->getAlignment();
- if (!Align) {
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
- if (GVar->hasInitializer()) {
- const TargetData *TD = TLI.getTargetData();
- Align = TD->getPreferredAlignment(GVar);
- }
- }
- if (!Align)
- Align = TLI.getTargetData()->getABITypeAlignment(GV->getType());
- }
- return MinAlign(Align, GVOffset);
+ unsigned PtrWidth = TLI.getPointerTy().getSizeInBits();
+ APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
+ llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
+ TLI.getTargetData());
+ unsigned AlignBits = KnownZero.countTrailingOnes();
+ unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
+ if (Align)
+ return MinAlign(Align, GVOffset);
}
// If this is a direct reference to a stack slot, use information about the
@@ -6566,74 +6036,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
return 0;
}
-void SelectionDAG::dump() const {
- dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
-
- for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
- I != E; ++I) {
- const SDNode *N = I;
- if (!N->hasOneUse() && N != getRoot().getNode())
- DumpNodes(N, 2, this);
- }
-
- if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
-
- dbgs() << "\n\n";
-}
-
-void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
- print_details(OS, G);
-}
-
-typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
-static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
- const SelectionDAG *G, VisitedSDNodeSet &once) {
- if (!once.insert(N)) // If we've been here before, return now.
- return;
-
- // Dump the current SDNode, but don't end the line yet.
- OS << std::string(indent, ' ');
- N->printr(OS, G);
-
- // Having printed this SDNode, walk the children:
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
-
- if (i) OS << ",";
- OS << " ";
-
- if (child->getNumOperands() == 0) {
- // This child has no grandchildren; print it inline right here.
- child->printr(OS, G);
- once.insert(child);
- } else { // Just the address. FIXME: also print the child's opcode.
- OS << (void*)child;
- if (unsigned RN = N->getOperand(i).getResNo())
- OS << ":" << RN;
- }
- }
-
- OS << "\n";
-
- // Dump children that have grandchildren on their own line(s).
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
- DumpNodesr(OS, child, indent+2, G, once);
- }
-}
-
-void SDNode::dumpr() const {
- VisitedSDNodeSet once;
- DumpNodesr(dbgs(), this, 0, 0, once);
-}
-
-void SDNode::dumpr(const SelectionDAG *G) const {
- VisitedSDNodeSet once;
- DumpNodesr(dbgs(), this, 0, G, once);
-}
-
-
// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 095b4001696f..94cb95804f69 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -41,13 +41,13 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -197,7 +197,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
- DAG.getIntPtrConstant(1));
+ DAG.getTargetConstant(1, TLI.getPointerTy()));
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
@@ -206,7 +206,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
- return SDValue();
}
/// getCopyFromParts - Create a value that contains the specified legal parts
@@ -353,10 +352,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
- assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
@@ -364,10 +366,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
- assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
// The value may have changed - recompute ValueVT.
@@ -813,9 +818,11 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
-void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li) {
AA = &aa;
GFI = gfi;
+ LibInfo = li;
TD = DAG.getTarget().getTargetData();
LPadToCallSiteMap.clear();
}
@@ -964,7 +971,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
DanglingDebugInfoMap[V] = DanglingDebugInfo();
}
}
@@ -1054,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return DAG.getMergeValues(&Constants[0], Constants.size(),
getCurDebugLoc());
}
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Ops.push_back(SDValue(Val, i));
+ }
+
+ if (isa<ArrayType>(CDS->getType()))
+ return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc());
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
@@ -1088,9 +1112,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
SmallVector<SDValue, 16> Ops;
- if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
for (unsigned i = 0; i != NumElements; ++i)
- Ops.push_back(getValue(CP->getOperand(i)));
+ Ops.push_back(getValue(CV->getOperand(i)));
} else {
assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
EVT EltVT = TLI.getValueType(VecTy->getElementType());
@@ -1126,7 +1150,6 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
}
llvm_unreachable("Can't get register for value!");
- return SDValue();
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
@@ -1285,8 +1308,8 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
-uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
- MachineBasicBlock *Dst) {
+uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (!BPI)
return 0;
@@ -1336,6 +1359,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
Condition = getICmpCondCode(IC->getPredicate());
} else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
Condition = getFCmpCondCode(FC->getPredicate());
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
} else {
Condition = ISD::SETEQ; // silence warning.
llvm_unreachable("Unknown compare instruction");
@@ -1811,8 +1836,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
CopyToExportRegsIfNeeded(&I);
// Update successor info
- InvokeMBB->addSuccessor(Return);
- InvokeMBB->addSuccessor(LandingPad);
+ addSuccessorWithWeight(InvokeMBB, Return);
+ addSuccessorWithWeight(InvokeMBB, LandingPad);
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
@@ -1820,9 +1845,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
DAG.getBasicBlock(Return)));
}
-void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
-}
-
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
@@ -1835,6 +1857,12 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
AddLandingPadInfo(LP, MMI, MBB);
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother to create these DAG nodes.
+ if (TLI.getExceptionPointerRegister() == 0 &&
+ TLI.getExceptionSelectorRegister() == 0)
+ return;
+
SmallVector<EVT, 2> ValueVTs;
ComputeValueVTs(TLI, LP.getType(), ValueVTs);
@@ -2003,7 +2031,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
static inline bool areJTsAllowed(const TargetLowering &TLI) {
- return !DisableJumpTables &&
+ return !TLI.getTargetMachine().Options.DisableJumpTables &&
(TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}
@@ -2190,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRange LHSR(CR.Range.first, Pivot);
CaseRange RHSR(Pivot, CR.Range.second);
- Constant *C = Pivot->Low;
+ const Constant *C = Pivot->Low;
MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
// We know that we branch to the LHS if the Value being switched on is
@@ -2383,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
- for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
- BasicBlock *SuccBB = SI.getSuccessor(i);
+ for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
- Cases.push_back(Case(SI.getSuccessorValue(i),
- SI.getSuccessorValue(i),
+ Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
SMBB, ExtraWeight));
}
std::sort(Cases.begin(), Cases.end(), CaseCmp());
@@ -2457,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// If there is only the default destination, branch to it if it is not the
// next basic block. Otherwise, just fall through.
- if (SI.getNumCases() == 1) {
+ if (!SI.getNumCases()) {
// Update machine-CFG edges.
// If this is not a fall-through branch, emit the branch.
@@ -2626,6 +2654,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
}
@@ -2685,11 +2715,12 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
- DestVT, N, DAG.getIntPtrConstant(0)));
+ DestVT, N,
+ DAG.getTargetConstant(0, TLI.getPointerTy())));
}
void SelectionDAGBuilder::visitFPExt(const User &I){
- // FPTrunc is never a no-op cast, no need to check
+ // FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
@@ -2772,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
TLI.getValueType(I.getType()), InVec, InIdx));
}
-// Utility for visitShuffleVector - Returns true if the mask is mask starting
-// from SIndx and increasing to the element length (undefs are allowed).
-static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
- unsigned MaskNumElts = Mask.size();
- for (unsigned i = 0; i != MaskNumElts; ++i)
- if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+// Utility for visitShuffleVector - Return true if every element in Mask,
+// begining from position Pos and ending in Pos+Size, falls within the
+// specified sequential range [L, L+Pos). or is undef.
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
+ unsigned Pos, unsigned Size, int Low) {
+ for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (Mask[i] >= 0 && Mask[i] != Low)
return false;
return true;
}
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
- SmallVector<int, 8> Mask;
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
- // Convert the ConstantVector mask operand into an array of ints, with -1
- // representing undef values.
- SmallVector<Constant*, 8> MaskElts;
- cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
- unsigned MaskNumElts = MaskElts.size();
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- if (isa<UndefValue>(MaskElts[i]))
- Mask.push_back(-1);
- else
- Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
- }
-
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+ unsigned MaskNumElts = Mask.size();
+
EVT VT = TLI.getValueType(I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -2814,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Mask is longer than the source vectors and is a multiple of the source
// vectors. We can use concatenate vector to make the mask and vectors
// lengths match.
- if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
- // The shuffle is concatenating two vectors together.
- setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
- VT, Src1, Src2));
- return;
+ if (SrcNumElts*2 == MaskNumElts) {
+ // First check for Src1 in low and Src2 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+ // Then check for Src2 in low and Src1 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src2, Src1));
+ return;
+ }
}
// Pad both vectors with undefs to make them the same length as the mask.
@@ -2843,10 +2878,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SmallVector<int, 8> MappedOps;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- if (Idx < (int)SrcNumElts)
- MappedOps.push_back(Idx);
- else
- MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts - MaskNumElts;
+ MappedOps.push_back(Idx);
}
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2858,13 +2892,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle. The analysis is done by calculating
// the range of elements the mask access on both vectors.
- int MinRange[2] = { static_cast<int>(SrcNumElts+1),
- static_cast<int>(SrcNumElts+1)};
+ int MinRange[2] = { static_cast<int>(SrcNumElts),
+ static_cast<int>(SrcNumElts)};
int MaxRange[2] = {-1, -1};
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- int Input = 0;
+ unsigned Input = 0;
if (Idx < 0)
continue;
@@ -2880,35 +2914,31 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Check if the access is smaller than the vector size and can we find
// a reasonable extract index.
- int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not
- // Extract.
+ int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not
+ // Extract.
int StartIdx[2]; // StartIdx to extract from
- for (int Input=0; Input < 2; ++Input) {
- if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
RangeUse[Input] = 0; // Unused
StartIdx[Input] = 0;
- } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
- // Fits within range but we should see if we can find a good
- // start index that is a multiple of the mask length.
- if (MaxRange[Input] < (int)MaskNumElts) {
- RangeUse[Input] = 1; // Extract from beginning of the vector
- StartIdx[Input] = 0;
- } else {
- StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
- if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts <= SrcNumElts)
- RangeUse[Input] = 1; // Extract from a multiple of the mask length.
- }
+ continue;
}
+
+ // Find a good start index that is a multiple of the mask length. Then
+ // see if the rest of the elements are in range.
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
}
if (RangeUse[0] == 0 && RangeUse[1] == 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
- else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+ if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
// Extract appropriate subvector and generate a vector shuffle
- for (int Input=0; Input < 2; ++Input) {
+ for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
if (RangeUse[Input] == 0)
Src = DAG.getUNDEF(VT);
@@ -2921,12 +2951,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SmallVector<int, 8> MappedOps;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- if (Idx < 0)
- MappedOps.push_back(Idx);
- else if (Idx < (int)SrcNumElts)
- MappedOps.push_back(Idx - StartIdx[0]);
- else
- MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+ if (Idx >= 0) {
+ if (Idx < (int)SrcNumElts)
+ Idx -= StartIdx[0];
+ else
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ }
+ MappedOps.push_back(Idx);
}
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2942,22 +2973,20 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
EVT PtrVT = TLI.getPointerTy();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
- if (Mask[i] < 0) {
- Ops.push_back(DAG.getUNDEF(EltVT));
- } else {
- int Idx = Mask[i];
- SDValue Res;
+ int Idx = Mask[i];
+ SDValue Res;
- if (Idx < (int)SrcNumElts)
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
- EltVT, Src1, DAG.getConstant(Idx, PtrVT));
- else
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
- EltVT, Src2,
- DAG.getConstant(Idx - SrcNumElts, PtrVT));
+ if (Idx < 0) {
+ Res = DAG.getUNDEF(EltVT);
+ } else {
+ SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
+ if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
- Ops.push_back(Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src, DAG.getConstant(Idx, PtrVT));
}
+
+ Ops.push_back(Res);
}
setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
@@ -3042,7 +3071,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue N = getValue(I.getOperand(0));
- Type *Ty = I.getOperand(0)->getType();
+ // Note that the pointer operand may be a vector of pointers. Take the scalar
+ // element which holds a pointer.
+ Type *Ty = I.getOperand(0)->getType()->getScalarType();
for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
@@ -3096,7 +3127,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned Amt = ElementSize.logBase2();
IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
N.getValueType(), IdxN,
- DAG.getConstant(Amt, TLI.getPointerTy()));
+ DAG.getConstant(Amt, IdxN.getValueType()));
} else {
SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
@@ -3175,8 +3206,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ bool isInvariant = I.getMetadata("invariant.load") != 0;
unsigned Alignment = I.getAlignment();
const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -3224,7 +3257,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, isInvariant, Alignment, TBAAInfo,
+ Ranges);
Values[i] = L;
Chains[ChainI] = L.getValue(1);
@@ -3358,7 +3392,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
DebugLoc dl = getCurDebugLoc();
ISD::NodeType NT;
switch (I.getOperation()) {
- default: llvm_unreachable("Unknown atomicrmw operation"); return;
+ default: llvm_unreachable("Unknown atomicrmw operation");
case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
@@ -3496,24 +3530,16 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
SDValue Op = getValue(I.getArgOperand(i));
- assert(TLI.isTypeLegal(Op.getValueType()) &&
- "Intrinsic uses a non-legal type?");
Ops.push_back(Op);
}
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I.getType(), ValueVTs);
-#ifndef NDEBUG
- for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
- assert(TLI.isTypeLegal(ValueVTs[Val]) &&
- "Intrinsic uses a non-legal type?");
- }
-#endif // NDEBUG
if (HasChain)
ValueVTs.push_back(MVT::Other);
@@ -3556,6 +3582,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
setValue(&I, Result);
+ } else {
+ // Assign order to result here. If the intrinsic does not produce a result,
+ // it won't be mapped to a SDNode and visit() will not assign it an order
+ // number.
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.getNode());
}
}
@@ -3597,17 +3629,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
}
-// implVisitAluOverflow - Lower arithmetic overflow instrinsics.
-const char *
-SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
- SDValue Op1 = getValue(I.getArgOperand(0));
- SDValue Op2 = getValue(I.getArgOperand(1));
-
- SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
- setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
- return 0;
-}
-
/// visitExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
void
@@ -4367,9 +4388,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) {
const SDValue &CFR = Ext.getOperand(0);
if (CFR.getOpcode() == ISD::CopyFromReg)
return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
- else
- if (CFR.getOpcode() == ISD::TRUNCATE)
- return getTruncatedArgReg(CFR);
+ if (CFR.getOpcode() == ISD::TRUNCATE)
+ return getTruncatedArgReg(CFR);
}
return 0;
}
@@ -4398,7 +4418,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
// Some arguments' frame index is recorded during argument lowering.
Offset = FuncInfo.getArgumentFrameIndex(Arg);
if (Offset)
- Reg = TRI->getFrameRegister(MF);
+ Reg = TRI->getFrameRegister(MF);
if (!Reg && N.getNode()) {
if (N.getOpcode() == ISD::CopyFromReg)
@@ -4473,9 +4493,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::setjmp:
- return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+ return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
case Intrinsic::longjmp:
- return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+ return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
// Assert for address < 256 since we support only user defined address
// spaces.
@@ -4531,8 +4551,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
MDNode *Variable = DI.getVariable();
const Value *Address = DI.getAddress();
- if (!Address || !DIVariable(Variable).Verify())
+ if (!Address || !DIVariable(Variable).Verify()) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return 0;
+ }
// Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
// but do not always have a corresponding SDNode built. The SDNodeOrder
@@ -4543,7 +4565,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check if address has undef value.
if (isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return 0;
}
@@ -4553,11 +4575,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
N = UnusedArgNodeMap[Address];
SDDbgValue *SDV;
if (N.getNode()) {
- // Parameters are handled specially.
- bool isParameter =
- DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
+ // Parameters are handled specially.
+ bool isParameter =
+ (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
+ isa<Argument>(Address));
+
const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
if (isParameter && !AI) {
@@ -4577,7 +4601,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
0, dl, SDNodeOrder);
else {
// Can't do anything with other non-AI cases yet.
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
+ DEBUG(Address->dump());
return 0;
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
@@ -4599,7 +4625,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
}
}
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
return 0;
@@ -4645,7 +4671,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
// We may expand this to cover more cases. One case where we have no
// data available is an unreferenced parameter.
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
@@ -4654,8 +4680,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
V = BCI->getOperand(0);
const AllocaInst *AI = dyn_cast<AllocaInst>(V);
// Don't handle byval struct arguments or VLAs, for example.
- if (!AI)
+ if (!AI) {
+ DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
+ DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
return 0;
+ }
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI == FuncInfo.StaticAllocaMap.end())
@@ -4667,43 +4696,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
return 0;
}
- case Intrinsic::eh_exception: {
- // Insert the EXCEPTIONADDR instruction.
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
- SDValue Ops[1];
- Ops[0] = DAG.getRoot();
- SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
- setValue(&I, Op);
- DAG.setRoot(Op.getValue(1));
- return 0;
- }
-
- case Intrinsic::eh_selector: {
- MachineBasicBlock *CallMBB = FuncInfo.MBB;
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (CallMBB->isLandingPad())
- AddCatchInfo(I, &MMI, CallMBB);
- else {
-#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(&I);
-#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
- // Insert the EHSELECTION instruction.
- SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
- SDValue Ops[2];
- Ops[0] = getValue(I.getArgOperand(0));
- Ops[1] = getRoot();
- SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
- DAG.setRoot(Op.getValue(1));
- setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
- return 0;
- }
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
@@ -4775,11 +4767,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getRoot(), getValue(I.getArgOperand(0))));
return 0;
}
- case Intrinsic::eh_sjlj_dispatch_setup: {
- DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- getRoot(), getValue(I.getArgOperand(0))));
- return 0;
- }
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
@@ -4841,6 +4828,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return 0;
}
+ case Intrinsic::x86_avx_vinsertf128_pd_256:
+ case Intrinsic::x86_avx_vinsertf128_ps_256:
+ case Intrinsic::x86_avx_vinsertf128_si_256:
+ case Intrinsic::x86_avx2_vinserti128: {
+ DebugLoc dl = getCurDebugLoc();
+ EVT DestVT = TLI.getValueType(I.getType());
+ EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
+ uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
+ ElVT.getVectorNumElements();
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getConstant(Idx, MVT::i32));
+ setValue(&I, Res);
+ return 0;
+ }
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -4852,6 +4855,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::convertuu: {
ISD::CvtCode Code = ISD::CVT_INVALID;
switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::convertff: Code = ISD::CVT_FF; break;
case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
@@ -4946,14 +4950,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
case Intrinsic::ctpop: {
@@ -5052,7 +5060,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
- return 0;
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
return 0;
@@ -5064,7 +5071,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::trap: {
- StringRef TrapFuncName = getTrapFunctionName();
+ StringRef TrapFuncName = TM.Options.getTrapFunctionName();
if (TrapFuncName.empty()) {
DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
return 0;
@@ -5073,25 +5080,36 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue> Result =
TLI.LowerCallTo(getRoot(), I.getType(),
false, false, false, false, 0, CallingConv::C,
- /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
Args, DAG, getCurDebugLoc());
DAG.setRoot(Result.second);
return 0;
}
case Intrinsic::uadd_with_overflow:
- return implVisitAluOverflow(I, ISD::UADDO);
case Intrinsic::sadd_with_overflow:
- return implVisitAluOverflow(I, ISD::SADDO);
case Intrinsic::usub_with_overflow:
- return implVisitAluOverflow(I, ISD::USUBO);
case Intrinsic::ssub_with_overflow:
- return implVisitAluOverflow(I, ISD::SSUBO);
case Intrinsic::umul_with_overflow:
- return implVisitAluOverflow(I, ISD::UMULO);
- case Intrinsic::smul_with_overflow:
- return implVisitAluOverflow(I, ISD::SMULO);
+ case Intrinsic::smul_with_overflow: {
+ ISD::NodeType Op;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
+ case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
+ case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
+ case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
+ case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
+ }
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+ return 0;
+ }
case Intrinsic::prefetch: {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
@@ -5226,7 +5244,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// If there's a possibility that fast-isel has already selected some amount
// of the current basic block, don't emit a tail call.
- if (isTailCall && EnableFastISel)
+ if (isTailCall && TM.Options.EnableFastISel)
isTailCall = false;
std::pair<SDValue,SDValue> Result =
@@ -5236,6 +5254,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
CS.getCallingConv(),
isTailCall,
+ CS.doesNotReturn(),
!CS.getInstruction()->use_empty(),
Callee, Args, DAG, getCurDebugLoc());
assert((isTailCall || Result.second.getNode()) &&
@@ -5264,7 +5283,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
Add,
MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
- false, false, 1);
+ false, false, false, 1);
Values[i] = L;
Chains[i] = L.getValue(1);
}
@@ -5375,7 +5394,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
Ptr, MachinePointerInfo(PtrVal),
false /*volatile*/,
- false /*nontemporal*/, 1 /* align=1 */);
+ false /*nontemporal*/,
+ false /*isinvariant*/, 1 /* align=1 */);
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
@@ -5470,23 +5490,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
- // See if any floating point values are being passed to this function. This is
- // used to emit an undefined reference to fltused on Windows.
- FunctionType *FT =
- cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (FT->isVarArg() &&
- !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Type* T = I.getArgOperand(i)->getType();
- for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
- i != e; ++i) {
- if (!i->isFloatingPointTy()) continue;
- MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
- break;
- }
- }
- }
+ ComputeUsesVAFloatArgument(I, &MMI);
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
@@ -5509,7 +5514,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// can't be a library call.
if (!F->hasLocalLinkage() && F->hasName()) {
StringRef Name = F->getName();
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
+ if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
+ (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
+ (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
if (I.getNumArgOperands() == 2 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5520,7 +5527,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LHS.getValueType(), LHS, RHS));
return;
}
- } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+ } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
+ (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
+ (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType()) {
@@ -5529,7 +5538,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+ } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
+ (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
+ (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5539,7 +5550,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+ } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
+ (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
+ (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5549,7 +5562,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+ } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
+ (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
+ (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5559,6 +5574,85 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
+ } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
+ (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
+ (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
+ (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
+ (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
+ (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
+ (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
+ (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
+ (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
+ (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
+ (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
+ (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
+ (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
+ (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
+ (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
} else if (Name == "memcmp") {
if (visitMemCmpCall(I))
return;
@@ -5596,22 +5690,6 @@ public:
: TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
}
- /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
- /// busy in OutputRegs/InputRegs.
- void MarkAllocatedRegs(bool isOutReg, bool isInReg,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs,
- const TargetRegisterInfo &TRI) const {
- if (isOutReg) {
- for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
- MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
- }
- if (isInReg) {
- for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
- MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
- }
- }
-
/// getCallOperandValEVT - Return the EVT of the Value* that this operand
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
@@ -5659,18 +5737,6 @@ public:
return TLI.getValueType(OpTy, true);
}
-
-private:
- /// MarkRegAndAliases - Mark the specified register and all aliases in the
- /// specified set.
- static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
- const TargetRegisterInfo &TRI) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
- Regs.insert(Reg);
- if (const unsigned *Aliases = TRI.getAliasSet(Reg))
- for (; *Aliases; ++Aliases)
- Regs.insert(*Aliases);
- }
};
typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
@@ -5684,39 +5750,13 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
/// allocation. This produces generally horrible, but correct, code.
///
/// OpInfo describes the operand.
-/// Input and OutputRegs are the set of already allocated physical registers.
///
static void GetRegistersForValue(SelectionDAG &DAG,
const TargetLowering &TLI,
DebugLoc DL,
- SDISelAsmOperandInfo &OpInfo,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs) {
+ SDISelAsmOperandInfo &OpInfo) {
LLVMContext &Context = *DAG.getContext();
- // Compute whether this value requires an input register, an output register,
- // or both.
- bool isOutReg = false;
- bool isInReg = false;
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- isOutReg = true;
-
- // If there is an input constraint that matches this, we need to reserve
- // the input register so no other inputs allocate to it.
- isInReg = OpInfo.hasMatchingInput();
- break;
- case InlineAsm::isInput:
- isInReg = true;
- isOutReg = false;
- break;
- case InlineAsm::isClobber:
- isOutReg = true;
- isInReg = true;
- break;
- }
-
-
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
@@ -5790,8 +5830,6 @@ static void GetRegistersForValue(SelectionDAG &DAG,
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
- OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
return;
}
@@ -5822,8 +5860,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
/// ConstraintOperands - Information about all of the constraints.
SDISelAsmOperandInfoVector ConstraintOperands;
- std::set<unsigned> OutputRegs, InputRegs;
-
TargetLowering::AsmOperandInfoVector
TargetConstraints = TLI.ParseConstraints(CS);
@@ -5956,7 +5992,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// constant pool entry to get its address.
const Value *OpVal = OpInfo.CallOperandVal;
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
- isa<ConstantVector>(OpVal)) {
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
TLI.getPointerTy());
} else {
@@ -5985,8 +6021,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this constraint is for a specific register, allocate it before
// anything else.
if (OpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
- InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
}
// Second pass - Loop over all of the operands, assigning virtual or physregs
@@ -5997,8 +6032,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
- InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -6052,9 +6086,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Copy the output from the appropriate register. Find a register that
// we can use.
- if (OpInfo.AssignedRegs.Regs.empty())
- report_fatal_error("Couldn't allocate output reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// If this is an indirect operand, store through the pointer after the
// asm.
@@ -6154,9 +6192,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
- if (Ops.empty())
- report_fatal_error("Invalid operand for inline asm constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (Ops.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType =
@@ -6187,9 +6229,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
"Don't know how to handle indirect register inputs yet!");
// Copy the input into the appropriate registers.
- if (OpInfo.AssignedRegs.Regs.empty())
- report_fatal_error("Couldn't allocate input reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
Chain, &Flag);
@@ -6327,7 +6373,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
bool RetSExt, bool RetZExt, bool isVarArg,
bool isInreg, unsigned NumFixedArgs,
CallingConv::ID CallConv, bool isTailCall,
- bool isReturnValueUsed,
+ bool doesNotRet, bool isReturnValueUsed,
SDValue Callee,
ArgListTy &Args, SelectionDAG &DAG,
DebugLoc dl) const {
@@ -6424,7 +6470,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
}
SmallVector<SDValue, 4> InVals;
- Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall,
Outs, OutVals, Ins, dl, DAG, InVals);
// Verify that the target's LowerCall behaved as expected.
@@ -6493,7 +6539,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N,
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
- return SDValue();
}
void
@@ -6515,10 +6560,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(const Argument *A) {
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
// With FastISel active, we may be splitting blocks, so force creation
// of virtual registers for all non-dead arguments.
- if (EnableFastISel)
+ if (FastISel)
return A->use_empty();
const BasicBlock *Entry = A->getParent()->begin();
@@ -6708,7 +6753,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
SDB->getCurDebugLoc());
SDB->setValue(I, Res);
- if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
@@ -6718,7 +6763,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
- if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
// general. It's also subtly incompatible with the hacks FastISel
@@ -6729,7 +6774,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
continue;
}
}
- if (!isOnlyUsedInEntryBlock(I)) {
+ if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
FuncInfo->InitializeRegForValue(I);
SDB->CopyToExportRegsIfNeeded(I);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 0a21ca3472ca..8393b414926a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -67,11 +67,11 @@ class SIToFPInst;
class StoreInst;
class SwitchInst;
class TargetData;
+class TargetLibraryInfo;
class TargetLowering;
class TruncInst;
class UIToFPInst;
class UnreachableInst;
-class UnwindInst;
class VAArgInst;
class ZExtInst;
@@ -129,13 +129,13 @@ private:
/// Case - A struct to record the Value for a switch case, and the
/// case's target basic block.
struct Case {
- Constant* Low;
- Constant* High;
+ const Constant *Low;
+ const Constant *High;
MachineBasicBlock* BB;
uint32_t ExtraWeight;
Case() : Low(0), High(0), BB(0), ExtraWeight(0) { }
- Case(Constant* low, Constant* high, MachineBasicBlock* bb,
+ Case(const Constant *low, const Constant *high, MachineBasicBlock *bb,
uint32_t extraweight) : Low(low), High(high), BB(bb),
ExtraWeight(extraweight) { }
@@ -294,6 +294,7 @@ public:
SelectionDAG &DAG;
const TargetData *TD;
AliasAnalysis *AA;
+ const TargetLibraryInfo *LibInfo;
/// SwitchCases - Vector of CaseBlock structures used to communicate
/// SwitchInst code generation information.
@@ -338,7 +339,8 @@ public:
HasTailCall(false), Context(dag.getContext()) {
}
- void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li);
/// clear - Clear out the current SelectionDAG and the associated
/// state and prepare this SelectionDAGBuilder object to be used
@@ -451,7 +453,8 @@ private:
MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
- uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+ uint32_t getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
uint32_t Weight = 0);
public:
@@ -471,7 +474,6 @@ private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
void visitResume(const ResumeInst &I);
- void visitUnwind(const UnwindInst &I);
void visitBinary(const User &I, unsigned OpCode);
void visitShift(const User &I, unsigned Opcode);
@@ -554,8 +556,6 @@ private:
void visitUserOp2(const Instruction &I) {
llvm_unreachable("UserOp2 should not exist at instruction selection time!");
}
-
- const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
new file mode 100644
index 000000000000..f981afb437b0
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -0,0 +1,631 @@
+//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::dump method and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->getName(getMachineOpcode());
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE: return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+ case ISD::RegisterMask: return "RegisterMask";
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::LSDAADDR: return "LSDAADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP: return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FPOW: return "fpow";
+
+ case ISD::FPOWI: return "fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+ case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: llvm_unreachable("Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+
+ // Trampolines
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setue";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default: return "";
+ case ISD::PRE_INC: return "<pre-inc>";
+ case ISD::PRE_DEC: return "<pre-dec>";
+ case ISD::POST_INC: return "<post-inc>";
+ case ISD::POST_DEC: return "<post-dec>";
+ }
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ }
+ else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<" << *LD->getMemOperand();
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<" << *ST->getMemOperand();
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ OS << "<";
+ WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ OS << ", ";
+ WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ OS << ">";
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ }
+
+ if (G)
+ if (unsigned Order = G->GetOrdering(this))
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+
+ DebugLoc dl = getDebugLoc();
+ if (G && !dl.isUnknown()) {
+ DIScope
+ Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (Scope.Verify())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << dl.getLine();
+ if (dl.getCol() != 0)
+ OS << ':' << dl.getCol();
+ }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ dbgs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+ dbgs() << '\n';
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+ dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS.indent(indent);
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+
+ if (i) OS << ",";
+ OS << " ";
+
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode.
+ OS << (void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent) {
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ if (depth < 1)
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ // Don't follow chain operands.
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 68b9146adfe1..605509bd227a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -41,6 +41,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -61,6 +62,80 @@ STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+#ifndef NDEBUG
+static cl::opt<bool>
+EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
+ cl::desc("Enable extra verbose messages in the \"fast\" "
+ "instruction selector"));
+ // Terminators
+STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
+STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
+STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
+STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
+STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
+STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
+STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
+
+ // Standard binary operators...
+STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
+STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
+STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
+STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
+STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
+STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
+STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
+STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
+STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
+STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
+STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
+STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
+
+ // Logical operators...
+STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
+STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
+STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
+
+ // Memory instructions...
+STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
+STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
+STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
+STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
+STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRWM");
+STATISTIC(NumFastIselFailFence,"Fast isel fails on Frence");
+STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
+
+ // Convert instructions...
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
+STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
+STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
+STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
+STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
+STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
+STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
+STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
+STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
+STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
+STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
+STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
+
+ // Other instructions...
+STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
+STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
+STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
+STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
+STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
+STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
+STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
+STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
+STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
+STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
+STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
+STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
+STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
+STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
+STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+#endif
+
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
@@ -142,14 +217,15 @@ namespace llvm {
CodeGenOpt::Level OptLevel) {
const TargetLowering &TLI = IS->getTargetLowering();
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOpt::None ||
+ TLI.getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
- if (TLI.getSchedulingPreference() == Sched::Latency)
- return createTDListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::RegPressure)
return createBURRListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::Hybrid)
return createHybridListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::VLIW)
+ return createVLIWDAGScheduler(IS, OptLevel);
assert(TLI.getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
@@ -174,12 +250,11 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
"TargetLowering::EmitInstrWithCustomInserter!";
#endif
llvm_unreachable(0);
- return 0;
}
void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- assert(!MI->getDesc().hasPostISelHook() &&
+ assert(!MI->hasPostISelHook() &&
"If a target marks an instruction with 'hasPostISelHook', "
"it must implement TargetLowering::AdjustInstrPostInstrSelection!");
}
@@ -188,11 +263,13 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
+void SelectionDAGISel::ISelUpdater::anchor() { }
+
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
- CurDAG(new SelectionDAG(tm)),
+ CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
@@ -200,6 +277,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
@@ -213,6 +291,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<TargetLibraryInfo>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -258,9 +337,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Do some sanity-checking on the command-line options.
- assert((!EnableFastISelVerbose || EnableFastISel) &&
+ assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
- assert((!EnableFastISelAbort || EnableFastISel) &&
+ assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
"-fast-isel-abort requires -fast-isel");
const Function &Fn = *mf.getFunction();
@@ -270,6 +349,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
+ LibInfo = &getAnalysis<TargetLibraryInfo>();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -284,7 +364,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
else
FuncInfo->BPI = 0;
- SDB->init(GFI, *AA);
+ SDB->init(GFI, *AA, LibInfo);
SelectAllBasicBlocks(Fn);
@@ -348,7 +428,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII.get(TargetOpcode::DBG_VALUE))
.addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
.addImm(Offset).addMetadata(Variable);
- EntryMBB->insertAfter(CopyUseMI, NewMI);
+ MachineBasicBlock::iterator Pos = CopyUseMI;
+ EntryMBB->insertAfter(Pos, NewMI);
}
}
}
@@ -374,7 +455,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
// Determine if there is a call to setjmp in the machine function.
- MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice());
+ MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
// Replace forward-declared registers with the registers containing
// the desired value.
@@ -427,7 +508,6 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
Worklist.push_back(CurDAG->getRoot().getNode());
- APInt Mask;
APInt KnownZero;
APInt KnownOne;
@@ -458,8 +538,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
continue;
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
- Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
- CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
+ CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne);
FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
} while (!Worklist.empty());
}
@@ -478,8 +557,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#endif
{
BlockNumber = FuncInfo->MBB->getNumber();
- BlockName = MF->getFunction()->getNameStr() + ":" +
- FuncInfo->MBB->getBasicBlock()->getNameStr();
+ BlockName = MF->getFunction()->getName().str() + ":" +
+ FuncInfo->MBB->getBasicBlock()->getName().str();
}
DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
@@ -489,7 +568,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in pre-legalize mode.
{
NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
- CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
@@ -517,7 +596,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("DAG Combining after legalize types", GroupName,
TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
@@ -542,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
@@ -564,7 +643,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in post-legalize mode.
{
NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
@@ -592,7 +671,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("Instruction Scheduling", GroupName,
TimePassesIsEnabled);
- Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt);
+ Scheduler->Run(CurDAG, FuncInfo->MBB);
}
if (ViewSUnitDAGs) Scheduler->viewGraph();
@@ -603,8 +682,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
- LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
- FuncInfo->InsertPt = Scheduler->InsertPos;
+ // FuncInfo->InsertPt is passed by reference and set to the end of the
+ // scheduled instructions.
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
}
// If the block was split, make sure we update any references that are used to
@@ -693,43 +773,18 @@ void SelectionDAGISel::PrepareEHLandingPad() {
// Assign the call site to the landing pad's begin label.
MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
-
+
const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
// Mark exception register as live in.
- unsigned Reg = TLI.getExceptionAddressRegister();
+ unsigned Reg = TLI.getExceptionPointerRegister();
if (Reg) MBB->addLiveIn(Reg);
// Mark exception selector register as live in.
Reg = TLI.getExceptionSelectorRegister();
if (Reg) MBB->addLiveIn(Reg);
-
- // FIXME: Hack around an exception handling flaw (PR1508): the personality
- // function and list of typeids logically belong to the invoke (or, if you
- // like, the basic block containing the invoke), and need to be associated
- // with it in the dwarf exception handling tables. Currently however the
- // information is provided by an intrinsic (eh.selector) that can be moved
- // to unexpected places by the optimizers: if the unwind edge is critical,
- // then breaking it can result in the intrinsics being in the successor of
- // the landing pad, not the landing pad itself. This results
- // in exceptions not being caught because no typeids are associated with
- // the invoke. This may not be the only way things can go wrong, but it
- // is the only way we try to work around for the moment.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
-
- if (Br && Br->isUnconditional()) { // Critical edge?
- BasicBlock::const_iterator I, E;
- for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
- if (isa<EHSelectorInst>(I))
- break;
-
- if (I == E)
- // No catch info found - try to extract some from the successor.
- CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
- }
}
/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
@@ -822,10 +877,90 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
+#ifndef NDEBUG
+// Collect per Instruction statistics for fast-isel misses. Only those
+// instructions that cause the bail are accounted for. It does not account for
+// instructions higher in the block. Thus, summing the per instructions stats
+// will not add up to what is reported by NumFastIselFailures.
+static void collectFailStats(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: assert (0 && "<Invalid operator> ");
+
+ // Terminators
+ case Instruction::Ret: NumFastIselFailRet++; return;
+ case Instruction::Br: NumFastIselFailBr++; return;
+ case Instruction::Switch: NumFastIselFailSwitch++; return;
+ case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
+ case Instruction::Invoke: NumFastIselFailInvoke++; return;
+ case Instruction::Resume: NumFastIselFailResume++; return;
+ case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
+
+ // Standard binary operators...
+ case Instruction::Add: NumFastIselFailAdd++; return;
+ case Instruction::FAdd: NumFastIselFailFAdd++; return;
+ case Instruction::Sub: NumFastIselFailSub++; return;
+ case Instruction::FSub: NumFastIselFailFSub++; return;
+ case Instruction::Mul: NumFastIselFailMul++; return;
+ case Instruction::FMul: NumFastIselFailFMul++; return;
+ case Instruction::UDiv: NumFastIselFailUDiv++; return;
+ case Instruction::SDiv: NumFastIselFailSDiv++; return;
+ case Instruction::FDiv: NumFastIselFailFDiv++; return;
+ case Instruction::URem: NumFastIselFailURem++; return;
+ case Instruction::SRem: NumFastIselFailSRem++; return;
+ case Instruction::FRem: NumFastIselFailFRem++; return;
+
+ // Logical operators...
+ case Instruction::And: NumFastIselFailAnd++; return;
+ case Instruction::Or: NumFastIselFailOr++; return;
+ case Instruction::Xor: NumFastIselFailXor++; return;
+
+ // Memory instructions...
+ case Instruction::Alloca: NumFastIselFailAlloca++; return;
+ case Instruction::Load: NumFastIselFailLoad++; return;
+ case Instruction::Store: NumFastIselFailStore++; return;
+ case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
+ case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
+ case Instruction::Fence: NumFastIselFailFence++; return;
+ case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
+
+ // Convert instructions...
+ case Instruction::Trunc: NumFastIselFailTrunc++; return;
+ case Instruction::ZExt: NumFastIselFailZExt++; return;
+ case Instruction::SExt: NumFastIselFailSExt++; return;
+ case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
+ case Instruction::FPExt: NumFastIselFailFPExt++; return;
+ case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
+ case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
+ case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
+ case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
+ case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
+ case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
+ case Instruction::BitCast: NumFastIselFailBitCast++; return;
+
+ // Other instructions...
+ case Instruction::ICmp: NumFastIselFailICmp++; return;
+ case Instruction::FCmp: NumFastIselFailFCmp++; return;
+ case Instruction::PHI: NumFastIselFailPHI++; return;
+ case Instruction::Select: NumFastIselFailSelect++; return;
+ case Instruction::Call: NumFastIselFailCall++; return;
+ case Instruction::Shl: NumFastIselFailShl++; return;
+ case Instruction::LShr: NumFastIselFailLShr++; return;
+ case Instruction::AShr: NumFastIselFailAShr++; return;
+ case Instruction::VAArg: NumFastIselFailVAArg++; return;
+ case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
+ case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
+ case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
+ case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
+ case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
+ case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
+ }
+}
+#endif
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
- if (EnableFastISel)
+ if (TM.Options.EnableFastISel)
FastIS = TLI.createFastISel(*FuncInfo);
// Iterate over all basic blocks in the function.
@@ -894,13 +1029,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->setLastLocalValue(0);
}
+ unsigned NumFastIselRemaining = std::distance(Begin, End);
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
const Instruction *Inst = llvm::prior(BI);
// If we no longer require this instruction, skip it.
- if (isFoldedOrDeadInstruction(Inst, FuncInfo))
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
+ --NumFastIselRemaining;
continue;
+ }
// Bottom-up: reset the insert pos at the top, after any local-value
// instructions.
@@ -908,6 +1046,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Try to select the instruction with FastISel.
if (FastIS->SelectInstruction(Inst)) {
+ --NumFastIselRemaining;
++NumFastIselSuccess;
// If fast isel succeeded, skip over all the folded instructions, and
// then see if there is a load right before the selected instructions.
@@ -920,15 +1059,23 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
BeforeInst->hasOneUse() &&
- TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) {
// If we succeeded, don't re-select the load.
BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ }
continue;
}
+#ifndef NDEBUG
+ if (EnableFastISelVerbose2)
+ collectFailStats(Inst);
+#endif
+
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(Inst)) {
- ++NumFastIselFailures;
+
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed call: ";
Inst->dump();
@@ -943,24 +1090,30 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
bool HadTailCall = false;
SelectBasicBlock(Inst, BI, HadTailCall);
+ // Recompute NumFastIselRemaining as Selection DAG instruction
+ // selection may have handled the call, input args, etc.
+ unsigned RemainingNow = std::distance(Begin, BI);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
+
// If the call was emitted as a tail call, we're done with the block.
if (HadTailCall) {
--BI;
break;
}
+ NumFastIselRemaining = RemainingNow;
continue;
}
if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
// Don't abort, and use a different message for terminator misses.
- ++NumFastIselFailures;
+ NumFastIselFailures += NumFastIselRemaining;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed terminator: ";
Inst->dump();
}
} else {
- ++NumFastIselFailures;
+ NumFastIselFailures += NumFastIselRemaining;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel miss: ";
Inst->dump();
@@ -1289,7 +1442,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
APInt NeededMask = DesiredMask & ~ActualMask;
APInt KnownZero, KnownOne;
- CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+ CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne);
// If all the missing bits in the or are already known to be set, match!
if ((NeededMask & KnownOne) == NeededMask)
@@ -2025,6 +2178,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::EntryToken: // These nodes remain the same.
case ISD::BasicBlock:
case ISD::Register:
+ case ISD::RegisterMask:
//case ISD::VALUETYPE:
//case ISD::CONDCODE:
case ISD::HANDLENODE:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index cd1647b17b9b..6cde05aea82a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -28,7 +27,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/config.h"
using namespace llvm;
namespace llvm {
@@ -148,7 +146,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
void SelectionDAG::viewGraph(const std::string &Title) {
// This code is only for debugging!
#ifndef NDEBUG
- ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(),
false, Title);
#else
errs() << "SelectionDAG::viewGraph is only available in debug builds on "
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 907d8d9da1af..09a2b1f3d7a5 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -36,31 +36,9 @@ using namespace llvm;
/// - the promotion of vector elements. This feature is disabled by default
/// and only enabled using this flag.
static cl::opt<bool>
-AllowPromoteIntElem("promote-elements", cl::Hidden,
+AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true),
cl::desc("Allow promotion of integer vector element types"));
-namespace llvm {
-TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
- bool isLocal = GV->hasLocalLinkage();
- bool isDeclaration = GV->isDeclaration();
- // FIXME: what should we do for protected and internal visibility?
- // For variables, is internal different from hidden?
- bool isHidden = GV->hasHiddenVisibility();
-
- if (reloc == Reloc::PIC_) {
- if (isLocal || isHidden)
- return TLSModel::LocalDynamic;
- else
- return TLSModel::GeneralDynamic;
- } else {
- if (!isDeclaration || isHidden)
- return TLSModel::LocalExec;
- else
- return TLSModel::InitialExec;
- }
-}
-}
-
/// InitLibcallNames - Set default libcall names.
///
static void InitLibcallNames(const char **Names) {
@@ -572,21 +550,42 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10,MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG , MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10,MVT::f32, Expand);
- setOperationAction(ISD::FEXP , MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f16, Expand);
+ setOperationAction(ISD::FEXP , MVT::f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -610,7 +609,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
BooleanVectorContents = UndefinedBooleanContent;
- SchedPreferenceInfo = Sched::Latency;
+ SchedPreferenceInfo = Sched::ILP;
JumpBufSize = 0;
JumpBufAlignment = 0;
MinFunctionAlignment = 0;
@@ -1080,8 +1079,12 @@ unsigned TargetLowering::getJumpTableEncoding() const {
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
// If our PIC model is GP relative, use the global offset table as the base.
- if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress)
+ unsigned JTEncoding = getJumpTableEncoding();
+
+ if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+ (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+
return Table;
}
@@ -1223,7 +1226,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Depth != 0) {
// If not at the root, Just compute the KnownZero/KnownOne bits to
// simplify things downstream.
- TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
@@ -1242,8 +1245,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
switch (Op.getOpcode()) {
case ISD::Constant:
// We know all of the bits for a constant!
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
- KnownZero = ~KnownOne & NewMask;
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
return false; // Don't fall through, will infinitely loop.
case ISD::AND:
// If the RHS is a constant, check to see if the LHS would be zero without
@@ -1253,8 +1256,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
APInt LHSZero, LHSOne;
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
- LHSZero, LHSOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
// If the LHS already has zeros where RHSC does, this and is dead.
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op.getOperand(0));
@@ -1473,9 +1475,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = InOp.getNode()->getOperand(0);
EVT InnerVT = InnerOp.getValueType();
- if ((APInt::getHighBitsSet(BitWidth,
- BitWidth - InnerVT.getSizeInBits()) &
- DemandedMask) == 0 &&
+ unsigned InnerBits = InnerVT.getSizeInBits();
+ if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
@@ -1545,7 +1546,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedMask == 1)
+ if (NewMask == 1)
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
@@ -1588,23 +1589,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
break;
case ISD::SIGN_EXTEND_INREG: {
- EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
+ // If we only care about the highest bit, don't bother shifting right.
+ if (MsbMask == DemandedMask) {
+ unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp, ShiftAmt));
+ }
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getScalarType().getSizeInBits());
+ BitWidth - ExVT.getScalarType().getSizeInBits());
// If none of the extended bits are demanded, eliminate the sextinreg.
if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
APInt InSignBit =
- APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
- EVT.getScalarType().getSizeInBits()) &
+ ExVT.getScalarType().getSizeInBits()) &
NewMask;
// Since the sign extended bits are demanded, we know that the sign
@@ -1622,7 +1640,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known zero, convert this into a zero extension.
if (KnownZero.intersects(InSignBit))
return TLO.CombineTo(Op,
- TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT));
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
KnownOne |= NewBits;
@@ -1688,11 +1706,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the sign bit is known one, the top bits match.
if (KnownOne.intersects(InSignBit)) {
- KnownOne |= NewBits;
- KnownZero &= ~NewBits;
+ KnownOne |= NewBits;
+ assert((KnownZero & NewBits) == 0);
} else { // Otherwise, top bits aren't known.
- KnownOne &= ~NewBits;
- KnownZero &= ~NewBits;
+ assert((KnownOne & NewBits) == 0);
+ assert((KnownZero & NewBits) == 0);
}
break;
}
@@ -1783,7 +1801,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::BITCAST:
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (!Op.getOperand(0).getValueType().isVector() &&
+ if (!TLO.LegalOperations() &&
+ !Op.getValueType().isVector() &&
+ !Op.getOperand(0).getValueType().isVector() &&
NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
@@ -1824,7 +1844,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// FALL THROUGH
default:
// Just use ComputeMaskedBits to compute output bits.
- TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
break;
}
@@ -1840,7 +1860,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -1851,7 +1870,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
}
/// ComputeNumSignBitsForTargetNode - This method can be implemented by
@@ -1895,9 +1914,8 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
// Fall back to ComputeMaskedBits to catch other known cases.
EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(Val, KnownZero, KnownOne);
return (KnownZero.countPopulation() == BitWidth - 1) &&
(KnownOne.countPopulation() == 1);
}
@@ -2060,7 +2078,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
Lod->getPointerInfo().getWithOffset(bestOffset),
- false, false, NewAlign);
+ false, false, false, NewAlign);
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
@@ -2393,8 +2411,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0 == N1) {
// We can always fold X == X for integer setcc's.
- if (N0.getValueType().isInteger())
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ if (N0.getValueType().isInteger()) {
+ switch (getBooleanContents(N0.getValueType().isVector())) {
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ case ZeroOrNegativeOneBooleanContent:
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
+ }
+ }
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
@@ -2428,6 +2453,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ // If RHS is a legal immediate value for a compare instruction, we need
+ // to be careful about increasing register pressure needlessly.
+ bool LegalRHSImm = false;
+
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
@@ -2462,25 +2491,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond);
}
}
+
+ // Could RHSC fold directly into a compare?
+ if (RHSC->getValueType(0).getSizeInBits() <= 64)
+ LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
}
// Simplify (X+Z) == X --> Z == 0
- if (N0.getOperand(0) == N1)
- return DAG.getSetCC(dl, VT, N0.getOperand(1),
- DAG.getConstant(0, N0.getValueType()), Cond);
- if (N0.getOperand(1) == N1) {
- if (DAG.isCommutativeBinOp(N0.getOpcode()))
- return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(0, N0.getValueType()), Cond);
- else if (N0.getNode()->hasOneUse()) {
- assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
- // (Z-X) == X --> Z == X<<1
- SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
- N1,
+ // Don't do this if X is an immediate that can fold into a cmp
+ // instruction and X+Z has other uses. It could be an induction variable
+ // chain, and the transform would increase register pressure.
+ if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ else if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
- if (!DCI.isCalledByLegalizer())
- DCI.AddToWorklist(SH.getNode());
- return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
}
}
}
@@ -2984,7 +3021,6 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
- default: llvm_unreachable("Unknown constraint type!");
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
@@ -2995,6 +3031,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
case TargetLowering::C_Memory:
return 3;
}
+ llvm_unreachable("Invalid constraint type");
}
/// Examine constraint type and operand type and determine a weight value.
@@ -3242,8 +3279,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
- std::vector<SDNode*>* Created) const {
+SDValue TargetLowering::
+BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl= N->getDebugLoc();
@@ -3258,10 +3296,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
SDValue Q;
- if (isOperationLegalOrCustom(ISD::MULHS, VT))
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
+ isOperationLegalOrCustom(ISD::MULHS, VT))
Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
DAG.getConstant(magics.m, VT));
- else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
N->getOperand(0),
DAG.getConstant(magics.m, VT)).getNode(), 1);
@@ -3299,8 +3339,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
- std::vector<SDNode*>* Created) const {
+SDValue TargetLowering::
+BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
@@ -3332,9 +3373,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
- if (isOperationLegalOrCustom(ISD::MULHU, VT))
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
+ isOperationLegalOrCustom(ISD::MULHU, VT))
Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
- else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
DAG.getConstant(magics.m, VT)).getNode(), 1);
else