Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 425
1 file changed, 278 insertions(+), 147 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f000ce38d367..64244313a326 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12,42 +12,43 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
-#include "SDNodeOrdering.h"
#include "SDNodeDbgValue.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
+#include "SDNodeOrdering.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@@ -59,18 +60,6 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
return Res;
}
-static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unknown FP format");
- case MVT::f16: return &APFloat::IEEEhalf;
- case MVT::f32: return &APFloat::IEEEsingle;
- case MVT::f64: return &APFloat::IEEEdouble;
- case MVT::f80: return &APFloat::x87DoubleExtended;
- case MVT::f128: return &APFloat::IEEEquad;
- case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
- }
-}
-
// Default null implementations of the callbacks.
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
@@ -94,7 +83,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
// convert modifies in place, so make a copy.
APFloat Val2 = APFloat(Val);
bool losesInfo;
- (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven,
&losesInfo);
return !losesInfo;
}
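
Note: the helper deleted above was promoted from a file-static function to a static member, SelectionDAG::EVTToAPFloatSemantics, so other translation units can share it. A minimal sketch of the member, assuming it is declared in llvm/CodeGen/SelectionDAG.h; since the call sites in this diff no longer dereference the result, the member presumably returns a reference rather than a pointer:

    // Sketch only; the body mirrors the deleted file-static helper.
    static const fltSemantics &EVTToAPFloatSemantics(EVT VT) {
      switch (VT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unknown FP format");
      case MVT::f16:     return APFloat::IEEEhalf;
      case MVT::f32:     return APFloat::IEEEsingle;
      case MVT::f64:     return APFloat::IEEEdouble;
      case MVT::f80:     return APFloat::x87DoubleExtended;
      case MVT::f128:    return APFloat::IEEEquad;
      case MVT::ppcf128: return APFloat::PPCDoubleDouble;
      }
    }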
@@ -884,15 +874,17 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
- OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+ TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(),
+ getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0), UpdateListeners(0) {
AllNodes.push_back(&EntryNode);
Ordering = new SDNodeOrdering();
DbgInfo = new SDDbgInfo();
}
-void SelectionDAG::init(MachineFunction &mf) {
+void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) {
MF = &mf;
+ TTI = tti;
Context = &mf.getFunction()->getContext();
}
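
Note: init() now also threads a TargetTransformInfo pointer into the DAG (the new TTI member, initialized to 0 in the constructor above), which the memset/memcpy lowering further down uses to query immediate materialization costs. A hedged sketch of a call site, assuming a SelectionDAGISel-style pass where MF is in scope and the TTI analysis may legitimately be unavailable (null):

    // Illustration only; the exact caller is not part of this diff.
    CurDAG->init(*MF, getAnalysisIfAvailable<TargetTransformInfo>());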
@@ -1074,10 +1066,11 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
return getConstantFP(APFloat((float)Val), VT, isTarget);
else if (EltVT==MVT::f64)
return getConstantFP(APFloat(Val), VT, isTarget);
- else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) {
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 ||
+ EltVT==MVT::f16) {
bool ignored;
APFloat apf = APFloat(Val);
- apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&ignored);
return getConstantFP(apf, VT, isTarget);
} else
@@ -1525,7 +1518,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT OpTy = Op.getValueType();
- MVT ShTy = TLI.getShiftAmountTy(LHSTy);
+ EVT ShTy = TLI.getShiftAmountTy(LHSTy);
if (OpTy == ShTy || OpTy.isVector()) return Op;
ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -1924,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
- if (ISD::isZEXTLoad(Op.getNode())) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
@@ -2294,17 +2288,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
break;
}
- // Handle LOADX separately here. EXTLOAD case will fallthrough.
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
- unsigned ExtType = LD->getExtensionType();
- switch (ExtType) {
- default: break;
- case ISD::SEXTLOAD: // '17' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
- return VTBits-Tmp+1;
- case ISD::ZEXTLOAD: // '16' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
- return VTBits-Tmp;
+ // If we are looking at the loaded value of the SDNode.
+ if (Op.getResNo() == 0) {
+ // Handle LOADX separately here. EXTLOAD case will fallthrough.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // '17' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // '16' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp;
+ }
}
}
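
Note: both hunks above add the same guard. A LoadSDNode defines two results, the loaded value (result 0) and the output chain (result 1), and extension-type facts such as a ZEXTLOAD's known-zero high bits describe only result 0. A small illustration, assuming LD is a LoadSDNode* in scope:

    SDValue Loaded(LD, 0); // the loaded value; SEXTLOAD/ZEXTLOAD facts apply
    SDValue Chain(LD, 1);  // the chain token (MVT::Other), carries no bits
    bool IsValue = (Loaded.getResNo() == 0); // the new guard tests exactly this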
@@ -2438,7 +2435,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
- APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
@@ -2446,9 +2444,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
}
case ISD::BITCAST:
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(APFloat(Val), VT);
+ return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(APFloat(Val), VT);
+ return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT);
break;
case ISD::BSWAP:
return getConstant(Val.byteSwap(), VT);
@@ -2495,7 +2493,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
- (void)V.convert(*EVTToAPFloatSemantics(VT),
+ (void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, VT);
}
@@ -2686,44 +2684,117 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return SDValue(N, 0);
}
-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
- EVT VT,
- ConstantSDNode *Cst1,
- ConstantSDNode *Cst2) {
- const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
+ SDNode *Cst1, SDNode *Cst2) {
+ SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;
+ SmallVector<SDValue, 4> Outputs;
+ EVT SVT = VT.getScalarType();
- switch (Opcode) {
- case ISD::ADD: return getConstant(C1 + C2, VT);
- case ISD::SUB: return getConstant(C1 - C2, VT);
- case ISD::MUL: return getConstant(C1 * C2, VT);
- case ISD::UDIV:
- if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
- break;
- case ISD::UREM:
- if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
- break;
- case ISD::SDIV:
- if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
- break;
- case ISD::SREM:
- if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
- break;
- case ISD::AND: return getConstant(C1 & C2, VT);
- case ISD::OR: return getConstant(C1 | C2, VT);
- case ISD::XOR: return getConstant(C1 ^ C2, VT);
- case ISD::SHL: return getConstant(C1 << C2, VT);
- case ISD::SRL: return getConstant(C1.lshr(C2), VT);
- case ISD::SRA: return getConstant(C1.ashr(C2), VT);
- case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
- case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
- default: break;
+ ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
+ ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
+ if (Scalar1 && Scalar2) {
+ // Scalar instruction.
+ Inputs.push_back(std::make_pair(Scalar1, Scalar2));
+ } else {
+ // For vectors extract each constant element into Inputs so we can constant
+ // fold them individually.
+ BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV1 || !BV2)
+ return SDValue();
+
+ assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+
+ for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
+ ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
+ ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
+ if (!V1 || !V2) // Not a constant, bail.
+ return SDValue();
+
+ // Avoid BUILD_VECTOR nodes that perform implicit truncation.
+ // FIXME: This is valid and could be handled by truncating the APInts.
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+ return SDValue();
+
+ Inputs.push_back(std::make_pair(V1, V2));
+ }
}
- return SDValue();
+ // We have a number of constant values; constant fold them element by element.
+ for (unsigned I = 0, E = Inputs.size(); I != E; ++I) {
+ const APInt &C1 = Inputs[I].first->getAPIntValue();
+ const APInt &C2 = Inputs[I].second->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD:
+ Outputs.push_back(getConstant(C1 + C2, SVT));
+ break;
+ case ISD::SUB:
+ Outputs.push_back(getConstant(C1 - C2, SVT));
+ break;
+ case ISD::MUL:
+ Outputs.push_back(getConstant(C1 * C2, SVT));
+ break;
+ case ISD::UDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.udiv(C2), SVT));
+ break;
+ case ISD::UREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.urem(C2), SVT));
+ break;
+ case ISD::SDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.sdiv(C2), SVT));
+ break;
+ case ISD::SREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.srem(C2), SVT));
+ break;
+ case ISD::AND:
+ Outputs.push_back(getConstant(C1 & C2, SVT));
+ break;
+ case ISD::OR:
+ Outputs.push_back(getConstant(C1 | C2, SVT));
+ break;
+ case ISD::XOR:
+ Outputs.push_back(getConstant(C1 ^ C2, SVT));
+ break;
+ case ISD::SHL:
+ Outputs.push_back(getConstant(C1 << C2, SVT));
+ break;
+ case ISD::SRL:
+ Outputs.push_back(getConstant(C1.lshr(C2), SVT));
+ break;
+ case ISD::SRA:
+ Outputs.push_back(getConstant(C1.ashr(C2), SVT));
+ break;
+ case ISD::ROTL:
+ Outputs.push_back(getConstant(C1.rotl(C2), SVT));
+ break;
+ case ISD::ROTR:
+ Outputs.push_back(getConstant(C1.rotr(C2), SVT));
+ break;
+ default:
+ return SDValue();
+ }
+ }
+
+ // Handle the scalar case first.
+ if (Outputs.size() == 1)
+ return Outputs.back();
+
+ // Otherwise build a big vector out of the scalar elements we generated.
+ return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(),
+ Outputs.size());
}
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
- SDValue N1, SDValue N2) {
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1,
+ SDValue N2) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
switch (Opcode) {
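
Note: by taking SDNode* operands, FoldConstantArithmetic can now fold two constant BUILD_VECTORs elementwise in addition to the scalar case it handled before. A sketch of what this enables, assuming DAG and dl are in scope; the ADD below can now fold to a single BUILD_VECTOR of <11, 22>:

    SDValue Ops1[] = { DAG.getConstant(1, MVT::i32),
                       DAG.getConstant(2, MVT::i32) };
    SDValue Ops2[] = { DAG.getConstant(10, MVT::i32),
                       DAG.getConstant(20, MVT::i32) };
    SDValue A = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Ops1, 2);
    SDValue B = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Ops2, 2);
    SDValue Sum = DAG.getNode(ISD::ADD, dl, MVT::v2i32, A, B); // <11, 22>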
@@ -2845,6 +2916,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
"Shift operators return type must be the same as their first arg");
assert(VT.isInteger() && N2.getValueType().isInteger() &&
"Shifts only work on integers");
+ assert((!VT.isVector() || VT == N2.getValueType()) &&
+ "Vector shift amounts must be in the same as their first arg");
// Verify that the shift amount VT is big enough to hold valid shift
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
@@ -3019,16 +3092,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
}
- if (N1C) {
- if (N2C) {
- SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
- if (SV.getNode()) return SV;
- } else { // Cannonicalize constant to RHS if commutative
- if (isCommutativeBinOp(Opcode)) {
- std::swap(N1C, N2C);
- std::swap(N1, N2);
- }
- }
+ // Perform trivial constant folding.
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode());
+ if (SV.getNode()) return SV;
+
+ // Canonicalize constant to RHS if commutative.
+ if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
}
// Constant fold FP operations.
@@ -3036,7 +3107,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
if (N1CFP) {
if (!N2CFP && isCommutativeBinOp(Opcode)) {
- // Cannonicalize constant to RHS if commutative
+ // Canonicalize constant to RHS if commutative.
std::swap(N1CFP, N2CFP);
std::swap(N1, N2);
} else if (N2CFP) {
@@ -3080,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
- (void)V.convert(*EVTToAPFloatSemantics(VT),
+ (void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, VT);
}
@@ -3312,17 +3383,6 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
&ArgChains[0], ArgChains.size());
}
-/// SplatByte - Distribute ByteVal over NumBits bits.
-static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
- APInt Val = APInt(NumBits, ByteVal);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- }
- return Val;
-}
-
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3331,17 +3391,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
+ assert(C->getAPIntValue().getBitWidth() == 8);
+ APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
if (VT.isInteger())
return DAG.getConstant(Val, VT);
- return DAG.getConstantFP(APFloat(Val), VT);
+ return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT);
}
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
if (NumBits > 8) {
// Use a multiplication with 0x010101... to extend the input to the
// required length.
- APInt Magic = SplatByte(NumBits, 0x01);
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
}
@@ -3370,10 +3431,11 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
}
assert(!VT.isVector() && "Can't handle vector type here!");
- unsigned NumVTBytes = VT.getSizeInBits() / 8;
+ unsigned NumVTBits = VT.getSizeInBits();
+ unsigned NumVTBytes = NumVTBits / 8;
unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
- uint64_t Val = 0;
+ APInt Val(NumVTBits, 0);
if (TLI.isLittleEndian()) {
for (unsigned i = 0; i != NumBytes; ++i)
Val |= (uint64_t)(unsigned char)Str[i] << i*8;
@@ -3382,7 +3444,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
}
- return DAG.getConstant(Val, VT);
+ // If the "cost" of materializing the integer immediate is 1 or free, then
+ // it is cost effective to turn the load into the immediate.
+ const TargetTransformInfo *TTI = DAG.getTargetTransformInfo();
+ if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2)
+ return DAG.getConstant(Val, VT);
+ return SDValue(0, 0);
}
/// getMemBasePlusOffset - Returns base and offset node for the
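
Note: the new early-out consults the target's cost model before turning a string load into an immediate store. getIntImmCost returns 0 for a free constant and 1 for a single cheap instruction, so the < 2 test keeps the fold only when the immediate fits in at most one instruction; returning the null SDValue tells the caller (see the memcpy hunk below) to fall back to a load. A restatement of the threshold, illustration only, assuming Val, VT, DAG, and TTI as in the hunk above:

    Type *IntTy = VT.getTypeForEVT(*DAG.getContext());
    bool Cheap = TTI->getIntImmCost(Val, IntTy) < 2; // 0 = free, 1 = one insn
    return Cheap ? DAG.getConstant(Val, VT) : SDValue(0, 0);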
@@ -3420,8 +3487,10 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset,
+ bool ZeroMemset,
bool MemcpyStrSrc,
+ bool AllowOverlap,
SelectionDAG &DAG,
const TargetLowering &TLI) {
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
@@ -3434,7 +3503,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
- IsZeroVal, MemcpyStrSrc,
+ IsMemset, ZeroMemset, MemcpyStrSrc,
DAG.getMachineFunction());
if (VT == MVT::Other) {
@@ -3464,21 +3533,51 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
// For now, only use non-vector load / store's for the left-over pieces.
+ EVT NewVT = VT;
+ unsigned NewVTSize;
+
+ bool Found = false;
if (VT.isVector() || VT.isFloatingPoint()) {
- VT = MVT::i64;
- while (!TLI.isTypeLegal(VT))
- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
- VTSize = VT.getSizeInBits() / 8;
- } else {
- // This can result in a type that is not legal on the target, e.g.
- // 1 or 2 bytes on PPC.
- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
- VTSize >>= 1;
+ NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+ if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+ TLI.isSafeMemOpType(NewVT.getSimpleVT()))
+ Found = true;
+ else if (NewVT == MVT::i64 &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+ TLI.isSafeMemOpType(MVT::f64)) {
+ // i64 is usually not legal on 32-bit targets, but f64 may be.
+ NewVT = MVT::f64;
+ Found = true;
+ }
+ }
+
+ if (!Found) {
+ do {
+ NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+ if (NewVT == MVT::i8)
+ break;
+ } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
+ }
+ NewVTSize = NewVT.getSizeInBits() / 8;
+
+ // If the new VT cannot cover all of the remaining bits, then consider
+ // issuing one (or a pair of) unaligned and overlapping loads / stores.
+ // FIXME: Only does this for 64-bit or more since we don't have proper
+ // cost model for unaligned load / store.
+ bool Fast;
+ if (NumMemOps && AllowOverlap &&
+ VTSize >= 8 && NewVTSize < Size &&
+ TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast)
+ VTSize = Size;
+ else {
+ VT = NewVT;
+ VTSize = NewVTSize;
}
}
if (++NumMemOps > Limit)
return false;
+
MemOps.push_back(VT);
Size -= VTSize;
}
@@ -3507,8 +3606,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
bool OptSize =
- MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3523,12 +3622,21 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroStr ? 0 : SrcAlign),
- true, CopyFromStr, DAG, TLI))
+ false, false, CopyFromStr, true, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Align &&
+ TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign))
+ NewAlign /= 2;
+
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
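
Note: the loop added above stops the memcpy lowering from promoting a stack object to an alignment the target could only provide via dynamic stack realignment. A worked sketch, assuming a target whose natural stack alignment is 4 bytes and a type whose ABI alignment is 16:

    unsigned Align = 4, NewAlign = 16;
    const unsigned NaturalStackAlign = 4; // stands in for the DataLayout query
    while (NewAlign > Align && NewAlign > NaturalStackAlign)
      NewAlign /= 2; // settles at 4, so no realignment is ever required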
@@ -3545,6 +3653,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value, Store;
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ SrcOff -= VTSize - Size;
+ DstOff -= VTSize - Size;
+ }
+
if (CopyFromStr &&
(isZeroStr || (VT.isInteger() && !VT.isVector()))) {
// It's unlikely a store of a vector immediate can be done in a single
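
Note: the VTSize > Size block added above implements the overlapping tail. When the last chosen type is wider than the bytes that remain, the final load/store pair slides backward to overlap the previous one instead of splitting into narrower operations. A worked example, assuming a 15-byte copy lowered with i64 operations:

    // MemOps = {i64, i64}: after the first 8 bytes, Size = 7 but VTSize = 8.
    unsigned VTSize = 8, Size = 7, SrcOff = 8, DstOff = 8;
    SrcOff -= VTSize - Size; // 7: second load covers bytes [7, 15)
    DstOff -= VTSize - Size; // 7: byte 7 is written twice, which is benign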
@@ -3553,11 +3669,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// FIXME: Handle other cases where store of vector immediate is done in
// a single instruction.
Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
- Store = DAG.getStore(Chain, dl, Value,
- getMemBasePlusOffset(Dst, DstOff, DAG),
- DstPtrInfo.getWithOffset(DstOff), isVol,
- false, Align);
- } else {
+ if (Value.getNode())
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), isVol,
+ false, Align);
+ }
+
+ if (!Store.getNode()) {
// The type might not be legal for the target. This should only happen
// if the type is smaller than a legal type, as on PPC, so the right
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
@@ -3577,6 +3696,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
OutChains.push_back(Store);
SrcOff += VTSize;
DstOff += VTSize;
+ Size -= VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -3601,8 +3721,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3612,8 +3732,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
- (DstAlignCanChange ? 0 : Align),
- SrcAlign, true, false, DAG, TLI))
+ (DstAlignCanChange ? 0 : Align), SrcAlign,
+ false, false, false, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3680,8 +3800,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3689,7 +3809,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
- IsZeroVal, false, DAG, TLI))
+ true, IsZeroVal, false, true, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3716,6 +3836,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ DstOff -= VTSize - Size;
+ }
// If this store is smaller than the largest store see whether we can get
// the smaller value for free with a truncate.
@@ -3734,6 +3861,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
isVol, false, Align);
OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;
+ Size -= VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -3745,6 +3873,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
unsigned Align, bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3812,6 +3941,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3866,6 +3996,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -4577,7 +4708,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
- const std::vector<EVT> &ResultTys,
+ ArrayRef<EVT> ResultTys,
const SDValue *Ops, unsigned NumOps) {
return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
Ops, NumOps);
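
Note: taking ArrayRef<EVT> instead of const std::vector<EVT>& lets callers pass a std::vector, a SmallVector, or a plain C array with no conversion, e.g. (illustration only, assuming Opc, dl, Ops, and NumOps in scope):

    EVT ResultTys[] = { MVT::i32, MVT::Other };
    SDValue N = DAG.getNode(Opc, dl, ResultTys, Ops, NumOps); // C array binds
                                                              // to ArrayRef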
@@ -5229,7 +5360,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
- const std::vector<EVT> &ResultTys,
+ ArrayRef<EVT> ResultTys,
const SDValue *Ops, unsigned NumOps) {
SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
return getMachineNode(Opcode, dl, VTs, Ops, NumOps);