author     Dimitry Andric <dim@FreeBSD.org>   2016-12-02 19:36:28 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2016-12-02 19:36:28 +0000
commit     4dcfa144380f1274ecb91e53c76ee30a99254afa (patch)
tree       d6a1e7d89e686c20ae498a12cd06039e94ac6166 /contrib/llvm/lib
parent     33e643f70b30dcca5af297caa76aa6de3ad1392e (diff)
parent     545937e1be2eb318dc3c8db284ab27a64c51c773 (diff)
Update llvm, clang, lld and lldb to release_39 branch r288513.
Notes: svn path=/projects/clang391-import/; revision=309437
Diffstat (limited to 'contrib/llvm/lib')
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp          | 25
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp                           | 18
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIInstructions.td                         |  1
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp                       |  7
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp          |  2
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp   | 14
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp                        | 10
7 files changed, 52 insertions(+), 25 deletions(-)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 3ab9459c8af7..9a18943291c8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2185,24 +2185,29 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
// options. This is a trivially-generalized version of the code from
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
// 4.3.1).
- SDValue Mask =
- DAG.getConstant(APInt::getLowBitsSet(NVT.getSizeInBits(),
- NVT.getSizeInBits() >> 1), dl, NVT);
+ unsigned Bits = NVT.getSizeInBits();
+ unsigned HalfBits = Bits >> 1;
+ SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
+ NVT);
SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
- SDValue Shift =
- DAG.getConstant(NVT.getSizeInBits() >> 1, dl,
- TLI.getShiftAmountTy(NVT, DAG.getDataLayout()));
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+ if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) {
+ // The type from TLI is too small to fit the shift amount we want.
+ // Override it with i32. The shift will have to be legalized.
+ ShiftAmtTy = MVT::i32;
+ }
+ SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
- DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TL);
+ DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
@@ -2211,14 +2216,14 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
- DAG.getNode(ISD::MUL, dl, NVT, LL, RL),
+ DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
- Lo = DAG.getNode(ISD::ADD, dl, NVT, TH,
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
DAG.getNode(ISD::ADD, dl, NVT,
- DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
+ DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
return;
}
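
For reference, the hunks above correct three terms in the Hacker's Delight / Knuth Algorithm M expansion: U must add the high half TH of the first partial product (not TL), W must use the half-word product LLH*RLH (not the full LL*RL), and Lo must start from TL (not TH). Below is a minimal standalone sketch of the corrected data flow in plain C++ rather than SelectionDAG nodes, assuming 64-bit parts split into 32-bit half-words; the helper name and test values are illustrative only and not part of the patch.

#include <cassert>
#include <cstdint>

// Sketch of the corrected expansion: compute Hi:Lo = LL * RL for 64-bit
// LL, RL using 32-bit half-words, mirroring the variable names above.
static void mulExpand(uint64_t LL, uint64_t RL, uint64_t &Lo, uint64_t &Hi) {
  const unsigned HalfBits = 32;
  const uint64_t Mask = 0xffffffffULL;

  uint64_t LLL = LL & Mask, RLL = RL & Mask;
  uint64_t LLH = LL >> HalfBits, RLH = RL >> HalfBits;

  uint64_t T  = LLL * RLL;
  uint64_t TL = T & Mask;
  uint64_t TH = T >> HalfBits;

  uint64_t U  = LLH * RLL + TH;   // was "+ TL" before the fix
  uint64_t UL = U & Mask;
  uint64_t UH = U >> HalfBits;

  uint64_t V  = RLH * LLL + UL;
  uint64_t VH = V >> HalfBits;

  Hi = LLH * RLH + UH + VH;       // was "LL * RL + UH + VH" before the fix
  Lo = TL + (V << HalfBits);      // was "TH + (V << Shift)" before the fix
}

int main() {
  uint64_t Lo, Hi;
  mulExpand(0xdeadbeefcafebabeULL, 0x0123456789abcdefULL, Lo, Hi);
  // Cross-check against the compiler's 128-bit multiply (GCC/Clang extension).
  unsigned __int128 Ref =
      (unsigned __int128)0xdeadbeefcafebabeULL * 0x0123456789abcdefULL;
  assert(Lo == (uint64_t)Ref && Hi == (uint64_t)(Ref >> 64));
  return 0;
}

This models only the LL * RL sub-product; the surrounding code then folds RH * LL and RL * LH into Hi, which the patch does not change functionally.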
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5cc6a4e0e83e..919081902a9c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2203,7 +2203,8 @@ void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
}
void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
- MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
// Legalize VOP2
if (isVOP2(MI) || isVOPC(MI)) {
@@ -2321,8 +2322,14 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
return;
}
- // Legalize MIMG
- if (isMIMG(MI)) {
+ // Legalize MIMG and MUBUF/MTBUF for shaders.
+ //
+ // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
+ // scratch memory access. In both cases, the legalization never involves
+ // conversion to the addr64 form.
+ if (isMIMG(MI) ||
+ (AMDGPU::isShader(MF.getFunction()->getCallingConv()) &&
+ (isMUBUF(MI) || isMTBUF(MI)))) {
MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
@@ -2337,9 +2344,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
return;
}
- // Legalize MUBUF* instructions
+ // Legalize MUBUF* instructions by converting to addr64 form.
// FIXME: If we start using the non-addr64 instructions for compute, we
- // may need to legalize them here.
+ // may need to legalize them as above. This especially applies to the
+ // buffer_load_format_* variants and variants with idxen (or bothen).
int SRsrcIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
if (SRsrcIdx != -1) {
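
The new condition in legalizeOperands() can be read as a single predicate: MIMG instructions always take the readlane-based resource legalization, and MUBUF/MTBUF instructions take it too when the function uses a shader calling convention, since those never require the addr64 rewrite handled below. A hypothetical helper expressing that check with the same SIInstrInfo and AMDGPU queries as the hunk (the helper itself is not part of the patch):

#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Hypothetical predicate mirroring the condition above: true when MI should
// be legalized like MIMG (readlane the resource descriptor into SGPRs)
// rather than being converted to the addr64 form.
static bool legalizeLikeMIMG(const MachineInstr &MI,
                             const MachineFunction &MF) {
  if (SIInstrInfo::isMIMG(MI))
    return true;
  // Shaders only reach MUBUF/MTBUF via intrinsics or scratch access, so the
  // addr64 conversion is never needed for them.
  return AMDGPU::isShader(MF.getFunction()->getCallingConv()) &&
         (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isMTBUF(MI));
}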
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index 18b7d5d62efe..dde5f2fc6b40 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2029,6 +2029,7 @@ def SI_RETURN : PseudoInstSI <
let hasSideEffects = 1;
let SALU = 1;
let hasNoSchedulingInfo = 1;
+ let DisableWQM = 1;
}
let Uses = [EXEC], Defs = [EXEC, VCC, M0],
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index b200c153df0b..1534d5825696 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -219,13 +219,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
markInstruction(MI, Flags, Worklist);
GlobalFlags |= Flags;
}
-
- if (WQMOutputs && MBB.succ_empty()) {
- // This is a prolog shader. Make sure we go back to exact mode at the end.
- Blocks[&MBB].OutNeeds = StateExact;
- Worklist.push_back(&MBB);
- GlobalFlags |= StateExact;
- }
}
return GlobalFlags;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bfd73f4bbac5..961497fe3c2d 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -634,7 +634,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
}
if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
- !isa<GEPOperator>(V) && !isa<PHINode>(V))
+ !isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
// We've found some value that we can't explore which is different from
// the base. Therefore we can't do this transformation.
return false;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index d312983ed51b..d88456ee4adc 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -579,6 +579,13 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
UndefValue::get(T), NewLoad, 0, Name));
}
+ // Bail out if the array is too large. Ideally we would like to optimize
+ // arrays of arbitrary size but this has a terrible impact on compile time.
+ // The threshold here is chosen arbitrarily, maybe needs a little bit of
+ // tuning.
+ if (NumElements > 1024)
+ return nullptr;
+
const DataLayout &DL = IC.getDataLayout();
auto EltSize = DL.getTypeAllocSize(ET);
auto Align = LI.getAlignment();
@@ -1081,6 +1088,13 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
return true;
}
+ // Bail out if the array is too large. Ideally we would like to optimize
+ // arrays of arbitrary size but this has a terrible impact on compile time.
+ // The threshold here is chosen arbitrarily, maybe needs a little bit of
+ // tuning.
+ if (NumElements > 1024)
+ return false;
+
const DataLayout &DL = IC.getDataLayout();
auto EltSize = DL.getTypeAllocSize(AT->getElementType());
auto Align = SI.getAlignment();
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 0504646c304e..c197317ac771 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2024,14 +2024,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
- if (IfBlock1)
+ if (IfBlock1) {
+ for (auto &I : *IfBlock1)
+ I.dropUnknownNonDebugMetadata();
DomBlock->getInstList().splice(InsertPt->getIterator(),
IfBlock1->getInstList(), IfBlock1->begin(),
IfBlock1->getTerminator()->getIterator());
- if (IfBlock2)
+ }
+ if (IfBlock2) {
+ for (auto &I : *IfBlock2)
+ I.dropUnknownNonDebugMetadata();
DomBlock->getInstList().splice(InsertPt->getIterator(),
IfBlock2->getInstList(), IfBlock2->begin(),
IfBlock2->getTerminator()->getIterator());
+ }
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
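
The new loops drop metadata because instructions moved out of IfBlock1/IfBlock2 stop being guarded by the branch: metadata that was only valid under that guard (for example !range or !nonnull on a speculated load) can no longer be trusted once the instructions run unconditionally. A minimal sketch of the same pattern as a standalone helper against the LLVM C++ API; the helper name is illustrative and not part of the patch.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Hypothetical helper mirroring the two if-blocks above: strip metadata that
// may no longer hold once the instructions execute unconditionally (debug
// info is preserved), then splice them into the dominating block.
static void hoistConditionalBlock(BasicBlock *IfBlock, BasicBlock *DomBlock,
                                  Instruction *InsertPt) {
  if (!IfBlock)
    return;
  for (Instruction &I : *IfBlock)
    I.dropUnknownNonDebugMetadata();
  DomBlock->getInstList().splice(InsertPt->getIterator(),
                                 IfBlock->getInstList(), IfBlock->begin(),
                                 IfBlock->getTerminator()->getIterator());
}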