diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2016-12-02 19:36:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-12-02 19:36:28 +0000 |
commit | 4dcfa144380f1274ecb91e53c76ee30a99254afa (patch) | |
tree | d6a1e7d89e686c20ae498a12cd06039e94ac6166 /contrib/llvm/lib | |
parent | 33e643f70b30dcca5af297caa76aa6de3ad1392e (diff) | |
parent | 545937e1be2eb318dc3c8db284ab27a64c51c773 (diff) | |
download | src-4dcfa144380f1274ecb91e53c76ee30a99254afa.tar.gz src-4dcfa144380f1274ecb91e53c76ee30a99254afa.zip |
Update llvm, clang, lld and lldb to release_39 branch r288513.
Notes
Notes:
svn path=/projects/clang391-import/; revision=309437
Diffstat (limited to 'contrib/llvm/lib')
7 files changed, 52 insertions, 25 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 3ab9459c8af7..9a18943291c8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2185,24 +2185,29 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, // options. This is a trivially-generalized version of the code from // Hacker's Delight (itself derived from Knuth's Algorithm M from section // 4.3.1). - SDValue Mask = - DAG.getConstant(APInt::getLowBitsSet(NVT.getSizeInBits(), - NVT.getSizeInBits() >> 1), dl, NVT); + unsigned Bits = NVT.getSizeInBits(); + unsigned HalfBits = Bits >> 1; + SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, + NVT); SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask); SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask); SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL); SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask); - SDValue Shift = - DAG.getConstant(NVT.getSizeInBits() >> 1, dl, - TLI.getShiftAmountTy(NVT, DAG.getDataLayout())); + EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); + if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) { + // The type from TLI is too small to fit the shift amount we want. + // Override it with i32. The shift will have to be legalized. + ShiftAmtTy = MVT::i32; + } + SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy); SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift); SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift); SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift); SDValue U = DAG.getNode(ISD::ADD, dl, NVT, - DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TL); + DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH); SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask); SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift); @@ -2211,14 +2216,14 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift); SDValue W = DAG.getNode(ISD::ADD, dl, NVT, - DAG.getNode(ISD::MUL, dl, NVT, LL, RL), + DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH), DAG.getNode(ISD::ADD, dl, NVT, UH, VH)); - Lo = DAG.getNode(ISD::ADD, dl, NVT, TH, + Lo = DAG.getNode(ISD::ADD, dl, NVT, TL, DAG.getNode(ISD::SHL, dl, NVT, V, Shift)); Hi = DAG.getNode(ISD::ADD, dl, NVT, W, DAG.getNode(ISD::ADD, dl, NVT, - DAG.getNode(ISD::MUL, dl, NVT, RH, LL), + DAG.getNode(ISD::MUL, dl, NVT, RH, LL), DAG.getNode(ISD::MUL, dl, NVT, RL, LH))); return; } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 5cc6a4e0e83e..919081902a9c 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2203,7 +2203,8 @@ void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI, } void SIInstrInfo::legalizeOperands(MachineInstr &MI) const { - MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + MachineFunction &MF = *MI.getParent()->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); // Legalize VOP2 if (isVOP2(MI) || isVOPC(MI)) { @@ -2321,8 +2322,14 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const { return; } - // Legalize MIMG - if (isMIMG(MI)) { + // Legalize MIMG and MUBUF/MTBUF for shaders. + // + // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via + // scratch memory access. In both cases, the legalization never involves + // conversion to the addr64 form. + if (isMIMG(MI) || + (AMDGPU::isShader(MF.getFunction()->getCallingConv()) && + (isMUBUF(MI) || isMTBUF(MI)))) { MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc); if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) { unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI); @@ -2337,9 +2344,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const { return; } - // Legalize MUBUF* instructions + // Legalize MUBUF* instructions by converting to addr64 form. // FIXME: If we start using the non-addr64 instructions for compute, we - // may need to legalize them here. + // may need to legalize them as above. This especially applies to the + // buffer_load_format_* variants and variants with idxen (or bothen). int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); if (SRsrcIdx != -1) { diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td index 18b7d5d62efe..dde5f2fc6b40 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2029,6 +2029,7 @@ def SI_RETURN : PseudoInstSI < let hasSideEffects = 1; let SALU = 1; let hasNoSchedulingInfo = 1; + let DisableWQM = 1; } let Uses = [EXEC], Defs = [EXEC, VCC, M0], diff --git a/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index b200c153df0b..1534d5825696 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -219,13 +219,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, markInstruction(MI, Flags, Worklist); GlobalFlags |= Flags; } - - if (WQMOutputs && MBB.succ_empty()) { - // This is a prolog shader. Make sure we go back to exact mode at the end. - Blocks[&MBB].OutNeeds = StateExact; - Worklist.push_back(&MBB); - GlobalFlags |= StateExact; - } } return GlobalFlags; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index bfd73f4bbac5..961497fe3c2d 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -634,7 +634,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base, } if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) && - !isa<GEPOperator>(V) && !isa<PHINode>(V)) + !isa<GetElementPtrInst>(V) && !isa<PHINode>(V)) // We've found some value that we can't explore which is different from // the base. Therefore we can't do this transformation. return false; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index d312983ed51b..d88456ee4adc 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -579,6 +579,13 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { UndefValue::get(T), NewLoad, 0, Name)); } + // Bail out if the array is too large. Ideally we would like to optimize + // arrays of arbitrary size but this has a terrible impact on compile time. + // The threshold here is chosen arbitrarily, maybe needs a little bit of + // tuning. + if (NumElements > 1024) + return nullptr; + const DataLayout &DL = IC.getDataLayout(); auto EltSize = DL.getTypeAllocSize(ET); auto Align = LI.getAlignment(); @@ -1081,6 +1088,13 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { return true; } + // Bail out if the array is too large. Ideally we would like to optimize + // arrays of arbitrary size but this has a terrible impact on compile time. + // The threshold here is chosen arbitrarily, maybe needs a little bit of + // tuning. + if (NumElements > 1024) + return false; + const DataLayout &DL = IC.getDataLayout(); auto EltSize = DL.getTypeAllocSize(AT->getElementType()); auto Align = SI.getAlignment(); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 0504646c304e..c197317ac771 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2024,14 +2024,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. - if (IfBlock1) + if (IfBlock1) { + for (auto &I : *IfBlock1) + I.dropUnknownNonDebugMetadata(); DomBlock->getInstList().splice(InsertPt->getIterator(), IfBlock1->getInstList(), IfBlock1->begin(), IfBlock1->getTerminator()->getIterator()); - if (IfBlock2) + } + if (IfBlock2) { + for (auto &I : *IfBlock2) + I.dropUnknownNonDebugMetadata(); DomBlock->getInstList().splice(InsertPt->getIterator(), IfBlock2->getInstList(), IfBlock2->begin(), IfBlock2->getTerminator()->getIterator()); + } while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { // Change the PHI node into a select instruction. |