diff options
Diffstat (limited to 'contrib/llvm/lib')
182 files changed, 3361 insertions, 2659 deletions
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 09582cf9a71d..3db041cc0fa6 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -808,7 +808,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // well. Or alternatively, replace all of this with inaccessiblememonly once // that's implemented fully. auto *Inst = CS.getInstruction(); - if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI)) { + if (isMallocOrCallocLikeFn(Inst, &TLI)) { // Be conservative if the accessed pointer may alias the allocation - // fallback to the generic handling below. if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias) @@ -925,9 +925,8 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, const DataLayout &DL) { assert(GEP1->getPointerOperand()->stripPointerCasts() == - GEP2->getPointerOperand()->stripPointerCasts() && - GEP1->getPointerOperand()->getType() == - GEP2->getPointerOperand()->getType() && + GEP2->getPointerOperand()->stripPointerCasts() && + GEP1->getPointerOperandType() == GEP2->getPointerOperandType() && "Expected GEPs with the same pointer operand"); // Try to determine whether GEP1 and GEP2 index through arrays, into structs, @@ -1186,9 +1185,8 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // just the same underlying object), see if that tells us anything about // the resulting pointers. if (GEP1->getPointerOperand()->stripPointerCasts() == - GEP2->getPointerOperand()->stripPointerCasts() && - GEP1->getPointerOperand()->getType() == - GEP2->getPointerOperand()->getType()) { + GEP2->getPointerOperand()->stripPointerCasts() && + GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) { AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL); // If we couldn't find anything interesting, don't abandon just yet. if (R != MayAlias) diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 5935dec15c70..0dc4475ca0e2 100644 --- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -72,6 +72,32 @@ static const uint32_t UR_TAKEN_WEIGHT = 1; /// easily subsume it. static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1; +/// \brief Returns the branch probability for unreachable edge according to +/// heuristic. +/// +/// This is the branch probability being taken to a block that terminates +/// (eventually) in unreachable. These are predicted as unlikely as possible. +static BranchProbability getUnreachableProbability(uint64_t UnreachableCount) { + assert(UnreachableCount > 0 && "UnreachableCount must be > 0"); + return BranchProbability::getBranchProbability( + UR_TAKEN_WEIGHT, + (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * UnreachableCount); +} + +/// \brief Returns the branch probability for reachable edge according to +/// heuristic. +/// +/// This is the branch probability not being taken toward a block that +/// terminates (eventually) in unreachable. Such a branch is essentially never +/// taken. Set the weight to an absurdly high value so that nested loops don't +/// easily subsume it. +static BranchProbability getReachableProbability(uint64_t ReachableCount) { + assert(ReachableCount > 0 && "ReachableCount must be > 0"); + return BranchProbability::getBranchProbability( + UR_NONTAKEN_WEIGHT, + (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * ReachableCount); +} + /// \brief Weight for a branch taken going into a cold block. /// /// This is the weight for a branch taken toward a block marked @@ -179,7 +205,11 @@ BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { /// unreachable-terminated block as extremely unlikely. bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) + assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); + + // Return false here so that edge weights for InvokeInst could be decided + // in calcInvokeHeuristics(). + if (isa<InvokeInst>(TI)) return false; SmallVector<unsigned, 4> UnreachableEdges; @@ -191,14 +221,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { else ReachableEdges.push_back(I.getSuccessorIndex()); - // Skip probabilities if this block has a single successor or if all were - // reachable. - if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty()) - return false; - - // Return false here so that edge weights for InvokeInst could be decided - // in calcInvokeHeuristics(). - if (isa<InvokeInst>(TI)) + // Skip probabilities if all were reachable. + if (UnreachableEdges.empty()) return false; if (ReachableEdges.empty()) { @@ -208,12 +232,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { return true; } - auto UnreachableProb = BranchProbability::getBranchProbability( - UR_TAKEN_WEIGHT, (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * - uint64_t(UnreachableEdges.size())); - auto ReachableProb = BranchProbability::getBranchProbability( - UR_NONTAKEN_WEIGHT, - (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * uint64_t(ReachableEdges.size())); + auto UnreachableProb = getUnreachableProbability(UnreachableEdges.size()); + auto ReachableProb = getReachableProbability(ReachableEdges.size()); for (unsigned SuccIdx : UnreachableEdges) setEdgeProbability(BB, SuccIdx, UnreachableProb); @@ -224,11 +244,12 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { } // Propagate existing explicit probabilities from either profile data or -// 'expect' intrinsic processing. +// 'expect' intrinsic processing. Examine metadata against unreachable +// heuristic. The probability of the edge coming to unreachable block is +// set to min of metadata and unreachable heuristic. bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 1) - return false; + assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) return false; @@ -249,6 +270,8 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { // be scaled to fit in 32 bits. uint64_t WeightSum = 0; SmallVector<uint32_t, 2> Weights; + SmallVector<unsigned, 2> UnreachableIdxs; + SmallVector<unsigned, 2> ReachableIdxs; Weights.reserve(TI->getNumSuccessors()); for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) { ConstantInt *Weight = @@ -259,6 +282,10 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { "Too many bits for uint32_t"); Weights.push_back(Weight->getZExtValue()); WeightSum += Weights.back(); + if (PostDominatedByUnreachable.count(TI->getSuccessor(i - 1))) + UnreachableIdxs.push_back(i - 1); + else + ReachableIdxs.push_back(i - 1); } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); @@ -267,20 +294,52 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { uint64_t ScalingFactor = (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1; - WeightSum = 0; - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - Weights[i] /= ScalingFactor; - WeightSum += Weights[i]; + if (ScalingFactor > 1) { + WeightSum = 0; + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + Weights[i] /= ScalingFactor; + WeightSum += Weights[i]; + } } - if (WeightSum == 0) { - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - setEdgeProbability(BB, i, {1, e}); - } else { + if (WeightSum == 0 || ReachableIdxs.size() == 0) { for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)}); + Weights[i] = 1; + WeightSum = TI->getNumSuccessors(); + } + + // Set the probability. + SmallVector<BranchProbability, 2> BP; + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + BP.push_back({ Weights[i], static_cast<uint32_t>(WeightSum) }); + + // Examine the metadata against unreachable heuristic. + // If the unreachable heuristic is more strong then we use it for this edge. + if (UnreachableIdxs.size() > 0 && ReachableIdxs.size() > 0) { + auto ToDistribute = BranchProbability::getZero(); + auto UnreachableProb = getUnreachableProbability(UnreachableIdxs.size()); + for (auto i : UnreachableIdxs) + if (UnreachableProb < BP[i]) { + ToDistribute += BP[i] - UnreachableProb; + BP[i] = UnreachableProb; + } + + // If we modified the probability of some edges then we must distribute + // the difference between reachable blocks. + if (ToDistribute > BranchProbability::getZero()) { + BranchProbability PerEdge = ToDistribute / ReachableIdxs.size(); + for (auto i : ReachableIdxs) { + BP[i] += PerEdge; + ToDistribute -= PerEdge; + } + // Tail goes to the first reachable edge. + BP[ReachableIdxs[0]] += ToDistribute; + } } + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + setEdgeProbability(BB, i, BP[i]); + assert(WeightSum <= UINT32_MAX && "Expected weights to scale down to 32 bits"); @@ -297,7 +356,11 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { /// Return false, otherwise. bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) + assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); + + // Return false here so that edge weights for InvokeInst could be decided + // in calcInvokeHeuristics(). + if (isa<InvokeInst>(TI)) return false; // Determine which successors are post-dominated by a cold block. @@ -309,13 +372,8 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { else NormalEdges.push_back(I.getSuccessorIndex()); - // Return false here so that edge weights for InvokeInst could be decided - // in calcInvokeHeuristics(). - if (isa<InvokeInst>(TI)) - return false; - - // Skip probabilities if this block has a single successor. - if (TI->getNumSuccessors() == 1 || ColdEdges.empty()) + // Skip probabilities if no cold edges. + if (ColdEdges.empty()) return false; if (NormalEdges.empty()) { @@ -698,10 +756,13 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) { DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); updatePostDominatedByUnreachable(BB); updatePostDominatedByColdCall(BB); - if (calcUnreachableHeuristics(BB)) + // If there is no at least two successors, no sense to set probability. + if (BB->getTerminator()->getNumSuccessors() < 2) continue; if (calcMetadataWeights(BB)) continue; + if (calcUnreachableHeuristics(BB)) + continue; if (calcColdCallHeuristics(BB)) continue; if (calcLoopBranchHeuristics(BB, LI)) diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h index e526e0e16aa7..75726e84569b 100644 --- a/contrib/llvm/lib/Analysis/CFLGraph.h +++ b/contrib/llvm/lib/Analysis/CFLGraph.h @@ -400,8 +400,7 @@ template <typename CFLAA> class CFLGraphBuilder { // TODO: address other common library functions such as realloc(), // strdup(), // etc. - if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) || - isFreeCall(Inst, &TLI)) + if (isMallocOrCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI)) return; // TODO: Add support for noalias args/all the other fun function diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index e12f640394e6..2259fbaeb982 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -75,20 +75,16 @@ static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned); static Value *SimplifyCastInst(unsigned, Value *, Type *, const Query &, unsigned); -/// For a boolean type, or a vector of boolean type, return false, or -/// a vector with every element false, as appropriate for the type. +/// For a boolean type or a vector of boolean type, return false or a vector +/// with every element false. static Constant *getFalse(Type *Ty) { - assert(Ty->getScalarType()->isIntegerTy(1) && - "Expected i1 type or a vector of i1!"); - return Constant::getNullValue(Ty); + return ConstantInt::getFalse(Ty); } -/// For a boolean type, or a vector of boolean type, return true, or -/// a vector with every element true, as appropriate for the type. +/// For a boolean type or a vector of boolean type, return true or a vector +/// with every element true. static Constant *getTrue(Type *Ty) { - assert(Ty->getScalarType()->isIntegerTy(1) && - "Expected i1 type or a vector of i1!"); - return Constant::getAllOnesValue(Ty); + return ConstantInt::getTrue(Ty); } /// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"? @@ -572,11 +568,11 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, match(Op1, m_Not(m_Specific(Op0)))) return Constant::getAllOnesValue(Ty); - // add nsw/nuw (xor Y, signbit), signbit --> Y + // add nsw/nuw (xor Y, signmask), signmask --> Y // The no-wrapping add guarantees that the top bit will be set by the add. // Therefore, the xor must be clearing the already set sign bit of Y. - if ((isNSW || isNUW) && match(Op1, m_SignBit()) && - match(Op0, m_Xor(m_Value(Y), m_SignBit()))) + if ((isNSW || isNUW) && match(Op1, m_SignMask()) && + match(Op0, m_Xor(m_Value(Y), m_SignMask()))) return Y; /// i1 add -> xor. @@ -1085,7 +1081,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (!isSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) && match(Op1, m_ConstantInt(C2))) { bool Overflow; - C1->getValue().umul_ov(C2->getValue(), Overflow); + (void)C1->getValue().umul_ov(C2->getValue(), Overflow); if (Overflow) return Constant::getNullValue(Op0->getType()); } @@ -2823,7 +2819,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return ConstantInt::getTrue(RHS->getContext()); } } - if (CIVal->isSignBit() && *CI2Val == 1) { + if (CIVal->isSignMask() && *CI2Val == 1) { if (Pred == ICmpInst::ICMP_UGT) return ConstantInt::getFalse(RHS->getContext()); if (Pred == ICmpInst::ICMP_ULE) @@ -3800,6 +3796,8 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, Type *GEPTy = PointerType::get(LastType, AS); if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType())) GEPTy = VectorType::get(GEPTy, VT->getNumElements()); + else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType())) + GEPTy = VectorType::get(GEPTy, VT->getNumElements()); if (isa<UndefValue>(Ops[0])) return UndefValue::get(GEPTy); @@ -4082,6 +4080,60 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, RecursionLimit); } +/// For the given destination element of a shuffle, peek through shuffles to +/// match a root vector source operand that contains that element in the same +/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s). +static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, + Constant *Mask, Value *RootVec, int RootElt, + unsigned MaxRecurse) { + if (!MaxRecurse--) + return nullptr; + + // Bail out if any mask value is undefined. That kind of shuffle may be + // simplified further based on demanded bits or other folds. + int MaskVal = ShuffleVectorInst::getMaskValue(Mask, RootElt); + if (MaskVal == -1) + return nullptr; + + // The mask value chooses which source operand we need to look at next. + Value *SourceOp; + int InVecNumElts = Op0->getType()->getVectorNumElements(); + if (MaskVal < InVecNumElts) { + RootElt = MaskVal; + SourceOp = Op0; + } else { + RootElt = MaskVal - InVecNumElts; + SourceOp = Op1; + } + + // If the source operand is a shuffle itself, look through it to find the + // matching root vector. + if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) { + return foldIdentityShuffles( + DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1), + SourceShuf->getMask(), RootVec, RootElt, MaxRecurse); + } + + // TODO: Look through bitcasts? What if the bitcast changes the vector element + // size? + + // The source operand is not a shuffle. Initialize the root vector value for + // this shuffle if that has not been done yet. + if (!RootVec) + RootVec = SourceOp; + + // Give up as soon as a source operand does not match the existing root value. + if (RootVec != SourceOp) + return nullptr; + + // The element must be coming from the same lane in the source vector + // (although it may have crossed lanes in intermediate shuffles). + if (RootElt != DestElt) + return nullptr; + + return RootVec; +} + static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, const Query &Q, unsigned MaxRecurse) { @@ -4126,7 +4178,28 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, OpShuf->getMask()->getSplatValue()) return Op1; - return nullptr; + // Don't fold a shuffle with undef mask elements. This may get folded in a + // better way using demanded bits or other analysis. + // TODO: Should we allow this? + for (unsigned i = 0; i != MaskNumElts; ++i) + if (ShuffleVectorInst::getMaskValue(Mask, i) == -1) + return nullptr; + + // Check if every element of this shuffle can be mapped back to the + // corresponding element of a single root vector. If so, we don't need this + // shuffle. This handles simple identity shuffles as well as chains of + // shuffles that may widen/narrow and/or move elements across lanes and back. + Value *RootVec = nullptr; + for (unsigned i = 0; i != MaskNumElts; ++i) { + // Note that recursion is limited for each vector element, so if any element + // exceeds the limit, this will fail to simplify. + RootVec = foldIdentityShuffles(i, Op0, Op1, Mask, RootVec, i, MaxRecurse); + + // We can't replace a widening/narrowing shuffle with one of its operands. + if (!RootVec || RootVec->getType() != RetTy) + return nullptr; + } + return RootVec; } /// Given operands for a ShuffleVectorInst, fold the result or return null. diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index b8c444904723..7983d62c2f7a 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -37,6 +37,7 @@ enum AllocType : uint8_t { CallocLike = 1<<2, // allocates + bzero ReallocLike = 1<<3, // reallocates StrDupLike = 1<<4, + MallocOrCallocLike = MallocLike | CallocLike, AllocLike = MallocLike | CallocLike | StrDupLike, AnyAlloc = AllocLike | ReallocLike }; @@ -77,8 +78,8 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = { // TODO: Handle "int posix_memalign(void **, size_t, size_t)" }; -static Function *getCalledFunction(const Value *V, bool LookThroughBitCast, - bool &IsNoBuiltin) { +static const Function *getCalledFunction(const Value *V, bool LookThroughBitCast, + bool &IsNoBuiltin) { // Don't care about intrinsics in this case. if (isa<IntrinsicInst>(V)) return nullptr; @@ -86,13 +87,13 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast, if (LookThroughBitCast) V = V->stripPointerCasts(); - CallSite CS(const_cast<Value*>(V)); + ImmutableCallSite CS(V); if (!CS.getInstruction()) return nullptr; IsNoBuiltin = CS.isNoBuiltin(); - Function *Callee = CS.getCalledFunction(); + const Function *Callee = CS.getCalledFunction(); if (!Callee || !Callee->isDeclaration()) return nullptr; return Callee; @@ -220,6 +221,14 @@ bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, } /// \brief Tests if a value is a call or invoke to a library function that +/// allocates memory similiar to malloc or calloc. +bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, MallocOrCallocLike, TLI, + LookThroughBitCast).hasValue(); +} + +/// \brief Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { diff --git a/contrib/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm/lib/Analysis/MemorySSA.cpp index 910170561abf..2480fe44d5c0 100644 --- a/contrib/llvm/lib/Analysis/MemorySSA.cpp +++ b/contrib/llvm/lib/Analysis/MemorySSA.cpp @@ -1291,7 +1291,6 @@ void MemorySSA::buildMemorySSA() { // could just look up the memory access for every possible instruction in the // stream. SmallPtrSet<BasicBlock *, 32> DefiningBlocks; - SmallPtrSet<BasicBlock *, 32> DefUseBlocks; // Go through each block, figure out where defs occur, and chain together all // the accesses. for (BasicBlock &B : F) { @@ -1316,8 +1315,6 @@ void MemorySSA::buildMemorySSA() { } if (InsertIntoDef) DefiningBlocks.insert(&B); - if (Accesses) - DefUseBlocks.insert(&B); } placePHINodes(DefiningBlocks, BBNumbers); diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index ca32cf3c7c34..700c383a9dd4 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1093,7 +1093,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, APInt Mult(W, i); unsigned TwoFactors = Mult.countTrailingZeros(); T += TwoFactors; - Mult = Mult.lshr(TwoFactors); + Mult.lshrInPlace(TwoFactors); OddFactorial *= Mult; } @@ -1276,7 +1276,8 @@ static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step, namespace { struct ExtendOpTraitsBase { - typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *); + typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)( + const SCEV *, Type *, ScalarEvolution::ExtendCacheTy &Cache); }; // Used to make code generic over signed and unsigned overflow. @@ -1305,8 +1306,9 @@ struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase { } }; -const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< - SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr; +const ExtendOpTraitsBase::GetExtendExprTy + ExtendOpTraits<SCEVSignExtendExpr>::GetExtendExpr = + &ScalarEvolution::getSignExtendExprCached; template <> struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase { @@ -1321,8 +1323,9 @@ struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase { } }; -const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< - SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr; +const ExtendOpTraitsBase::GetExtendExprTy + ExtendOpTraits<SCEVZeroExtendExpr>::GetExtendExpr = + &ScalarEvolution::getZeroExtendExprCached; } // The recurrence AR has been shown to have no signed/unsigned wrap or something @@ -1334,7 +1337,8 @@ const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< // "sext/zext(PostIncAR)" template <typename ExtendOpTy> static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, - ScalarEvolution *SE) { + ScalarEvolution *SE, + ScalarEvolution::ExtendCacheTy &Cache) { auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType; auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; @@ -1381,9 +1385,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); const SCEV *OperandExtendedStart = - SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy), - (SE->*GetExtendExpr)(Step, WideTy)); - if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) { + SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Cache), + (SE->*GetExtendExpr)(Step, WideTy, Cache)); + if ((SE->*GetExtendExpr)(Start, WideTy, Cache) == OperandExtendedStart) { if (PreAR && AR->getNoWrapFlags(WrapType)) { // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then @@ -1408,15 +1412,17 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, // Get the normalized zero or sign extended expression for this AddRec's Start. template <typename ExtendOpTy> static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty, - ScalarEvolution *SE) { + ScalarEvolution *SE, + ScalarEvolution::ExtendCacheTy &Cache) { auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; - const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE); + const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Cache); if (!PreStart) - return (SE->*GetExtendExpr)(AR->getStart(), Ty); + return (SE->*GetExtendExpr)(AR->getStart(), Ty, Cache); - return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty), - (SE->*GetExtendExpr)(PreStart, Ty)); + return SE->getAddExpr( + (SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, Cache), + (SE->*GetExtendExpr)(PreStart, Ty, Cache)); } // Try to prove away overflow by looking at "nearby" add recurrences. A @@ -1496,8 +1502,31 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, return false; } -const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, - Type *Ty) { +const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty) { + // Use the local cache to prevent exponential behavior of + // getZeroExtendExprImpl. + ExtendCacheTy Cache; + return getZeroExtendExprCached(Op, Ty, Cache); +} + +/// Query \p Cache before calling getZeroExtendExprImpl. If there is no +/// related entry in the \p Cache, call getZeroExtendExprImpl and save +/// the result in the \p Cache. +const SCEV *ScalarEvolution::getZeroExtendExprCached(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache) { + auto It = Cache.find({Op, Ty}); + if (It != Cache.end()) + return It->second; + const SCEV *ZExt = getZeroExtendExprImpl(Op, Ty, Cache); + auto InsertResult = Cache.insert({{Op, Ty}, ZExt}); + assert(InsertResult.second && "Expect the key was not in the cache"); + (void)InsertResult; + return ZExt; +} + +/// The real implementation of getZeroExtendExpr. +const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -1507,11 +1536,11 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) return getConstant( - cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty))); + cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty))); // zext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) - return getZeroExtendExpr(SZ->getOperand(), Ty); + return getZeroExtendExprCached(SZ->getOperand(), Ty, Cache); // Before doing any expensive analysis, check to see if we've already // computed a SCEV for this Op and Ty. @@ -1555,8 +1584,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // we don't need to do any further analysis. if (AR->hasNoUnsignedWrap()) return getAddRecExpr( - getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), - getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache), + getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags()); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -1581,21 +1610,22 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy); - const SCEV *WideStart = getZeroExtendExpr(Start, WideTy); + const SCEV *ZAdd = + getZeroExtendExprCached(getAddExpr(Start, ZMul), WideTy, Cache); + const SCEV *WideStart = getZeroExtendExprCached(Start, WideTy, Cache); const SCEV *WideMaxBECount = - getZeroExtendExpr(CastedMaxBECount, WideTy); - const SCEV *OperandExtendedAdd = - getAddExpr(WideStart, - getMulExpr(WideMaxBECount, - getZeroExtendExpr(Step, WideTy))); + getZeroExtendExprCached(CastedMaxBECount, WideTy, Cache); + const SCEV *OperandExtendedAdd = getAddExpr( + WideStart, getMulExpr(WideMaxBECount, getZeroExtendExprCached( + Step, WideTy, Cache))); if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), - getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache), + getZeroExtendExprCached(Step, Ty, Cache), L, + AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. @@ -1609,7 +1639,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache), getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } @@ -1641,8 +1671,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), - getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache), + getZeroExtendExprCached(Step, Ty, Cache), L, + AR->getNoWrapFlags()); } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - @@ -1657,7 +1688,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache), getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } @@ -1666,8 +1697,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) { const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); return getAddRecExpr( - getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this), - getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache), + getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags()); } } @@ -1678,7 +1709,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // commute the zero extension with the addition operation. SmallVector<const SCEV *, 4> Ops; for (const auto *Op : SA->operands()) - Ops.push_back(getZeroExtendExpr(Op, Ty)); + Ops.push_back(getZeroExtendExprCached(Op, Ty, Cache)); return getAddExpr(Ops, SCEV::FlagNUW); } } @@ -1692,8 +1723,31 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, return S; } -const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, - Type *Ty) { +const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty) { + // Use the local cache to prevent exponential behavior of + // getSignExtendExprImpl. + ExtendCacheTy Cache; + return getSignExtendExprCached(Op, Ty, Cache); +} + +/// Query \p Cache before calling getSignExtendExprImpl. If there is no +/// related entry in the \p Cache, call getSignExtendExprImpl and save +/// the result in the \p Cache. +const SCEV *ScalarEvolution::getSignExtendExprCached(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache) { + auto It = Cache.find({Op, Ty}); + if (It != Cache.end()) + return It->second; + const SCEV *SExt = getSignExtendExprImpl(Op, Ty, Cache); + auto InsertResult = Cache.insert({{Op, Ty}, SExt}); + assert(InsertResult.second && "Expect the key was not in the cache"); + (void)InsertResult; + return SExt; +} + +/// The real implementation of getSignExtendExpr. +const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -1703,11 +1757,11 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) return getConstant( - cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty))); + cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty))); // sext(sext(x)) --> sext(x) if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) - return getSignExtendExpr(SS->getOperand(), Ty); + return getSignExtendExprCached(SS->getOperand(), Ty, Cache); // sext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) @@ -1746,8 +1800,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) - return getAddExpr(getSignExtendExpr(SC1, Ty), - getSignExtendExpr(SMul, Ty)); + return getAddExpr(getSignExtendExprCached(SC1, Ty, Cache), + getSignExtendExprCached(SMul, Ty, Cache)); } } } @@ -1758,7 +1812,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // commute the sign extension with the addition operation. SmallVector<const SCEV *, 4> Ops; for (const auto *Op : SA->operands()) - Ops.push_back(getSignExtendExpr(Op, Ty)); + Ops.push_back(getSignExtendExprCached(Op, Ty, Cache)); return getAddExpr(Ops, SCEV::FlagNSW); } } @@ -1782,8 +1836,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // we don't need to do any further analysis. if (AR->hasNoSignedWrap()) return getAddRecExpr( - getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), - getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW); + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache), + getSignExtendExprCached(Step, Ty, Cache), L, SCEV::FlagNSW); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -1808,21 +1862,22 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy); - const SCEV *WideStart = getSignExtendExpr(Start, WideTy); + const SCEV *SAdd = + getSignExtendExprCached(getAddExpr(Start, SMul), WideTy, Cache); + const SCEV *WideStart = getSignExtendExprCached(Start, WideTy, Cache); const SCEV *WideMaxBECount = - getZeroExtendExpr(CastedMaxBECount, WideTy); - const SCEV *OperandExtendedAdd = - getAddExpr(WideStart, - getMulExpr(WideMaxBECount, - getSignExtendExpr(Step, WideTy))); + getZeroExtendExpr(CastedMaxBECount, WideTy); + const SCEV *OperandExtendedAdd = getAddExpr( + WideStart, getMulExpr(WideMaxBECount, getSignExtendExprCached( + Step, WideTy, Cache))); if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), - getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache), + getSignExtendExprCached(Step, Ty, Cache), L, + AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. @@ -1843,7 +1898,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache), getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } @@ -1875,8 +1930,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); return getAddRecExpr( - getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), - getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache), + getSignExtendExprCached(Step, Ty, Cache), L, + AR->getNoWrapFlags()); } } @@ -1890,18 +1946,18 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) { - Start = getSignExtendExpr(Start, Ty); + Start = getSignExtendExprCached(Start, Ty, Cache); const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L, AR->getNoWrapFlags()); - return getAddExpr(Start, getSignExtendExpr(NewAR, Ty)); + return getAddExpr(Start, getSignExtendExprCached(NewAR, Ty, Cache)); } } if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) { const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); return getAddRecExpr( - getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this), - getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache), + getSignExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags()); } } @@ -3951,9 +4007,9 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) { case Instruction::Xor: if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1))) - // If the RHS of the xor is a signbit, then this is just an add. - // Instcombine turns add of signbit into xor as a strength reduction step. - if (RHSC->getValue().isSignBit()) + // If the RHS of the xor is a signmask, then this is just an add. + // Instcombine turns add of signmask into xor as a strength reduction step. + if (RHSC->getValue().isSignMask()) return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1)); return BinaryOp(Op); @@ -5272,28 +5328,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; case Instruction::Or: - // If the RHS of the Or is a constant, we may have something like: - // X*4+1 which got turned into X*4|1. Handle this as an Add so loop - // optimizations will transparently handle this case. - // - // In order for this transformation to be safe, the LHS must be of the - // form X*(2^n) and the Or constant must be less than 2^n. - if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { - const SCEV *LHS = getSCEV(BO->LHS); - const APInt &CIVal = CI->getValue(); - if (GetMinTrailingZeros(LHS) >= - (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { - // Build a plain add SCEV. - const SCEV *S = getAddExpr(LHS, getSCEV(CI)); - // If the LHS of the add was an addrec and it has no-wrap flags, - // transfer the no-wrap flags, since an or won't introduce a wrap. - if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { - const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); - const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags( - OldAR->getNoWrapFlags()); - } - return S; - } + // Use ValueTracking to check whether this is actually an add. + if (haveNoCommonBitsSet(BO->LHS, BO->RHS, getDataLayout(), &AC, + nullptr, &DT)) { + // There aren't any common bits set, so the add can't wrap. + auto Flags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNSW); + return getAddExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags); } break; @@ -5329,7 +5369,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // using an add, which is equivalent, and re-apply the zext. APInt Trunc = CI->getValue().trunc(Z0TySize); if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && - Trunc.isSignBit()) + Trunc.isSignMask()) return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), UTy); } diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index d871e83f222a..900a2363e60d 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -292,15 +292,15 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, KnownOne = PossibleSumOne & Known; // Are we still trying to solve for the sign bit? - if (!Known.isNegative()) { + if (!Known.isSignBitSet()) { if (NSW) { // Adding two non-negative numbers, or subtracting a negative number from // a non-negative one, can't wrap into negative. - if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) + if (LHSKnownZero.isSignBitSet() && KnownZero2.isSignBitSet()) KnownZero.setSignBit(); // Adding two negative numbers, or subtracting a non-negative number from // a negative one, can't wrap into non-negative. - else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) + else if (LHSKnownOne.isSignBitSet() && KnownOne2.isSignBitSet()) KnownOne.setSignBit(); } } @@ -322,10 +322,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // The product of a number with itself is non-negative. isKnownNonNegative = true; } else { - bool isKnownNonNegativeOp1 = KnownZero.isNegative(); - bool isKnownNonNegativeOp0 = KnownZero2.isNegative(); - bool isKnownNegativeOp1 = KnownOne.isNegative(); - bool isKnownNegativeOp0 = KnownOne2.isNegative(); + bool isKnownNonNegativeOp1 = KnownZero.isSignBitSet(); + bool isKnownNonNegativeOp0 = KnownZero2.isSignBitSet(); + bool isKnownNegativeOp1 = KnownOne.isSignBitSet(); + bool isKnownNegativeOp0 = KnownOne2.isSignBitSet(); // The product of two numbers with the same sign is non-negative. isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); @@ -361,9 +361,9 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // which case we prefer to follow the result of the direct computation, // though as the program is invoking undefined behaviour we can choose // whatever we like here. - if (isKnownNonNegative && !KnownOne.isNegative()) + if (isKnownNonNegative && !KnownOne.isSignBitSet()) KnownZero.setSignBit(); - else if (isKnownNegative && !KnownZero.isNegative()) + else if (isKnownNegative && !KnownZero.isSignBitSet()) KnownOne.setSignBit(); } @@ -661,8 +661,10 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. - KnownZero |= RHSKnownZero.lshr(C->getZExtValue()); - KnownOne |= RHSKnownOne.lshr(C->getZExtValue()); + RHSKnownZero.lshrInPlace(C->getZExtValue()); + KnownZero |= RHSKnownZero; + RHSKnownOne.lshrInPlace(C->getZExtValue()); + KnownOne |= RHSKnownOne; // assume(~(v << c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && @@ -672,8 +674,10 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. - KnownZero |= RHSKnownOne.lshr(C->getZExtValue()); - KnownOne |= RHSKnownZero.lshr(C->getZExtValue()); + RHSKnownOne.lshrInPlace(C->getZExtValue()); + KnownZero |= RHSKnownOne; + RHSKnownZero.lshrInPlace(C->getZExtValue()); + KnownOne |= RHSKnownZero; // assume(v >> c = a) } else if (match(Arg, m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)), @@ -707,7 +711,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - if (RHSKnownZero.isNegative()) { + if (RHSKnownZero.isSignBitSet()) { // We know that the sign bit is zero. KnownZero.setSignBit(); } @@ -718,7 +722,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) { + if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isSignBitSet()) { // We know that the sign bit is zero. KnownZero.setSignBit(); } @@ -729,7 +733,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - if (RHSKnownOne.isNegative()) { + if (RHSKnownOne.isSignBitSet()) { // We know that the sign bit is one. KnownOne.setSignBit(); } @@ -740,7 +744,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) { + if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isSignBitSet()) { // We know that the sign bit is one. KnownOne.setSignBit(); } @@ -990,23 +994,23 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, unsigned MaxHighZeros = 0; if (SPF == SPF_SMAX) { // If both sides are negative, the result is negative. - if (KnownOne.isNegative() && KnownOne2.isNegative()) + if (KnownOne.isSignBitSet() && KnownOne2.isSignBitSet()) // We can derive a lower bound on the result by taking the max of the // leading one bits. MaxHighOnes = std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); // If either side is non-negative, the result is non-negative. - else if (KnownZero.isNegative() || KnownZero2.isNegative()) + else if (KnownZero.isSignBitSet() || KnownZero2.isSignBitSet()) MaxHighZeros = 1; } else if (SPF == SPF_SMIN) { // If both sides are non-negative, the result is non-negative. - if (KnownZero.isNegative() && KnownZero2.isNegative()) + if (KnownZero.isSignBitSet() && KnownZero2.isSignBitSet()) // We can derive an upper bound on the result by taking the max of the // leading zero bits. MaxHighZeros = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); // If either side is negative, the result is negative. - else if (KnownOne.isNegative() || KnownOne2.isNegative()) + else if (KnownOne.isSignBitSet() || KnownOne2.isSignBitSet()) MaxHighOnes = 1; } else if (SPF == SPF_UMAX) { // We can derive a lower bound on the result by taking the max of the @@ -1092,14 +1096,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, KZResult.setLowBits(ShiftAmt); // Low bits known 0. // If this shift has "nsw" keyword, then the result is either a poison // value or has the same sign bit as the first operand. - if (NSW && KnownZero.isNegative()) + if (NSW && KnownZero.isSignBitSet()) KZResult.setSignBit(); return KZResult; }; auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) { APInt KOResult = KnownOne << ShiftAmt; - if (NSW && KnownOne.isNegative()) + if (NSW && KnownOne.isSignBitSet()) KOResult.setSignBit(); return KOResult; }; @@ -1111,10 +1115,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, } case Instruction::LShr: { // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { - return KnownZero.lshr(ShiftAmt) | - // High bits known zero. - APInt::getHighBitsSet(BitWidth, ShiftAmt); + auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) { + APInt KZResult = KnownZero.lshr(ShiftAmt); + // High bits known zero. + KZResult.setHighBits(ShiftAmt); + return KZResult; }; auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) { @@ -1169,28 +1174,25 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (KnownZero2.isNegative() || ((KnownZero2 & LowBits) == LowBits)) + if (KnownZero2.isSignBitSet() || ((KnownZero2 & LowBits) == LowBits)) KnownZero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (KnownOne2.isNegative() && ((KnownOne2 & LowBits) != 0)) + if (KnownOne2.isSignBitSet() && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + break; } } // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. - if (KnownZero.isNonNegative()) { - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, - Q); - // If it's known zero, our sign bit is also zero. - if (LHSKnownZero.isNegative()) - KnownZero.setSignBit(); - } + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + // If it's known zero, our sign bit is also zero. + if (KnownZero2.isSignBitSet()) + KnownZero.setSignBit(); break; case Instruction::URem: { @@ -1331,24 +1333,24 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // (add non-negative, non-negative) --> non-negative // (add negative, negative) --> negative if (Opcode == Instruction::Add) { - if (KnownZero2.isNegative() && KnownZero3.isNegative()) + if (KnownZero2.isSignBitSet() && KnownZero3.isSignBitSet()) KnownZero.setSignBit(); - else if (KnownOne2.isNegative() && KnownOne3.isNegative()) + else if (KnownOne2.isSignBitSet() && KnownOne3.isSignBitSet()) KnownOne.setSignBit(); } // (sub nsw non-negative, negative) --> non-negative // (sub nsw negative, non-negative) --> negative else if (Opcode == Instruction::Sub && LL == I) { - if (KnownZero2.isNegative() && KnownOne3.isNegative()) + if (KnownZero2.isSignBitSet() && KnownOne3.isSignBitSet()) KnownZero.setSignBit(); - else if (KnownOne2.isNegative() && KnownZero3.isNegative()) + else if (KnownOne2.isSignBitSet() && KnownZero3.isSignBitSet()) KnownOne.setSignBit(); } // (mul nsw non-negative, non-negative) --> non-negative - else if (Opcode == Instruction::Mul && KnownZero2.isNegative() && - KnownZero3.isNegative()) + else if (Opcode == Instruction::Mul && KnownZero2.isSignBitSet() && + KnownZero3.isSignBitSet()) KnownZero.setSignBit(); } @@ -1614,8 +1616,8 @@ void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); computeKnownBits(V, ZeroBits, OneBits, Depth, Q); - KnownOne = OneBits.isNegative(); - KnownZero = ZeroBits.isNegative(); + KnownOne = OneBits.isSignBitSet(); + KnownZero = ZeroBits.isSignBitSet(); } /// Return true if the given value is known to have exactly one @@ -1638,9 +1640,9 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, if (match(V, m_Shl(m_One(), m_Value()))) return true; - // (signbit) >>l X is clearly a power of two if the one is not shifted off the - // bottom. If it is shifted off the bottom then the result is undefined. - if (match(V, m_LShr(m_SignBit(), m_Value()))) + // (signmask) >>l X is clearly a power of two if the one is not shifted off + // the bottom. If it is shifted off the bottom then the result is undefined. + if (match(V, m_LShr(m_SignMask(), m_Value()))) return true; // The remaining tests are all recursive, so bail out if we hit the limit. @@ -2241,7 +2243,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // If we are subtracting one from a positive number, there is no carry // out of the result. - if (KnownZero.isNegative()) + if (KnownZero.isSignBitSet()) return Tmp; } @@ -2265,7 +2267,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // If the input is known to be positive (the sign bit is known clear), // the output of the NEG has the same number of sign bits as the input. - if (KnownZero.isNegative()) + if (KnownZero.isSignBitSet()) return Tmp2; // Otherwise, we treat this like a SUB. @@ -2322,10 +2324,10 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // If we know that the sign bit is either zero or one, determine the number of // identical bits in the top of the input value. - if (KnownZero.isNegative()) + if (KnownZero.isSignBitSet()) return std::max(FirstAnswer, KnownZero.countLeadingOnes()); - if (KnownOne.isNegative()) + if (KnownOne.isSignBitSet()) return std::max(FirstAnswer, KnownOne.countLeadingOnes()); // computeKnownBits gave us no extra information about the top bits. @@ -3556,14 +3558,14 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, // We know the multiply operation doesn't overflow if the maximum values for // each operand will not overflow after we multiply them together. bool MaxOverflow; - LHSMax.umul_ov(RHSMax, MaxOverflow); + (void)LHSMax.umul_ov(RHSMax, MaxOverflow); if (!MaxOverflow) return OverflowResult::NeverOverflows; // We know it always overflows if multiplying the smallest possible values for // the operands also results in overflow. bool MinOverflow; - LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow); + (void)LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow); if (MinOverflow) return OverflowResult::AlwaysOverflows; diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp index 58ea9296afda..c7076ed0dd81 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.cpp +++ b/contrib/llvm/lib/AsmParser/LLParser.cpp @@ -143,27 +143,24 @@ bool LLParser::ValidateEndOfModule() { FnAttrs.removeAttribute(Attribute::Alignment); } - AS = AS.addAttributes( - Context, AttributeList::FunctionIndex, - AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs)); + AS = AS.addAttributes(Context, AttributeList::FunctionIndex, + AttributeSet::get(Context, FnAttrs)); Fn->setAttributes(AS); } else if (CallInst *CI = dyn_cast<CallInst>(V)) { AttributeList AS = CI->getAttributes(); AttrBuilder FnAttrs(AS.getFnAttributes()); AS = AS.removeAttributes(Context, AttributeList::FunctionIndex); FnAttrs.merge(B); - AS = AS.addAttributes( - Context, AttributeList::FunctionIndex, - AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs)); + AS = AS.addAttributes(Context, AttributeList::FunctionIndex, + AttributeSet::get(Context, FnAttrs)); CI->setAttributes(AS); } else if (InvokeInst *II = dyn_cast<InvokeInst>(V)) { AttributeList AS = II->getAttributes(); AttrBuilder FnAttrs(AS.getFnAttributes()); AS = AS.removeAttributes(Context, AttributeList::FunctionIndex); FnAttrs.merge(B); - AS = AS.addAttributes( - Context, AttributeList::FunctionIndex, - AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs)); + AS = AS.addAttributes(Context, AttributeList::FunctionIndex, + AttributeSet::get(Context, FnAttrs)); II->setAttributes(AS); } else { llvm_unreachable("invalid object with forward attribute group reference"); diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 24ab7e9a950c..6d727ce83346 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -372,15 +372,27 @@ Expected<std::string> readTriple(BitstreamCursor &Stream) { class BitcodeReaderBase { protected: - BitcodeReaderBase(BitstreamCursor Stream) : Stream(std::move(Stream)) { + BitcodeReaderBase(BitstreamCursor Stream, StringRef Strtab) + : Stream(std::move(Stream)), Strtab(Strtab) { this->Stream.setBlockInfo(&BlockInfo); } BitstreamBlockInfo BlockInfo; BitstreamCursor Stream; + StringRef Strtab; + + /// In version 2 of the bitcode we store names of global values and comdats in + /// a string table rather than in the VST. + bool UseStrtab = false; Expected<unsigned> parseVersionRecord(ArrayRef<uint64_t> Record); + /// If this module uses a string table, pop the reference to the string table + /// and return the referenced string and the rest of the record. Otherwise + /// just return the record itself. + std::pair<StringRef, ArrayRef<uint64_t>> + readNameFromStrtab(ArrayRef<uint64_t> Record); + bool readBlockInfo(); // Contains an arbitrary and optional string identifying the bitcode producer @@ -402,11 +414,22 @@ BitcodeReaderBase::parseVersionRecord(ArrayRef<uint64_t> Record) { if (Record.size() < 1) return error("Invalid record"); unsigned ModuleVersion = Record[0]; - if (ModuleVersion > 1) + if (ModuleVersion > 2) return error("Invalid value"); + UseStrtab = ModuleVersion >= 2; return ModuleVersion; } +std::pair<StringRef, ArrayRef<uint64_t>> +BitcodeReaderBase::readNameFromStrtab(ArrayRef<uint64_t> Record) { + if (!UseStrtab) + return {"", Record}; + // Invalid reference. Let the caller complain about the record being empty. + if (Record[0] + Record[1] > Strtab.size()) + return {"", {}}; + return {StringRef(Strtab.data() + Record[0], Record[1]), Record.slice(2)}; +} + class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { LLVMContext &Context; Module *TheModule = nullptr; @@ -492,8 +515,8 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { std::vector<std::string> BundleTags; public: - BitcodeReader(BitstreamCursor Stream, StringRef ProducerIdentification, - LLVMContext &Context); + BitcodeReader(BitstreamCursor Stream, StringRef Strtab, + StringRef ProducerIdentification, LLVMContext &Context); Error materializeForwardReferencedFunctions(); @@ -628,7 +651,10 @@ private: Expected<Value *> recordValue(SmallVectorImpl<uint64_t> &Record, unsigned NameIndex, Triple &TT); + void setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, Function *F, + ArrayRef<uint64_t> Record); Error parseValueSymbolTable(uint64_t Offset = 0); + Error parseGlobalValueSymbolTable(); Error parseConstants(); Error rememberAndSkipFunctionBodies(); Error rememberAndSkipFunctionBody(); @@ -681,12 +707,15 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { std::string SourceFileName; public: - ModuleSummaryIndexBitcodeReader( - BitstreamCursor Stream, ModuleSummaryIndex &TheIndex); + ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab, + ModuleSummaryIndex &TheIndex); Error parseModule(StringRef ModulePath); private: + void setValueGUID(uint64_t ValueID, StringRef ValueName, + GlobalValue::LinkageTypes Linkage, + StringRef SourceFileName); Error parseValueSymbolTable( uint64_t Offset, DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap); @@ -716,10 +745,10 @@ std::error_code llvm::errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, return std::error_code(); } -BitcodeReader::BitcodeReader(BitstreamCursor Stream, +BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context) - : BitcodeReaderBase(std::move(Stream)), Context(Context), + : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context), ValueList(Context) { this->ProducerIdentification = ProducerIdentification; } @@ -1749,6 +1778,54 @@ static uint64_t jumpToValueSymbolTable(uint64_t Offset, return CurrentBit; } +void BitcodeReader::setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, + Function *F, + ArrayRef<uint64_t> Record) { + // Note that we subtract 1 here because the offset is relative to one word + // before the start of the identification or module block, which was + // historically always the start of the regular bitcode header. + uint64_t FuncWordOffset = Record[1] - 1; + uint64_t FuncBitOffset = FuncWordOffset * 32; + DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; + // Set the LastFunctionBlockBit to point to the last function block. + // Later when parsing is resumed after function materialization, + // we can simply skip that last function block. + if (FuncBitOffset > LastFunctionBlockBit) + LastFunctionBlockBit = FuncBitOffset; +} + +/// Read a new-style GlobalValue symbol table. +Error BitcodeReader::parseGlobalValueSymbolTable() { + unsigned FuncBitcodeOffsetDelta = + Stream.getAbbrevIDWidth() + bitc::BlockIDWidth; + + if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) + return error("Invalid record"); + + SmallVector<uint64_t, 64> Record; + while (true) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return Error::success(); + case BitstreamEntry::Record: + break; + } + + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + case bitc::VST_CODE_FNENTRY: // [valueid, offset] + setDeferredFunctionInfo(FuncBitcodeOffsetDelta, + cast<Function>(ValueList[Record[0]]), Record); + break; + } + } +} + /// Parse the value symbol table at either the current parsing location or /// at the given bit offset if provided. Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { @@ -1756,8 +1833,18 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { // Pass in the Offset to distinguish between calling for the module-level // VST (where we want to jump to the VST offset) and the function-level // VST (where we don't). - if (Offset > 0) + if (Offset > 0) { CurrentBit = jumpToValueSymbolTable(Offset, Stream); + // If this module uses a string table, read this as a module-level VST. + if (UseStrtab) { + if (Error Err = parseGlobalValueSymbolTable()) + return Err; + Stream.JumpToBit(CurrentBit); + return Error::success(); + } + // Otherwise, the VST will be in a similar format to a function-level VST, + // and will contain symbol names. + } // Compute the delta between the bitcode indices in the VST (the word offset // to the word-aligned ENTER_SUBBLOCK for the function block, and that @@ -1818,23 +1905,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { return Err; Value *V = ValOrErr.get(); - auto *F = dyn_cast<Function>(V); // Ignore function offsets emitted for aliases of functions in older // versions of LLVM. - if (!F) - break; - - // Note that we subtract 1 here because the offset is relative to one word - // before the start of the identification or module block, which was - // historically always the start of the regular bitcode header. - uint64_t FuncWordOffset = Record[1] - 1; - uint64_t FuncBitOffset = FuncWordOffset * 32; - DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; - // Set the LastFunctionBlockBit to point to the last function block. - // Later when parsing is resumed after function materialization, - // we can simply skip that last function block. - if (FuncBitOffset > LastFunctionBlockBit) - LastFunctionBlockBit = FuncBitOffset; + if (auto *F = dyn_cast<Function>(V)) + setDeferredFunctionInfo(FuncBitcodeOffsetDelta, F, Record); break; } case bitc::VST_CODE_BBENTRY: { @@ -2557,6 +2631,7 @@ Error BitcodeReader::globalCleanup() { // Look for intrinsic functions which need to be upgraded at some point for (Function &F : *TheModule) { + MDLoader->upgradeDebugIntrinsics(F); Function *NewFn; if (UpgradeIntrinsicFunction(&F, NewFn)) UpgradedIntrinsics[&F] = NewFn; @@ -2626,15 +2701,24 @@ bool BitcodeReaderBase::readBlockInfo() { } Error BitcodeReader::parseComdatRecord(ArrayRef<uint64_t> Record) { - // [selection_kind, name] - if (Record.size() < 2) + // v1: [selection_kind, name] + // v2: [strtab_offset, strtab_size, selection_kind] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + + if (Record.size() < 1) return error("Invalid record"); Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]); - std::string Name; - unsigned ComdatNameSize = Record[1]; - Name.reserve(ComdatNameSize); - for (unsigned i = 0; i != ComdatNameSize; ++i) - Name += (char)Record[2 + i]; + std::string OldFormatName; + if (!UseStrtab) { + if (Record.size() < 2) + return error("Invalid record"); + unsigned ComdatNameSize = Record[1]; + OldFormatName.reserve(ComdatNameSize); + for (unsigned i = 0; i != ComdatNameSize; ++i) + OldFormatName += (char)Record[2 + i]; + Name = OldFormatName; + } Comdat *C = TheModule->getOrInsertComdat(Name); C->setSelectionKind(SK); ComdatList.push_back(C); @@ -2642,9 +2726,13 @@ Error BitcodeReader::parseComdatRecord(ArrayRef<uint64_t> Record) { } Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) { - // [pointer type, isconst, initid, linkage, alignment, section, + // v1: [pointer type, isconst, initid, linkage, alignment, section, // visibility, threadlocal, unnamed_addr, externally_initialized, - // dllstorageclass, comdat] + // dllstorageclass, comdat] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + if (Record.size() < 6) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); @@ -2692,7 +2780,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) { ExternallyInitialized = Record[9]; GlobalVariable *NewGV = - new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, "", + new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, Name, nullptr, TLM, AddressSpace, ExternallyInitialized); NewGV->setAlignment(Alignment); if (!Section.empty()) @@ -2724,9 +2812,13 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) { } Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) { - // [type, callingconv, isproto, linkage, paramattr, alignment, section, + // v1: [type, callingconv, isproto, linkage, paramattr, alignment, section, // visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat, - // prefixdata] + // prefixdata] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + if (Record.size() < 8) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); @@ -2742,7 +2834,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) { return error("Invalid calling convention ID"); Function *Func = - Function::Create(FTy, GlobalValue::ExternalLinkage, "", TheModule); + Function::Create(FTy, GlobalValue::ExternalLinkage, Name, TheModule); Func->setCallingConv(CC); bool isProto = Record[2]; @@ -2810,11 +2902,15 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) { Error BitcodeReader::parseGlobalIndirectSymbolRecord( unsigned BitCode, ArrayRef<uint64_t> Record) { - // ALIAS_OLD: [alias type, aliasee val#, linkage] - // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, - // dllstorageclass] - // IFUNC: [alias type, addrspace, aliasee val#, linkage, - // visibility, dllstorageclass] + // v1 ALIAS_OLD: [alias type, aliasee val#, linkage] (name in VST) + // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, + // dllstorageclass] (name in VST) + // v1 IFUNC: [alias type, addrspace, aliasee val#, linkage, + // visibility, dllstorageclass] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + bool NewRecord = BitCode != bitc::MODULE_CODE_ALIAS_OLD; if (Record.size() < (3 + (unsigned)NewRecord)) return error("Invalid record"); @@ -2839,10 +2935,10 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord( GlobalIndirectSymbol *NewGA; if (BitCode == bitc::MODULE_CODE_ALIAS || BitCode == bitc::MODULE_CODE_ALIAS_OLD) - NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "", + NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, TheModule); else - NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "", + NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, nullptr, TheModule); // Old bitcode files didn't have visibility field. // Local linkage must have default visibility. @@ -4570,8 +4666,8 @@ std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const { } ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader( - BitstreamCursor Cursor, ModuleSummaryIndex &TheIndex) - : BitcodeReaderBase(std::move(Cursor)), TheIndex(TheIndex) {} + BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex) + : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex) {} std::pair<GlobalValue::GUID, GlobalValue::GUID> ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) { @@ -4580,12 +4676,32 @@ ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) { return VGI->second; } +void ModuleSummaryIndexBitcodeReader::setValueGUID( + uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage, + StringRef SourceFileName) { + std::string GlobalId = + GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName); + auto ValueGUID = GlobalValue::getGUID(GlobalId); + auto OriginalNameID = ValueGUID; + if (GlobalValue::isLocalLinkage(Linkage)) + OriginalNameID = GlobalValue::getGUID(ValueName); + if (PrintSummaryGUIDs) + dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is " + << ValueName << "\n"; + ValueIdToCallGraphGUIDMap[ValueID] = + std::make_pair(ValueGUID, OriginalNameID); +} + // Specialized value symbol table parser used when reading module index // blocks where we don't actually create global values. The parsed information // is saved in the bitcode reader for use when later parsing summaries. Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( uint64_t Offset, DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap) { + // With a strtab the VST is not required to parse the summary. + if (UseStrtab) + return Error::success(); + assert(Offset > 0 && "Expected non-zero VST offset"); uint64_t CurrentBit = jumpToValueSymbolTable(Offset, Stream); @@ -4627,17 +4743,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; - std::string GlobalId = - GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName); - auto ValueGUID = GlobalValue::getGUID(GlobalId); - auto OriginalNameID = ValueGUID; - if (GlobalValue::isLocalLinkage(Linkage)) - OriginalNameID = GlobalValue::getGUID(ValueName); - if (PrintSummaryGUIDs) - dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is " - << ValueName << "\n"; - ValueIdToCallGraphGUIDMap[ValueID] = - std::make_pair(ValueGUID, OriginalNameID); + setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } @@ -4651,18 +4757,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; - std::string FunctionGlobalId = GlobalValue::getGlobalIdentifier( - ValueName, VLI->second, SourceFileName); - auto FunctionGUID = GlobalValue::getGUID(FunctionGlobalId); - auto OriginalNameID = FunctionGUID; - if (GlobalValue::isLocalLinkage(Linkage)) - OriginalNameID = GlobalValue::getGUID(ValueName); - if (PrintSummaryGUIDs) - dbgs() << "GUID " << FunctionGUID << "(" << OriginalNameID << ") is " - << ValueName << "\n"; - ValueIdToCallGraphGUIDMap[ValueID] = - std::make_pair(FunctionGUID, OriginalNameID); - + setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } @@ -4749,6 +4844,11 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { switch (BitCode) { default: break; // Default behavior, ignore unknown content. + case bitc::MODULE_CODE_VERSION: { + if (Error Err = parseVersionRecord(Record).takeError()) + return Err; + break; + } /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] case bitc::MODULE_CODE_SOURCE_FILENAME: { SmallString<128> ValueName; @@ -4783,17 +4883,26 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { // was historically always the start of the regular bitcode header. VSTOffset = Record[0] - 1; break; - // GLOBALVAR: [pointer type, isconst, initid, linkage, ...] - // FUNCTION: [type, callingconv, isproto, linkage, ...] - // ALIAS: [alias type, addrspace, aliasee val#, linkage, ...] + // v1 GLOBALVAR: [pointer type, isconst, initid, linkage, ...] + // v1 FUNCTION: [type, callingconv, isproto, linkage, ...] + // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, ...] + // v2: [strtab offset, strtab size, v1] case bitc::MODULE_CODE_GLOBALVAR: case bitc::MODULE_CODE_FUNCTION: case bitc::MODULE_CODE_ALIAS: { - if (Record.size() <= 3) + StringRef Name; + ArrayRef<uint64_t> GVRecord; + std::tie(Name, GVRecord) = readNameFromStrtab(Record); + if (GVRecord.size() <= 3) return error("Invalid record"); - uint64_t RawLinkage = Record[3]; + uint64_t RawLinkage = GVRecord[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); - ValueIdToLinkageMap[ValueId++] = Linkage; + if (!UseStrtab) { + ValueIdToLinkageMap[ValueId++] = Linkage; + break; + } + + setValueGUID(ValueId++, Name, Linkage, SourceFileName); break; } } @@ -4904,6 +5013,12 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( switch (BitCode) { default: // Default behavior: ignore. break; + case bitc::FS_VALUE_GUID: { // [valueid, refguid] + uint64_t ValueID = Record[0]; + GlobalValue::GUID RefGUID = Record[1]; + ValueIdToCallGraphGUIDMap[ValueID] = std::make_pair(RefGUID, RefGUID); + break; + } // FS_PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid, // n x (valueid)] // FS_PERMODULE_PROFILE: [valueid, flags, instcount, numrefs, @@ -5208,6 +5323,35 @@ const std::error_category &llvm::BitcodeErrorCategory() { return *ErrorCategory; } +static Expected<StringRef> readStrtab(BitstreamCursor &Stream) { + if (Stream.EnterSubBlock(bitc::STRTAB_BLOCK_ID)) + return error("Invalid record"); + + StringRef Strtab; + while (1) { + BitstreamEntry Entry = Stream.advance(); + switch (Entry.Kind) { + case BitstreamEntry::EndBlock: + return Strtab; + + case BitstreamEntry::Error: + return error("Malformed block"); + + case BitstreamEntry::SubBlock: + if (Stream.SkipBlock()) + return error("Malformed block"); + break; + + case BitstreamEntry::Record: + StringRef Blob; + SmallVector<uint64_t, 1> Record; + if (Stream.readRecord(Entry.ID, Record, &Blob) == bitc::STRTAB_BLOB) + Strtab = Blob; + break; + } + } +} + //===----------------------------------------------------------------------===// // External interface //===----------------------------------------------------------------------===// @@ -5260,6 +5404,22 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { continue; } + if (Entry.ID == bitc::STRTAB_BLOCK_ID) { + Expected<StringRef> Strtab = readStrtab(Stream); + if (!Strtab) + return Strtab.takeError(); + // This string table is used by every preceding bitcode module that does + // not have its own string table. A bitcode file may have multiple + // string tables if it was created by binary concatenation, for example + // with "llvm-cat -b". + for (auto I = Modules.rbegin(), E = Modules.rend(); I != E; ++I) { + if (!I->Strtab.empty()) + break; + I->Strtab = *Strtab; + } + continue; + } + if (Stream.SkipBlock()) return error("Malformed block"); continue; @@ -5296,8 +5456,8 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll, } Stream.JumpToBit(ModuleBit); - auto *R = - new BitcodeReader(std::move(Stream), ProducerIdentification, Context); + auto *R = new BitcodeReader(std::move(Stream), Strtab, ProducerIdentification, + Context); std::unique_ptr<Module> M = llvm::make_unique<Module>(ModuleIdentifier, Context); @@ -5332,7 +5492,7 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() { Stream.JumpToBit(ModuleBit); auto Index = llvm::make_unique<ModuleSummaryIndex>(); - ModuleSummaryIndexBitcodeReader R(std::move(Stream), *Index); + ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index); if (Error Err = R.parseModule(ModuleIdentifier)) return std::move(Err); diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 274dfe89cce5..d089684a052f 100644 --- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -54,6 +54,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" @@ -452,6 +453,7 @@ class MetadataLoader::MetadataLoaderImpl { bool StripTBAA = false; bool HasSeenOldLoopTags = false; bool NeedUpgradeToDIGlobalVariableExpression = false; + bool NeedDeclareExpressionUpgrade = false; /// True if metadata is being parsed for a module being ThinLTO imported. bool IsImporting = false; @@ -511,6 +513,26 @@ class MetadataLoader::MetadataLoaderImpl { } } + /// Remove a leading DW_OP_deref from DIExpressions in a dbg.declare that + /// describes a function argument. + void upgradeDeclareExpressions(Function &F) { + if (!NeedDeclareExpressionUpgrade) + return; + + for (auto &BB : F) + for (auto &I : BB) + if (auto *DDI = dyn_cast<DbgDeclareInst>(&I)) + if (auto *DIExpr = DDI->getExpression()) + if (DIExpr->startsWithDeref() && + dyn_cast_or_null<Argument>(DDI->getAddress())) { + SmallVector<uint64_t, 8> Ops; + Ops.append(std::next(DIExpr->elements_begin()), + DIExpr->elements_end()); + auto *E = DIExpression::get(Context, Ops); + DDI->setOperand(2, MetadataAsValue::get(Context, E)); + } + } + void upgradeDebugInfo() { upgradeCUSubprograms(); upgradeCUVariables(); @@ -565,6 +587,7 @@ public: unsigned size() const { return MetadataList.size(); } void shrinkTo(unsigned N) { MetadataList.shrinkTo(N); } + void upgradeDebugIntrinsics(Function &F) { upgradeDeclareExpressions(F); } }; static Error error(const Twine &Message) { @@ -1520,12 +1543,32 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( return error("Invalid record"); IsDistinct = Record[0] & 1; - bool HasOpFragment = Record[0] & 2; + uint64_t Version = Record[0] >> 1; auto Elts = MutableArrayRef<uint64_t>(Record).slice(1); - if (!HasOpFragment) - if (unsigned N = Elts.size()) - if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece) - Elts[N - 3] = dwarf::DW_OP_LLVM_fragment; + unsigned N = Elts.size(); + // Perform various upgrades. + switch (Version) { + case 0: + if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece) + Elts[N - 3] = dwarf::DW_OP_LLVM_fragment; + LLVM_FALLTHROUGH; + case 1: + // Move DW_OP_deref to the end. + if (N && Elts[0] == dwarf::DW_OP_deref) { + auto End = Elts.end(); + if (Elts.size() >= 3 && *std::prev(End, 3) == dwarf::DW_OP_LLVM_fragment) + End = std::prev(End, 3); + std::move(std::next(Elts.begin()), End, Elts.begin()); + *std::prev(End) = dwarf::DW_OP_deref; + } + NeedDeclareExpressionUpgrade = true; + LLVM_FALLTHROUGH; + case 2: + // Up-to-date! + break; + default: + return error("Invalid record"); + } MetadataList.assignValue( GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))), @@ -1858,3 +1901,7 @@ bool MetadataLoader::isStrippingTBAA() { return Pimpl->isStrippingTBAA(); } unsigned MetadataLoader::size() const { return Pimpl->size(); } void MetadataLoader::shrinkTo(unsigned N) { return Pimpl->shrinkTo(N); } + +void MetadataLoader::upgradeDebugIntrinsics(Function &F) { + return Pimpl->upgradeDebugIntrinsics(F); +} diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h index 442dfc94e4e1..f23dcc06cc94 100644 --- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h +++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h @@ -79,6 +79,9 @@ public: unsigned size() const; void shrinkTo(unsigned N); + + /// Perform bitcode upgrades on llvm.dbg.* calls. + void upgradeDebugIntrinsics(Function &F); }; } diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 043441bac4de..1d3cde2f5ddb 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" +#include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Program.h" @@ -76,26 +77,28 @@ protected: /// The stream created and owned by the client. BitstreamWriter &Stream; - /// Saves the offset of the VSTOffset record that must eventually be - /// backpatched with the offset of the actual VST. - uint64_t VSTOffsetPlaceholder = 0; - public: /// Constructs a BitcodeWriterBase object that writes to the provided /// \p Stream. BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {} protected: - bool hasVSTOffsetPlaceholder() { return VSTOffsetPlaceholder != 0; } - void writeValueSymbolTableForwardDecl(); void writeBitcodeHeader(); + void writeModuleVersion(); }; +void BitcodeWriterBase::writeModuleVersion() { + // VERSION: [version#] + Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef<uint64_t>{2}); +} + /// Class to manage the bitcode writing for a module. class ModuleBitcodeWriter : public BitcodeWriterBase { /// Pointer to the buffer allocated by caller for bitcode writing. const SmallVectorImpl<char> &Buffer; + StringTableBuilder &StrtabBuilder; + /// The Module to write to bitcode. const Module &M; @@ -127,15 +130,20 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { /// Tracks the last value id recorded in the GUIDToValueMap. unsigned GlobalValueId; + /// Saves the offset of the VSTOffset record that must eventually be + /// backpatched with the offset of the actual VST. + uint64_t VSTOffsetPlaceholder = 0; + public: /// Constructs a ModuleBitcodeWriter object for the given Module, /// writing to the provided \p Buffer. ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer, + StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash = nullptr) - : BitcodeWriterBase(Stream), Buffer(Buffer), M(*M), - VE(*M, ShouldPreserveUseListOrder), Index(Index), + : BitcodeWriterBase(Stream), Buffer(Buffer), StrtabBuilder(StrtabBuilder), + M(*M), VE(*M, ShouldPreserveUseListOrder), Index(Index), GenerateHash(GenerateHash), ModHash(ModHash), BitcodeStartBit(Stream.GetCurrentBitNo()) { // Assign ValueIds to any callee values in the index that came from @@ -169,6 +177,7 @@ private: void writeAttributeTable(); void writeTypeTable(); void writeComdats(); + void writeValueSymbolTableForwardDecl(); void writeModuleInfo(); void writeValueAsMetadata(const ValueAsMetadata *MD, SmallVectorImpl<uint64_t> &Record); @@ -261,9 +270,9 @@ private: SmallVectorImpl<uint64_t> &Vals); void writeInstruction(const Instruction &I, unsigned InstID, SmallVectorImpl<unsigned> &Vals); - void writeValueSymbolTable( - const ValueSymbolTable &VST, bool IsModuleLevel = false, - DenseMap<const Function *, uint64_t> *FunctionToBitcodeIndex = nullptr); + void writeFunctionLevelValueSymbolTable(const ValueSymbolTable &VST); + void writeGlobalValueSymbolTable( + DenseMap<const Function *, uint64_t> &FunctionToBitcodeIndex); void writeUseList(UseListOrder &&Order); void writeUseListBlock(const Function *F); void @@ -477,7 +486,6 @@ public: private: void writeModStrings(); - void writeCombinedValueSymbolTable(); void writeCombinedGlobalValueSummary(); /// Indicates whether the provided \p ModulePath should be written into @@ -492,15 +500,15 @@ private: const auto &VMI = GUIDToValueIdMap.find(ValGUID); return VMI != GUIDToValueIdMap.end(); } + void assignValueId(GlobalValue::GUID ValGUID) { + unsigned &ValueId = GUIDToValueIdMap[ValGUID]; + if (ValueId == 0) + ValueId = ++GlobalValueId; + } unsigned getValueId(GlobalValue::GUID ValGUID) { - const auto &VMI = GUIDToValueIdMap.find(ValGUID); - // If this GUID doesn't have an entry, assign one. - if (VMI == GUIDToValueIdMap.end()) { - GUIDToValueIdMap[ValGUID] = ++GlobalValueId; - return GlobalValueId; - } else { - return VMI->second; - } + auto VMI = GUIDToValueIdMap.find(ValGUID); + assert(VMI != GUIDToValueIdMap.end()); + return VMI->second; } std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; } }; @@ -1047,13 +1055,10 @@ static unsigned getEncodedUnnamedAddr(const GlobalValue &GV) { void ModuleBitcodeWriter::writeComdats() { SmallVector<unsigned, 64> Vals; for (const Comdat *C : VE.getComdats()) { - // COMDAT: [selection_kind, name] + // COMDAT: [strtab offset, strtab size, selection_kind] + Vals.push_back(StrtabBuilder.add(C->getName())); + Vals.push_back(C->getName().size()); Vals.push_back(getEncodedComdatSelectionKind(*C)); - size_t Size = C->getName().size(); - assert(isUInt<32>(Size)); - Vals.push_back(Size); - for (char Chr : C->getName()) - Vals.push_back((unsigned char)Chr); Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0); Vals.clear(); } @@ -1062,7 +1067,7 @@ void ModuleBitcodeWriter::writeComdats() { /// Write a record that will eventually hold the word offset of the /// module-level VST. For now the offset is 0, which will be backpatched /// after the real VST is written. Saves the bit offset to backpatch. -void BitcodeWriterBase::writeValueSymbolTableForwardDecl() { +void ModuleBitcodeWriter::writeValueSymbolTableForwardDecl() { // Write a placeholder value in for the offset of the real VST, // which is written after the function blocks so that it can include // the offset of each function. The placeholder offset will be @@ -1165,6 +1170,8 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Add an abbrev for common globals with no visibility or thread localness. auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(MaxGlobalType+1))); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddrSpace << 2 @@ -1188,15 +1195,42 @@ void ModuleBitcodeWriter::writeModuleInfo() { SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv)); } - // Emit the global variable information. SmallVector<unsigned, 64> Vals; + // Emit the module's source file name. + { + StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(), + M.getSourceFileName().size()); + BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); + if (Bits == SE_Char6) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); + else if (Bits == SE_Fixed7) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); + + // MODULE_CODE_SOURCE_FILENAME: [namechar x N] + auto Abbv = std::make_shared<BitCodeAbbrev>(); + Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(AbbrevOpToUse); + unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + for (const auto P : M.getSourceFileName()) + Vals.push_back((unsigned char)P); + + // Emit the finished record. + Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); + Vals.clear(); + } + + // Emit the global variable information. for (const GlobalVariable &GV : M.globals()) { unsigned AbbrevToUse = 0; - // GLOBALVAR: [type, isconst, initid, + // GLOBALVAR: [strtab offset, strtab size, type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, // comdat] + Vals.push_back(StrtabBuilder.add(GV.getName())); + Vals.push_back(GV.getName().size()); Vals.push_back(VE.getTypeID(GV.getValueType())); Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant()); Vals.push_back(GV.isDeclaration() ? 0 : @@ -1226,9 +1260,12 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Emit the function proto information. for (const Function &F : M) { - // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, - // section, visibility, gc, unnamed_addr, prologuedata, - // dllstorageclass, comdat, prefixdata, personalityfn] + // FUNCTION: [strtab offset, strtab size, type, callingconv, isproto, + // linkage, paramattrs, alignment, section, visibility, gc, + // unnamed_addr, prologuedata, dllstorageclass, comdat, + // prefixdata, personalityfn] + Vals.push_back(StrtabBuilder.add(F.getName())); + Vals.push_back(F.getName().size()); Vals.push_back(VE.getTypeID(F.getFunctionType())); Vals.push_back(F.getCallingConv()); Vals.push_back(F.isDeclaration()); @@ -1255,8 +1292,10 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Emit the alias information. for (const GlobalAlias &A : M.aliases()) { - // ALIAS: [alias type, aliasee val#, linkage, visibility, dllstorageclass, - // threadlocal, unnamed_addr] + // ALIAS: [strtab offset, strtab size, alias type, aliasee val#, linkage, + // visibility, dllstorageclass, threadlocal, unnamed_addr] + Vals.push_back(StrtabBuilder.add(A.getName())); + Vals.push_back(A.getName().size()); Vals.push_back(VE.getTypeID(A.getValueType())); Vals.push_back(A.getType()->getAddressSpace()); Vals.push_back(VE.getValueID(A.getAliasee())); @@ -1272,7 +1311,10 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Emit the ifunc information. for (const GlobalIFunc &I : M.ifuncs()) { - // IFUNC: [ifunc type, address space, resolver val#, linkage, visibility] + // IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver + // val#, linkage, visibility] + Vals.push_back(StrtabBuilder.add(I.getName())); + Vals.push_back(I.getName().size()); Vals.push_back(VE.getTypeID(I.getValueType())); Vals.push_back(I.getType()->getAddressSpace()); Vals.push_back(VE.getValueID(I.getResolver())); @@ -1282,34 +1324,6 @@ void ModuleBitcodeWriter::writeModuleInfo() { Vals.clear(); } - // Emit the module's source file name. - { - StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(), - M.getSourceFileName().size()); - BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); - if (Bits == SE_Char6) - AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); - else if (Bits == SE_Fixed7) - AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); - - // MODULE_CODE_SOURCE_FILENAME: [namechar x N] - auto Abbv = std::make_shared<BitCodeAbbrev>(); - Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(AbbrevOpToUse); - unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); - - for (const auto P : M.getSourceFileName()) - Vals.push_back((unsigned char)P); - - // Emit the finished record. - Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); - Vals.clear(); - } - - // If we have a VST, write the VSTOFFSET record placeholder. - if (M.getValueSymbolTable().empty()) - return; writeValueSymbolTableForwardDecl(); } @@ -1757,9 +1771,8 @@ void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N, SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) { Record.reserve(N->getElements().size() + 1); - - const uint64_t HasOpFragmentFlag = 1 << 1; - Record.push_back((uint64_t)N->isDistinct() | HasOpFragmentFlag); + const uint64_t Version = 2 << 1; + Record.push_back((uint64_t)N->isDistinct() | Version); Record.append(N->elements_begin(), N->elements_end()); Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev); @@ -2839,77 +2852,59 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.clear(); } -/// Emit names for globals/functions etc. \p IsModuleLevel is true when -/// we are writing the module-level VST, where we are including a function -/// bitcode index and need to backpatch the VST forward declaration record. -void ModuleBitcodeWriter::writeValueSymbolTable( - const ValueSymbolTable &VST, bool IsModuleLevel, - DenseMap<const Function *, uint64_t> *FunctionToBitcodeIndex) { - if (VST.empty()) { - // writeValueSymbolTableForwardDecl should have returned early as - // well. Ensure this handling remains in sync by asserting that - // the placeholder offset is not set. - assert(!IsModuleLevel || !hasVSTOffsetPlaceholder()); - return; - } +/// Write a GlobalValue VST to the module. The purpose of this data structure is +/// to allow clients to efficiently find the function body. +void ModuleBitcodeWriter::writeGlobalValueSymbolTable( + DenseMap<const Function *, uint64_t> &FunctionToBitcodeIndex) { + // Get the offset of the VST we are writing, and backpatch it into + // the VST forward declaration record. + uint64_t VSTOffset = Stream.GetCurrentBitNo(); + // The BitcodeStartBit was the stream offset of the identification block. + VSTOffset -= bitcodeStartBit(); + assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); + // Note that we add 1 here because the offset is relative to one word + // before the start of the identification block, which was historically + // always the start of the regular bitcode header. + Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1); + + Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); + + auto Abbv = std::make_shared<BitCodeAbbrev>(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + unsigned FnEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + for (const Function &F : M) { + uint64_t Record[2]; - if (IsModuleLevel && hasVSTOffsetPlaceholder()) { - // Get the offset of the VST we are writing, and backpatch it into - // the VST forward declaration record. - uint64_t VSTOffset = Stream.GetCurrentBitNo(); - // The BitcodeStartBit was the stream offset of the identification block. - VSTOffset -= bitcodeStartBit(); - assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); + if (F.isDeclaration()) + continue; + + Record[0] = VE.getValueID(&F); + + // Save the word offset of the function (from the start of the + // actual bitcode written to the stream). + uint64_t BitcodeIndex = FunctionToBitcodeIndex[&F] - bitcodeStartBit(); + assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); // Note that we add 1 here because the offset is relative to one word // before the start of the identification block, which was historically // always the start of the regular bitcode header. - Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1); - } - - Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); + Record[1] = BitcodeIndex / 32 + 1; - // For the module-level VST, add abbrev Ids for the VST_CODE_FNENTRY - // records, which are not used in the per-function VSTs. - unsigned FnEntry8BitAbbrev; - unsigned FnEntry7BitAbbrev; - unsigned FnEntry6BitAbbrev; - unsigned GUIDEntryAbbrev; - if (IsModuleLevel && hasVSTOffsetPlaceholder()) { - // 8-bit fixed-width VST_CODE_FNENTRY function strings. - auto Abbv = std::make_shared<BitCodeAbbrev>(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); - FnEntry8BitAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + Stream.EmitRecord(bitc::VST_CODE_FNENTRY, Record, FnEntryAbbrev); + } - // 7-bit fixed width VST_CODE_FNENTRY function strings. - Abbv = std::make_shared<BitCodeAbbrev>(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); - FnEntry7BitAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + Stream.ExitBlock(); +} - // 6-bit char6 VST_CODE_FNENTRY function strings. - Abbv = std::make_shared<BitCodeAbbrev>(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); - FnEntry6BitAbbrev = Stream.EmitAbbrev(std::move(Abbv)); +/// Emit names for arguments, instructions and basic blocks in a function. +void ModuleBitcodeWriter::writeFunctionLevelValueSymbolTable( + const ValueSymbolTable &VST) { + if (VST.empty()) + return; - // FIXME: Change the name of this record as it is now used by - // the per-module index as well. - Abbv = std::make_shared<BitCodeAbbrev>(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid - GUIDEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv)); - } + Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); // FIXME: Set up the abbrev, we know how many values there are! // FIXME: We know if the type names can use 7-bit ascii. @@ -2923,38 +2918,13 @@ void ModuleBitcodeWriter::writeValueSymbolTable( unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; NameVals.push_back(VE.getValueID(Name.getValue())); - Function *F = dyn_cast<Function>(Name.getValue()); - // VST_CODE_ENTRY: [valueid, namechar x N] - // VST_CODE_FNENTRY: [valueid, funcoffset, namechar x N] // VST_CODE_BBENTRY: [bbid, namechar x N] unsigned Code; if (isa<BasicBlock>(Name.getValue())) { Code = bitc::VST_CODE_BBENTRY; if (Bits == SE_Char6) AbbrevToUse = VST_BBENTRY_6_ABBREV; - } else if (F && !F->isDeclaration()) { - // Must be the module-level VST, where we pass in the Index and - // have a VSTOffsetPlaceholder. The function-level VST should not - // contain any Function symbols. - assert(FunctionToBitcodeIndex); - assert(hasVSTOffsetPlaceholder()); - - // Save the word offset of the function (from the start of the - // actual bitcode written to the stream). - uint64_t BitcodeIndex = (*FunctionToBitcodeIndex)[F] - bitcodeStartBit(); - assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); - // Note that we add 1 here because the offset is relative to one word - // before the start of the identification block, which was historically - // always the start of the regular bitcode header. - NameVals.push_back(BitcodeIndex / 32 + 1); - - Code = bitc::VST_CODE_FNENTRY; - AbbrevToUse = FnEntry8BitAbbrev; - if (Bits == SE_Char6) - AbbrevToUse = FnEntry6BitAbbrev; - else if (Bits == SE_Fixed7) - AbbrevToUse = FnEntry7BitAbbrev; } else { Code = bitc::VST_CODE_ENTRY; if (Bits == SE_Char6) @@ -2970,47 +2940,7 @@ void ModuleBitcodeWriter::writeValueSymbolTable( Stream.EmitRecord(Code, NameVals, AbbrevToUse); NameVals.clear(); } - // Emit any GUID valueIDs created for indirect call edges into the - // module-level VST. - if (IsModuleLevel && hasVSTOffsetPlaceholder()) - for (const auto &GI : valueIds()) { - NameVals.push_back(GI.second); - NameVals.push_back(GI.first); - Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals, - GUIDEntryAbbrev); - NameVals.clear(); - } - Stream.ExitBlock(); -} - -/// Emit function names and summary offsets for the combined index -/// used by ThinLTO. -void IndexBitcodeWriter::writeCombinedValueSymbolTable() { - assert(hasVSTOffsetPlaceholder() && "Expected non-zero VSTOffsetPlaceholder"); - // Get the offset of the VST we are writing, and backpatch it into - // the VST forward declaration record. - uint64_t VSTOffset = Stream.GetCurrentBitNo(); - assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); - Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32); - - Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); - - auto Abbv = std::make_shared<BitCodeAbbrev>(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid - unsigned EntryAbbrev = Stream.EmitAbbrev(std::move(Abbv)); - SmallVector<uint64_t, 64> NameVals; - for (const auto &GVI : valueIds()) { - // VST_CODE_COMBINED_ENTRY: [valueid, refguid] - NameVals.push_back(GVI.second); - NameVals.push_back(GVI.first); - - // Emit the finished record. - Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals, EntryAbbrev); - NameVals.clear(); - } Stream.ExitBlock(); } @@ -3114,7 +3044,7 @@ void ModuleBitcodeWriter::writeFunction( // Emit names for all the instructions etc. if (auto *Symtab = F.getValueSymbolTable()) - writeValueSymbolTable(*Symtab); + writeFunctionLevelValueSymbolTable(*Symtab); if (NeedsMetadataAttachment) writeFunctionMetadataAttachment(F); @@ -3502,6 +3432,11 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() { return; } + for (const auto &GVI : valueIds()) { + Stream.EmitRecord(bitc::FS_VALUE_GUID, + ArrayRef<uint64_t>{GVI.second, GVI.first}); + } + // Abbrev for FS_PERMODULE. auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE)); @@ -3594,6 +3529,39 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3); Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION}); + // Create value IDs for undefined references. + for (const auto &I : *this) { + if (auto *VS = dyn_cast<GlobalVarSummary>(I.second)) { + for (auto &RI : VS->refs()) + assignValueId(RI.getGUID()); + continue; + } + + auto *FS = dyn_cast<FunctionSummary>(I.second); + if (!FS) + continue; + for (auto &RI : FS->refs()) + assignValueId(RI.getGUID()); + + for (auto &EI : FS->calls()) { + GlobalValue::GUID GUID = EI.first.getGUID(); + if (!hasValueId(GUID)) { + // For SamplePGO, the indirect call targets for local functions will + // have its original name annotated in profile. We try to find the + // corresponding PGOFuncName as the GUID. + GUID = Index.getGUIDFromOriginalID(GUID); + if (GUID == 0 || !hasValueId(GUID)) + continue; + } + assignValueId(GUID); + } + } + + for (const auto &GVI : valueIds()) { + Stream.EmitRecord(bitc::FS_VALUE_GUID, + ArrayRef<uint64_t>{GVI.second, GVI.first}); + } + // Abbrev for FS_COMBINED. auto Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED)); @@ -3808,10 +3776,7 @@ void ModuleBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); size_t BlockStartPos = Buffer.size(); - SmallVector<unsigned, 1> Vals; - unsigned CurVersion = 1; - Vals.push_back(CurVersion); - Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); + writeModuleVersion(); // Emit blockinfo, which defines the standard abbreviations etc. writeBlockInfo(); @@ -3857,8 +3822,7 @@ void ModuleBitcodeWriter::write() { if (Index) writePerModuleGlobalValueSummary(); - writeValueSymbolTable(M.getValueSymbolTable(), - /* IsModuleLevel */ true, &FunctionToBitcodeIndex); + writeGlobalValueSymbolTable(FunctionToBitcodeIndex); writeModuleHash(BlockStartPos); @@ -3946,13 +3910,45 @@ BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer) writeBitcodeHeader(*Stream); } -BitcodeWriter::~BitcodeWriter() = default; +BitcodeWriter::~BitcodeWriter() { assert(WroteStrtab); } + +void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) { + Stream->EnterSubblock(Block, 3); + + auto Abbv = std::make_shared<BitCodeAbbrev>(); + Abbv->Add(BitCodeAbbrevOp(Record)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + auto AbbrevNo = Stream->EmitAbbrev(std::move(Abbv)); + + Stream->EmitRecordWithBlob(AbbrevNo, ArrayRef<uint64_t>{Record}, Blob); + + Stream->ExitBlock(); +} + +void BitcodeWriter::writeStrtab() { + assert(!WroteStrtab); + + std::vector<char> Strtab; + StrtabBuilder.finalizeInOrder(); + Strtab.resize(StrtabBuilder.getSize()); + StrtabBuilder.write((uint8_t *)Strtab.data()); + + writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, + {Strtab.data(), Strtab.size()}); + + WroteStrtab = true; +} + +void BitcodeWriter::copyStrtab(StringRef Strtab) { + writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, Strtab); + WroteStrtab = true; +} void BitcodeWriter::writeModule(const Module *M, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash) { - ModuleBitcodeWriter ModuleWriter(M, Buffer, *Stream, + ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); ModuleWriter.write(); @@ -3976,6 +3972,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, BitcodeWriter Writer(Buffer); Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); + Writer.writeStrtab(); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) emitDarwinBCHeaderAndTrailer(Buffer, TT); @@ -3987,13 +3984,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, void IndexBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); - SmallVector<unsigned, 1> Vals; - unsigned CurVersion = 1; - Vals.push_back(CurVersion); - Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); - - // If we have a VST, write the VSTOFFSET record placeholder. - writeValueSymbolTableForwardDecl(); + writeModuleVersion(); // Write the module paths in the combined index. writeModStrings(); @@ -4001,10 +3992,6 @@ void IndexBitcodeWriter::write() { // Write the summary combined index records. writeCombinedGlobalValueSummary(); - // Need a special VST writer for the combined index (we don't have a - // real VST and real values when this is invoked). - writeCombinedValueSymbolTable(); - Stream.ExitBlock(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 6c18d56b8272..028c79f3ab6d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -834,9 +834,9 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << " <- "; // The second operand is only an offset if it's an immediate. - bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); - int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; - + bool Deref = false; + bool MemLoc = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); + int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0; for (unsigned i = 0; i < Expr->getNumElements(); ++i) { uint64_t Op = Expr->getElement(i); if (Op == dwarf::DW_OP_LLVM_fragment) { @@ -844,7 +844,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { break; } else if (Deref) { // We currently don't support extra Offsets or derefs after the first - // one. Bail out early instead of emitting an incorrect comment + // one. Bail out early instead of emitting an incorrect comment. OS << " [complex expression]"; AP.OutStreamer->emitRawComment(OS.str()); return true; @@ -899,12 +899,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { AP.OutStreamer->emitRawComment(OS.str()); return true; } - if (Deref) + if (MemLoc || Deref) OS << '['; OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); } - if (Deref) + if (MemLoc || Deref) OS << '+' << Offset << ']'; // NOTE: Want this comment at start of line, don't emit with AddComment. @@ -1356,7 +1356,7 @@ bool AsmPrinter::doFinalization(Module &M) { OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); OutStreamer->EmitLabel(AddrSymbol); - unsigned PtrSize = M.getDataLayout().getPointerSize(0); + unsigned PtrSize = MAI->getCodePointerSize(); OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), PtrSize); } @@ -2246,7 +2246,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN] ExtraBits = Realigned.getRawData()[0] & (((uint64_t)-1) >> (64 - ExtraBitsSize)); - Realigned = Realigned.lshr(ExtraBitsSize); + Realigned.lshrInPlace(ExtraBitsSize); } else ExtraBits = Realigned.getRawData()[BitWidth / 64]; } @@ -2781,7 +2781,7 @@ void AsmPrinter::emitXRayTable() { // before the function's end, we assume that this is happening after // the last return instruction. - auto WordSizeBytes = TM.getPointerSize(); + auto WordSizeBytes = MAI->getCodePointerSize(); MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); OutStreamer->EmitCodeAlignment(16); OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 383b8cddb1a0..2571f6869651 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1136,7 +1136,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { DITypeRef ElementTypeRef = Ty->getBaseType(); TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef); // IndexType is size_t, which depends on the bitness of the target. - TypeIndex IndexType = Asm->MAI->getPointerSize() == 8 + TypeIndex IndexType = Asm->TM.getPointerSize() == 8 ? TypeIndex(SimpleTypeKind::UInt64Quad) : TypeIndex(SimpleTypeKind::UInt32Long); @@ -1342,8 +1342,8 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) { assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type); TypeIndex ClassTI = getTypeIndex(Ty->getClassType()); TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType()); - PointerKind PK = Asm->MAI->getPointerSize() == 8 ? PointerKind::Near64 - : PointerKind::Near32; + PointerKind PK = Asm->TM.getPointerSize() == 8 ? PointerKind::Near64 + : PointerKind::Near32; bool IsPMF = isa<DISubroutineType>(Ty->getBaseType()); PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction : PointerMode::PointerToDataMember; @@ -1458,7 +1458,8 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, } TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) { - unsigned VSlotCount = Ty->getSizeInBits() / (8 * Asm->MAI->getPointerSize()); + unsigned VSlotCount = + Ty->getSizeInBits() / (8 * Asm->MAI->getCodePointerSize()); SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near); VFTableShapeRecord VFTSR(Slots); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index b510e0ef36ac..31c2b3b5e752 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -31,6 +31,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "dwarfdebug" + //===----------------------------------------------------------------------===// // DIEAbbrevData Implementation //===----------------------------------------------------------------------===// @@ -79,15 +81,22 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const { dwarf::AttributeString(AttrData.getAttribute()).data()); // Emit form type. +#ifndef NDEBUG + // Could be an assertion, but this way we can see the failing form code + // easily, which helps track down where it came from. + if (!dwarf::isValidFormForVersion(AttrData.getForm(), + AP->getDwarfVersion())) { + DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm()) + << " for DWARF version " << AP->getDwarfVersion() << "\n"); + llvm_unreachable("Invalid form for specified DWARF version"); + } +#endif AP->EmitULEB128(AttrData.getForm(), dwarf::FormEncodingString(AttrData.getForm()).data()); // Emit value for DW_FORM_implicit_const. - if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) { - assert(AP->getDwarfVersion() >= 5 && - "DW_FORM_implicit_const is supported starting from DWARFv5"); + if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) AP->EmitSLEB128(AttrData.getValue()); - } } // Mark end of abbreviation. @@ -518,7 +527,7 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getPointerSize(); + return AP->MAI->getCodePointerSize(); } LLVM_DUMP_METHOD @@ -540,7 +549,7 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getPointerSize(); + return AP->MAI->getCodePointerSize(); } LLVM_DUMP_METHOD @@ -682,7 +691,7 @@ unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { return getULEB128Size(Entry->getOffset()); case dwarf::DW_FORM_ref_addr: if (AP->getDwarfVersion() == 2) - return AP->getPointerSize(); + return AP->MAI->getCodePointerSize(); switch (AP->OutStreamer->getContext().getDwarfFormat()) { case dwarf::DWARF32: return 4; @@ -808,7 +817,7 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; - return AP->getPointerSize(); + return AP->MAI->getCodePointerSize(); } /// EmitValue - Emit label value. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index a550ff2fb90f..738e062cb93f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -547,18 +547,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); for (auto &Fragment : DV.getFrameIndexExprs()) { unsigned FrameReg = 0; + const DIExpression *Expr = Fragment.Expr; const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); - DwarfExpr.addFragmentOffset(Fragment.Expr); + DwarfExpr.addFragmentOffset(Expr); SmallVector<uint64_t, 8> Ops; Ops.push_back(dwarf::DW_OP_plus); Ops.push_back(Offset); - Ops.push_back(dwarf::DW_OP_deref); - Ops.append(Fragment.Expr->elements_begin(), Fragment.Expr->elements_end()); - DIExpressionCursor Expr(Ops); + Ops.append(Expr->elements_begin(), Expr->elements_end()); + DIExpressionCursor Cursor(Ops); + DwarfExpr.setMemoryLocationKind(); DwarfExpr.addMachineRegExpression( - *Asm->MF->getSubtarget().getRegisterInfo(), Expr, FrameReg); - DwarfExpr.addExpression(std::move(Expr)); + *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg); + DwarfExpr.addExpression(std::move(Cursor)); } addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); @@ -779,12 +780,13 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); SmallVector<uint64_t, 8> Ops; - if (Location.isIndirect()) { + if (Location.isIndirect() && Location.getOffset()) { Ops.push_back(dwarf::DW_OP_plus); Ops.push_back(Location.getOffset()); - Ops.push_back(dwarf::DW_OP_deref); } DIExpressionCursor Cursor(Ops); const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); @@ -807,12 +809,13 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); const DIExpression *DIExpr = DV.getSingleExpression(); DwarfExpr.addFragmentOffset(DIExpr); + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); SmallVector<uint64_t, 8> Ops; - if (Location.isIndirect()) { + if (Location.isIndirect() && Location.getOffset()) { Ops.push_back(dwarf::DW_OP_plus); Ops.push_back(Location.getOffset()); - Ops.push_back(dwarf::DW_OP_deref); } Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIExpressionCursor Cursor(Ops); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5ce111309208..d72656bcc58d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1517,13 +1517,12 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, DwarfExpr.addUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Location = Value.getLoc(); - + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); SmallVector<uint64_t, 8> Ops; - // FIXME: Should this condition be Location.isIndirect() instead? - if (Location.getOffset()) { + if (Location.isIndirect() && Location.getOffset()) { Ops.push_back(dwarf::DW_OP_plus); Ops.push_back(Location.getOffset()); - Ops.push_back(dwarf::DW_OP_deref); } Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIExpressionCursor Cursor(Ops); @@ -1578,7 +1577,7 @@ void DwarfDebug::emitDebugLoc() { // Start the dwarf loc section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); - unsigned char Size = Asm->getDataLayout().getPointerSize(); + unsigned char Size = Asm->MAI->getCodePointerSize(); for (const auto &List : DebugLocs.getLists()) { Asm->OutStreamer->EmitLabel(List.Label); const DwarfCompileUnit *CU = List.CU; @@ -1708,7 +1707,7 @@ void DwarfDebug::emitDebugARanges() { Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfARangesSection()); - unsigned PtrSize = Asm->getDataLayout().getPointerSize(); + unsigned PtrSize = Asm->MAI->getCodePointerSize(); // Build a list of CUs used. std::vector<DwarfCompileUnit *> CUs; @@ -1791,7 +1790,7 @@ void DwarfDebug::emitDebugRanges() { Asm->getObjFileLowering().getDwarfRangesSection()); // Size for our labels. - unsigned char Size = Asm->getDataLayout().getPointerSize(); + unsigned char Size = Asm->MAI->getCodePointerSize(); // Grab the specific ranges for the compile units in the module. for (const auto &I : CUMap) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index debe88f3b1ee..f65dc151f301 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -23,9 +23,12 @@ using namespace llvm; void DwarfExpression::addReg(int DwarfReg, const char *Comment) { - assert(DwarfReg >= 0 && "invalid negative dwarf register number"); - if (DwarfReg < 32) { - emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment); + assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + assert((LocationKind == Unknown || LocationKind == Register) && + "location description already locked down"); + LocationKind = Register; + if (DwarfReg < 32) { + emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment); } else { emitOp(dwarf::DW_OP_regx, Comment); emitUnsigned(DwarfReg); @@ -34,6 +37,7 @@ void DwarfExpression::addReg(int DwarfReg, const char *Comment) { void DwarfExpression::addBReg(int DwarfReg, int Offset) { assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + assert(LocationKind != Register && "location description already locked down"); if (DwarfReg < 32) { emitOp(dwarf::DW_OP_breg0 + DwarfReg); } else { @@ -156,18 +160,23 @@ void DwarfExpression::addStackValue() { } void DwarfExpression::addSignedConstant(int64_t Value) { + assert(LocationKind == Implicit || LocationKind == Unknown); + LocationKind = Implicit; emitOp(dwarf::DW_OP_consts); emitSigned(Value); - addStackValue(); } void DwarfExpression::addUnsignedConstant(uint64_t Value) { + assert(LocationKind == Implicit || LocationKind == Unknown); + LocationKind = Implicit; emitOp(dwarf::DW_OP_constu); emitUnsigned(Value); - addStackValue(); } void DwarfExpression::addUnsignedConstant(const APInt &Value) { + assert(LocationKind == Implicit || LocationKind == Unknown); + LocationKind = Implicit; + unsigned Size = Value.getBitWidth(); const uint64_t *Data = Value.getRawData(); @@ -178,7 +187,8 @@ void DwarfExpression::addUnsignedConstant(const APInt &Value) { addUnsignedConstant(*Data++); if (Offset == 0 && Size <= 64) break; - addOpPiece(std::min(Size-Offset, 64u), Offset); + addStackValue(); + addOpPiece(std::min(Size - Offset, 64u), Offset); Offset += 64; } } @@ -206,7 +216,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, } // Handle simple register locations. - if (!HasComplexExpression) { + if (LocationKind != Memory && !HasComplexExpression) { for (auto &Reg : DwarfRegs) { if (Reg.DwarfRegNo >= 0) addReg(Reg.DwarfRegNo, Reg.Comment); @@ -216,62 +226,65 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return true; } + // Don't emit locations that cannot be expressed without DW_OP_stack_value. + if (DwarfVersion < 4) + if (std::any_of(ExprCursor.begin(), ExprCursor.end(), + [](DIExpression::ExprOperand Op) -> bool { + return Op.getOp() == dwarf::DW_OP_stack_value; + })) { + DwarfRegs.clear(); + return false; + } + assert(DwarfRegs.size() == 1); auto Reg = DwarfRegs[0]; - bool FBReg = isFrameRegister(TRI, MachineReg); + bool FBReg = isFrameRegister(TRI, MachineReg); + int SignedOffset = 0; assert(Reg.Size == 0 && "subregister has same size as superregister"); // Pattern-match combinations for which more efficient representations exist. - switch (Op->getOp()) { - default: { - if (FBReg) - addFBReg(0); - else - addReg(Reg.DwarfRegNo, 0); - break; + // [Reg, Offset, DW_OP_plus] --> [DW_OP_breg, Offset]. + // [Reg, Offset, DW_OP_minus] --> [DW_OP_breg, -Offset]. + // If Reg is a subregister we need to mask it out before subtracting. + if (Op && ((Op->getOp() == dwarf::DW_OP_plus) || + (Op->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) { + int Offset = Op->getArg(0); + SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset; + ExprCursor.take(); } - case dwarf::DW_OP_plus: - case dwarf::DW_OP_minus: { - // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset]. - // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset]. - auto N = ExprCursor.peekNext(); - if (N && N->getOp() == dwarf::DW_OP_deref) { - int Offset = Op->getArg(0); - int SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset; - if (FBReg) - addFBReg(SignedOffset); - else - addBReg(Reg.DwarfRegNo, SignedOffset); + if (FBReg) + addFBReg(SignedOffset); + else + addBReg(Reg.DwarfRegNo, SignedOffset); + DwarfRegs.clear(); + return true; +} - ExprCursor.consume(2); +/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?". +static bool isMemoryLocation(DIExpressionCursor ExprCursor) { + while (ExprCursor) { + auto Op = ExprCursor.take(); + switch (Op->getOp()) { + case dwarf::DW_OP_deref: + case dwarf::DW_OP_LLVM_fragment: break; + default: + return false; } - addReg(Reg.DwarfRegNo, 0); - break; - } - case dwarf::DW_OP_deref: - // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. - if (FBReg) - addFBReg(0); - else - addBReg(Reg.DwarfRegNo, 0); - ExprCursor.take(); - break; } - DwarfRegs.clear(); return true; } void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, unsigned FragmentOffsetInBits) { + // If we need to mask out a subregister, do it now, unless the next + // operation would emit an OpPiece anyway. + auto N = ExprCursor.peek(); + if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment)) + maskSubRegister(); + while (ExprCursor) { auto Op = ExprCursor.take(); - - // If we need to mask out a subregister, do it now, unless the next - // operation would emit an OpPiece anyway. - if (SubRegisterSizeInBits && Op->getOp() != dwarf::DW_OP_LLVM_fragment) - maskSubRegister(); - switch (Op->getOp()) { case dwarf::DW_OP_LLVM_fragment: { unsigned SizeInBits = Op->getArg(1); @@ -281,50 +294,74 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, // location. assert(OffsetInBits >= FragmentOffset && "fragment offset not added?"); - // If \a addMachineReg already emitted DW_OP_piece operations to represent + // If addMachineReg already emitted DW_OP_piece operations to represent // a super-register by splicing together sub-registers, subtract the size // of the pieces that was already emitted. SizeInBits -= OffsetInBits - FragmentOffset; - // If \a addMachineReg requested a DW_OP_bit_piece to stencil out a + // If addMachineReg requested a DW_OP_bit_piece to stencil out a // sub-register that is smaller than the current fragment's size, use it. if (SubRegisterSizeInBits) SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits); - + + // Emit a DW_OP_stack_value for implicit location descriptions. + if (LocationKind == Implicit) + addStackValue(); + + // Emit the DW_OP_piece. addOpPiece(SizeInBits, SubRegisterOffsetInBits); setSubRegisterPiece(0, 0); - break; + // Reset the location description kind. + LocationKind = Unknown; + return; } case dwarf::DW_OP_plus: + assert(LocationKind != Register); emitOp(dwarf::DW_OP_plus_uconst); emitUnsigned(Op->getArg(0)); break; case dwarf::DW_OP_minus: - // There is no OP_minus_uconst. + assert(LocationKind != Register); + // There is no DW_OP_minus_uconst. emitOp(dwarf::DW_OP_constu); emitUnsigned(Op->getArg(0)); emitOp(dwarf::DW_OP_minus); break; - case dwarf::DW_OP_deref: - emitOp(dwarf::DW_OP_deref); + case dwarf::DW_OP_deref: { + assert(LocationKind != Register); + if (LocationKind != Memory && isMemoryLocation(ExprCursor)) + // Turning this into a memory location description makes the deref + // implicit. + LocationKind = Memory; + else + emitOp(dwarf::DW_OP_deref); break; + } case dwarf::DW_OP_constu: + assert(LocationKind != Register); emitOp(dwarf::DW_OP_constu); emitUnsigned(Op->getArg(0)); break; case dwarf::DW_OP_stack_value: - addStackValue(); + assert(LocationKind == Unknown || LocationKind == Implicit); + LocationKind = Implicit; break; case dwarf::DW_OP_swap: + assert(LocationKind != Register); emitOp(dwarf::DW_OP_swap); break; case dwarf::DW_OP_xderef: + assert(LocationKind != Register); emitOp(dwarf::DW_OP_xderef); break; default: llvm_unreachable("unhandled opcode found in expression"); } } + + if (LocationKind == Implicit) + // Turn this into an implicit location description. + addStackValue(); } /// add masking operations to stencil out a subregister. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index e8dc211eb3c2..de8613200067 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -72,6 +72,8 @@ public: } /// Determine whether there are any operations left in this expression. operator bool() const { return Start != End; } + DIExpression::expr_op_iterator begin() const { return Start; } + DIExpression::expr_op_iterator end() const { return End; } /// Retrieve the fragment information, if any. Optional<DIExpression::FragmentInfo> getFragmentInfo() const { @@ -102,6 +104,9 @@ protected: unsigned SubRegisterSizeInBits = 0; unsigned SubRegisterOffsetInBits = 0; + /// The kind of location description being produced. + enum { Unknown = 0, Register, Memory, Implicit } LocationKind = Unknown; + /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed /// to represent a subregister. void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) { @@ -122,7 +127,8 @@ protected: /// current function. virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0; - /// Emit a DW_OP_reg operation. + /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF + /// register location description. void addReg(int DwarfReg, const char *Comment = nullptr); /// Emit a DW_OP_breg operation. void addBReg(int DwarfReg, int Offset); @@ -185,11 +191,18 @@ public: /// Emit an unsigned constant. void addUnsignedConstant(const APInt &Value); + /// Lock this down to become a memory location description. + void setMemoryLocationKind() { + assert(LocationKind == Unknown); + LocationKind = Memory; + } + /// Emit a machine register location. As an optimization this may also consume /// the prefix of a DwarfExpression if a more efficient representation for /// combining the register location and the first operation exists. /// - /// \param FragmentOffsetInBits If this is one fragment out of a fragmented + /// \param FragmentOffsetInBits If this is one fragment out of a + /// fragmented /// location, this is the offset of the /// fragment inside the entire variable. /// \return false if no DWARF register exists diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index bad5b09553cd..bac0c204d04f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Metadata.h" #include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" @@ -73,8 +74,8 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) - : DIEUnit(A->getDwarfVersion(), A->getPointerSize(), UnitTag), CUNode(Node), - Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) { + : DIEUnit(A->getDwarfVersion(), A->MAI->getCodePointerSize(), UnitTag), + CUNode(Node), Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) { } DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, @@ -471,12 +472,13 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // variable's location. DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); SmallVector<uint64_t, 9> Ops; - if (Location.isIndirect()) { + if (Location.isIndirect() && Location.getOffset()) { Ops.push_back(dwarf::DW_OP_plus); Ops.push_back(Location.getOffset()); - Ops.push_back(dwarf::DW_OP_deref); } // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). @@ -1546,7 +1548,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { Asm->OutStreamer->AddComment("DWARF Unit Type"); Asm->EmitInt8(UT); Asm->OutStreamer->AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); + Asm->EmitInt8(Asm->MAI->getCodePointerSize()); } // We share one abbreviations table across all units so it's always at the @@ -1562,7 +1564,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { if (Version <= 4) { Asm->OutStreamer->AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); + Asm->EmitInt8(Asm->MAI->getCodePointerSize()); } } diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 2bdd189557b4..c862cfd28add 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -570,8 +570,14 @@ bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) { ValueToValueMapTy VMap; BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F); - for (BasicBlock *Pred : OtherPreds) - Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + for (BasicBlock *Pred : OtherPreds) { + // If the target is a loop to itself, then the terminator of the split + // block needs to be updated. + if (Pred == Target) + BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + else + Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + } // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that // they are clones, so the number of PHIs are the same. @@ -5059,16 +5065,14 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { if (!ShlC) return false; uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); - auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt); - DemandBits |= ShlDemandBits; + DemandBits.setLowBits(BitWidth - ShiftAmt); break; } case llvm::Instruction::Trunc: { EVT TruncVT = TLI->getValueType(*DL, I->getType()); unsigned TruncBitWidth = TruncVT.getSizeInBits(); - auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth); - DemandBits |= TruncBits; + DemandBits.setLowBits(TruncBitWidth); break; } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 766187378446..5fb8dfc95d3f 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -381,18 +381,19 @@ bool IRTranslator::translateInsertValue(const User &U, uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices); unsigned Res = getOrCreateVReg(U); - const Value &Inserted = *U.getOperand(1); - MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), getOrCreateVReg(Inserted), - Offset); + unsigned Inserted = getOrCreateVReg(*U.getOperand(1)); + MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), Inserted, Offset); return true; } bool IRTranslator::translateSelect(const User &U, MachineIRBuilder &MIRBuilder) { - MIRBuilder.buildSelect(getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)), - getOrCreateVReg(*U.getOperand(1)), - getOrCreateVReg(*U.getOperand(2))); + unsigned Res = getOrCreateVReg(U); + unsigned Tst = getOrCreateVReg(*U.getOperand(0)); + unsigned Op0 = getOrCreateVReg(*U.getOperand(1)); + unsigned Op1 = getOrCreateVReg(*U.getOperand(2)); + MIRBuilder.buildSelect(Res, Tst, Op0, Op1); return true; } @@ -984,9 +985,11 @@ bool IRTranslator::translateInsertElement(const User &U, ValToVReg[&U] = Elt; return true; } - MIRBuilder.buildInsertVectorElement( - getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)), - getOrCreateVReg(*U.getOperand(1)), getOrCreateVReg(*U.getOperand(2))); + unsigned Res = getOrCreateVReg(U); + unsigned Val = getOrCreateVReg(*U.getOperand(0)); + unsigned Elt = getOrCreateVReg(*U.getOperand(1)); + unsigned Idx = getOrCreateVReg(*U.getOperand(2)); + MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx); return true; } @@ -999,9 +1002,10 @@ bool IRTranslator::translateExtractElement(const User &U, ValToVReg[&U] = Elt; return true; } - MIRBuilder.buildExtractVectorElement(getOrCreateVReg(U), - getOrCreateVReg(*U.getOperand(0)), - getOrCreateVReg(*U.getOperand(1))); + unsigned Res = getOrCreateVReg(U); + unsigned Val = getOrCreateVReg(*U.getOperand(0)); + unsigned Idx = getOrCreateVReg(*U.getOperand(1)); + MIRBuilder.buildExtractVectorElement(Res, Val, Idx); return true; } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index fb9d01ef8542..942680b6fff3 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -68,23 +68,6 @@ bool InstructionSelector::constrainSelectedInstRegOperands( return true; } -Optional<int64_t> -InstructionSelector::getConstantVRegVal(unsigned VReg, - const MachineRegisterInfo &MRI) const { - MachineInstr *MI = MRI.getVRegDef(VReg); - if (MI->getOpcode() != TargetOpcode::G_CONSTANT) - return None; - - if (MI->getOperand(1).isImm()) - return MI->getOperand(1).getImm(); - - if (MI->getOperand(1).isCImm() && - MI->getOperand(1).getCImm()->getBitWidth() <= 64) - return MI->getOperand(1).getCImm()->getSExtValue(); - - return None; -} - bool InstructionSelector::isOperandImmEqual( const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const { diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 657ddb307919..74ed58e8d049 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -24,6 +24,8 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <iterator> + #define DEBUG_TYPE "legalizer" using namespace llvm; @@ -161,7 +163,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // convergence for performance reasons. bool Changed = false; MachineBasicBlock::iterator NextMI; - for (auto &MBB : MF) + for (auto &MBB : MF) { for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { // Get the next Instruction before we try to legalize, because there's a // good chance MI will be deleted. @@ -171,18 +173,21 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // and are assumed to be legal. if (!isPreISelGenericOpcode(MI->getOpcode())) continue; + unsigned NumNewInsns = 0; SmallVector<MachineInstr *, 4> WorkList; - Helper.MIRBuilder.recordInsertions( - [&](MachineInstr *MI) { WorkList.push_back(MI); }); + Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { + ++NumNewInsns; + WorkList.push_back(MI); + }); WorkList.push_back(&*MI); + bool Changed = false; LegalizerHelper::LegalizeResult Res; unsigned Idx = 0; do { Res = Helper.legalizeInstrStep(*WorkList[Idx]); // Error out if we couldn't legalize this instruction. We may want to - // fall - // back to DAG ISel instead in the future. + // fall back to DAG ISel instead in the future. if (Res == LegalizerHelper::UnableToLegalize) { Helper.MIRBuilder.stopRecordingInsertions(); if (Res == LegalizerHelper::UnableToLegalize) { @@ -194,10 +199,21 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { } Changed |= Res == LegalizerHelper::Legalized; ++Idx; + +#ifndef NDEBUG + if (NumNewInsns) { + DEBUG(dbgs() << ".. .. Emitted " << NumNewInsns << " insns\n"); + for (auto I = WorkList.end() - NumNewInsns, E = WorkList.end(); + I != E; ++I) + DEBUG(dbgs() << ".. .. New MI: "; (*I)->print(dbgs())); + NumNewInsns = 0; + } +#endif } while (Idx < WorkList.size()); Helper.MIRBuilder.stopRecordingInsertions(); } + } MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); @@ -207,7 +223,11 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // good chance MI will be deleted. NextMI = std::next(MI); - Changed |= combineExtracts(*MI, MRI, TII); + // combineExtracts erases MI. + if (combineExtracts(*MI, MRI, TII)) { + Changed = true; + continue; + } Changed |= combineMerges(*MI, MRI, TII); } } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 20358f7ee6c2..58778077bc0e 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -24,7 +24,7 @@ #include <sstream> -#define DEBUG_TYPE "legalize-mir" +#define DEBUG_TYPE "legalizer" using namespace llvm; @@ -35,24 +35,34 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF) LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { + DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); + auto Action = LI.getAction(MI, MRI); switch (std::get<0>(Action)) { case LegalizerInfo::Legal: + DEBUG(dbgs() << ".. Already legal\n"); return AlreadyLegal; case LegalizerInfo::Libcall: + DEBUG(dbgs() << ".. Convert to libcall\n"); return libcall(MI); case LegalizerInfo::NarrowScalar: + DEBUG(dbgs() << ".. Narrow scalar\n"); return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::WidenScalar: + DEBUG(dbgs() << ".. Widen scalar\n"); return widenScalar(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::Lower: + DEBUG(dbgs() << ".. Lower\n"); return lower(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::FewerElements: + DEBUG(dbgs() << ".. Reduce number of elements\n"); return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::Custom: + DEBUG(dbgs() << ".. Custom legalization\n"); return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized : UnableToLegalize; default: + DEBUG(dbgs() << ".. Unable to legalize\n"); return UnableToLegalize; } } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 8d1a263395a0..54ef7e5c5a1b 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -592,7 +592,7 @@ MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res, LLT EltTy = MRI->getType(Elt); LLT IdxTy = MRI->getType(Idx); assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type"); - assert(EltTy.isScalar() && IdxTy.isScalar() && "invalid operand type"); + assert(IdxTy.isScalar() && "invalid operand type"); assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch"); assert(ResTy.getElementType() == EltTy && "type mismatch"); #endif @@ -612,7 +612,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res, LLT ValTy = MRI->getType(Val); LLT IdxTy = MRI->getType(Idx); assert(ValTy.isVector() && "invalid operand type"); - assert(ResTy.isScalar() && IdxTy.isScalar() && "invalid operand type"); + assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type"); + assert(IdxTy.isScalar() && "invalid operand type"); assert(ValTy.getElementType() == ResTy && "type mismatch"); #endif diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 606a59680a3d..3c93f8123b0d 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Constants.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -93,3 +94,19 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, R << Msg << ": " << ore::MNV("Inst", MI); reportGISelFailure(MF, TPC, MORE, R); } + +Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg, + const MachineRegisterInfo &MRI) { + MachineInstr *MI = MRI.getVRegDef(VReg); + if (MI->getOpcode() != TargetOpcode::G_CONSTANT) + return None; + + if (MI->getOperand(1).isImm()) + return MI->getOperand(1).getImm(); + + if (MI->getOperand(1).isCImm() && + MI->getOperand(1).getCImm()->getBitWidth() <= 64) + return MI->getOperand(1).getCImm()->getSExtValue(); + + return None; +} diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index a1cb0a0695bf..b7ab404070b1 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -888,20 +888,10 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. - bool IsIndirect = MI->isIndirectDebugValue(); - uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; - const MDNode *Var = MI->getDebugVariable(); - const MDNode *Expr = MI->getDebugExpression(); - DebugLoc DL = MI->getDebugLoc(); - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); - assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); - BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(StackSlot) - .addImm(Offset) - .addMetadata(Var) - .addMetadata(Expr); + DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI); + buildDbgValueForSpill(*MBB, MI, *MI, StackSlot); + MBB->erase(MI); continue; } diff --git a/contrib/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm/lib/CodeGen/LowLevelType.cpp index c4b9068fa905..1c682e72fa49 100644 --- a/contrib/llvm/lib/CodeGen/LowLevelType.cpp +++ b/contrib/llvm/lib/CodeGen/LowLevelType.cpp @@ -21,10 +21,10 @@ using namespace llvm; LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) { if (auto VTy = dyn_cast<VectorType>(&Ty)) { auto NumElements = VTy->getNumElements(); - auto ScalarSizeInBits = VTy->getElementType()->getPrimitiveSizeInBits(); + LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL); if (NumElements == 1) - return LLT::scalar(ScalarSizeInBits); - return LLT::vector(NumElements, ScalarSizeInBits); + return ScalarTy; + return LLT::vector(NumElements, ScalarTy); } else if (auto PTy = dyn_cast<PointerType>(&Ty)) { return LLT::pointer(PTy->getAddressSpace(), DL.getTypeSizeInBits(&Ty)); } else if (Ty.isSized()) { diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index c0a8b95ed8a0..4bd5fbfe38e6 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -2351,3 +2351,31 @@ MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, BB.insert(I, MI); return MachineInstrBuilder(MF, MI); } + +MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + const MachineInstr &Orig, + int FrameIndex) { + const MDNode *Var = Orig.getDebugVariable(); + auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression()); + bool IsIndirect = Orig.isIndirectDebugValue(); + uint64_t Offset = IsIndirect ? Orig.getOperand(1).getImm() : 0; + DebugLoc DL = Orig.getDebugLoc(); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + // If the DBG_VALUE already was a memory location, add an extra + // DW_OP_deref. Otherwise just turning this from a register into a + // memory/indirect location is sufficient. + if (IsIndirect) { + SmallVector<uint64_t, 8> Ops; + Ops.push_back(dwarf::DW_OP_deref); + if (Expr) + Ops.append(Expr->elements_begin(), Expr->elements_end()); + Expr = DIExpression::get(Expr->getContext(), Ops); + } + return BuildMI(BB, I, DL, Orig.getDesc()) + .addFrameIndex(FrameIndex) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); +} diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index d392c044bd71..84bd670105e1 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -2030,6 +2030,8 @@ namespace { void MachineVerifier::verifyStackFrame() { unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u) + return; SmallVector<StackStateOfBB, 8> SPState; SPState.resize(MF->getNumBlockIDs()); diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index fd759bc372b2..283d84629f8e 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -304,19 +304,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; - const MDNode *Var = DBG->getDebugVariable(); - const MDNode *Expr = DBG->getDebugExpression(); - bool IsIndirect = DBG->isIndirectDebugValue(); - uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; - DebugLoc DL = DBG->getDebugLoc(); - assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); - MachineInstr *NewDV = - BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(FI) - .addImm(Offset) - .addMetadata(Var) - .addMetadata(Expr); + MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI); assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp index fa68411284e7..7fa379d80c6c 100644 --- a/contrib/llvm/lib/CodeGen/SafeStack.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp @@ -550,7 +550,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( // Replace alloc with the new location. replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB, - /*Deref=*/true, -Offset); + /*Deref=*/false, -Offset); Arg->replaceAllUsesWith(NewArg); IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode()); IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment()); @@ -565,7 +565,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( if (Size == 0) Size = 1; // Don't create zero-sized stack objects. - replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -Offset); + replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/false, -Offset); replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset); // Replace uses of the alloca with the new location. @@ -655,7 +655,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (AI->hasName() && isa<Instruction>(NewAI)) NewAI->takeName(AI); - replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true); + replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/false); AI->replaceAllUsesWith(NewAI); AI->eraseFromParent(); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4d468551ae24..4702d63cb617 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2146,7 +2146,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N->getFlags()->hasNoUnsignedWrap()) return N0; - if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) { + if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) { // N1 is either 0 or the minimum signed value. If the sub is NSW, then // N1 must be 0 because negating the minimum signed value is undefined. if (N->getFlags()->hasNoSignedWrap()) @@ -3705,7 +3705,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible. - if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (zext_inreg (extload x)) -> (zextload x) @@ -4225,8 +4225,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return Load; // Simplify the operands using demanded-bits information. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); @@ -5058,8 +5057,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return Tmp; // Simplify the expression using non-local knowledge. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); @@ -5350,7 +5348,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), DAG.getConstant(c2 - c1, DL, N1.getValueType())); } else { - Mask = Mask.lshr(c1 - c2); + Mask.lshrInPlace(c1 - c2); SDLoc DL(N); Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), DAG.getConstant(c1 - c2, DL, N1.getValueType())); @@ -5660,7 +5658,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { DAG.getConstant(ShiftAmt, DL0, getShiftAmountTy(SmallVT))); AddToWorklist(SmallShift.getNode()); - APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); + APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt); SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), @@ -8300,11 +8298,11 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, switch (N0.getOpcode()) { case ISD::AND: FPOpcode = ISD::FABS; - SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits()); + SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits()); break; case ISD::XOR: FPOpcode = ISD::FNEG; - SignMask = APInt::getSignBit(SourceVT.getSizeInBits()); + SignMask = APInt::getSignMask(SourceVT.getSizeInBits()); break; // TODO: ISD::OR --> ISD::FNABS? default: @@ -8415,7 +8413,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { assert(VT.getSizeInBits() == 128); SDValue SignBit = DAG.getConstant( - APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); SDValue FlipBit; if (N0.getOpcode() == ISD::FNEG) { FlipBit = SignBit; @@ -8435,7 +8433,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); } - APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, NewConv, DAG.getConstant(SignBit, DL, VT)); @@ -8483,7 +8481,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { - APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2); SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(Cst.getNode()); SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); @@ -8504,7 +8502,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); } - APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); @@ -8687,7 +8685,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned j = 0; j != NumOutputsPerInput; ++j) { APInt ThisVal = OpVal.trunc(DstBitSize); Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); - OpVal = OpVal.lshr(DstBitSize); + OpVal.lshrInPlace(DstBitSize); } // For big endian targets, swap the order of the pieces of each element. @@ -10315,11 +10313,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x80... per scalar element // and splat it. - SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits()); + SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x80... - SignMask = APInt::getSignBit(IntVT.getSizeInBits()); + SignMask = APInt::getSignMask(IntVT.getSizeInBits()); } SDLoc DL0(N0); Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, @@ -10420,11 +10418,11 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x7f... per scalar element // and splat it. - SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits()); + SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x7f... - SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); + SignMask = ~APInt::getSignMask(IntVT.getSizeInBits()); } SDLoc DL(N0); Int = DAG.getNode(ISD::AND, DL, IntVT, Int, @@ -12375,6 +12373,27 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { return LHS.OffsetFromBase < RHS.OffsetFromBase; }); + // Store Merge attempts to merge the lowest stores. This generally + // works out as if successful, as the remaining stores are checked + // after the first collection of stores is merged. However, in the + // case that a non-mergeable store is found first, e.g., {p[-2], + // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent + // mergeable cases. To prevent this, we prune such stores from the + // front of StoreNodes here. + + unsigned StartIdx = 0; + while ((StartIdx + 1 < StoreNodes.size()) && + StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != + StoreNodes[StartIdx + 1].OffsetFromBase) + ++StartIdx; + + // Bail if we don't have enough candidates to merge. + if (StartIdx + 1 >= StoreNodes.size()) + return false; + + if (StartIdx) + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx); + // Scan the memory operations on the chain and find the first non-consecutive // store memory address. unsigned NumConsecutiveStores = 0; @@ -12485,39 +12504,52 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // When extracting multiple vector elements, try to store them // in one vector store rather than a sequence of scalar stores. if (IsExtractVecSrc) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned NumStoresToMerge = 0; - bool IsVec = MemVT.isVector(); - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - unsigned StoreValOpcode = St->getValue().getOpcode(); - // This restriction could be loosened. - // Bail out if any stored values are not elements extracted from a vector. - // It should be possible to handle mixed sources, but load sources need - // more careful handling (see the block of code below that handles - // consecutive loads). - if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && - StoreValOpcode != ISD::EXTRACT_SUBVECTOR) - return false; + bool RV = false; + while (StoreNodes.size() >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned NumStoresToMerge = 0; + bool IsVec = MemVT.isVector(); + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + unsigned StoreValOpcode = St->getValue().getOpcode(); + // This restriction could be loosened. + // Bail out if any stored values are not elements extracted from a + // vector. It should be possible to handle mixed sources, but load + // sources need more careful handling (see the block of code below that + // handles consecutive loads). + if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && + StoreValOpcode != ISD::EXTRACT_SUBVECTOR) + return false; - // Find a legal type for the vector store. - unsigned Elts = i + 1; - if (IsVec) { - // When merging vector stores, get the total number of elements. - Elts *= MemVT.getVectorNumElements(); + // Find a legal type for the vector store. + unsigned Elts = i + 1; + if (IsVec) { + // When merging vector stores, get the total number of elements. + Elts *= MemVT.getVectorNumElements(); + } + EVT Ty = + EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast; + if (TLI.isTypeLegal(Ty) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && + IsFast) + NumStoresToMerge = i + 1; } - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - bool IsFast; - if (TLI.isTypeLegal(Ty) && - TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, - FirstStoreAlign, &IsFast) && IsFast) - NumStoresToMerge = i + 1; - } - return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge, - false, true); + bool Merged = MergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumStoresToMerge, false, true); + if (!Merged) + break; + // Remove merged stores for next iteration. + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + RV = true; + NumConsecutiveStores -= NumStoresToMerge; + } + return RV; } // Below we handle the case of multiple consecutive stores that @@ -15122,9 +15154,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { // Extract the sub element from the constant bit mask. if (DAG.getDataLayout().isBigEndian()) { - Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits); + Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits); } else { - Bits = Bits.lshr(SubIdx * NumSubBits); + Bits.lshrInPlace(SubIdx * NumSubBits); } if (Split > 1) @@ -16004,7 +16036,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) { /// Return true if base is a frame index, which is known not to alias with /// anything but itself. Provides base object and offset as results. -static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, +static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; @@ -16057,53 +16089,56 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { return false; // Gather base node and offset information. - SDValue Base1, Base2; - int64_t Offset1, Offset2; - const GlobalValue *GV1, *GV2; - const void *CV1, *CV2; - bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), + SDValue Base0, Base1; + int64_t Offset0, Offset1; + const GlobalValue *GV0, *GV1; + const void *CV0, *CV1; + bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(), + Base0, Offset0, GV0, CV0); + bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(), Base1, Offset1, GV1, CV1); - bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), - Base2, Offset2, GV2, CV2); - // If they have a same base address then check to see if they overlap. - if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) - return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || - (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); + // If they have the same base address, then check to see if they overlap. + unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3; + unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; + if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1))) + return !((Offset0 + NumBytes0) <= Offset1 || + (Offset1 + NumBytes1) <= Offset0); // It is possible for different frame indices to alias each other, mostly // when tail call optimization reuses return address slots for arguments. // To catch this case, look up the actual index of frame indices to compute // the real alias relationship. - if (isFrameIndex1 && isFrameIndex2) { + if (IsFrameIndex0 && IsFrameIndex1) { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex()); Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); - Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); - return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || - (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); + return !((Offset0 + NumBytes0) <= Offset1 || + (Offset1 + NumBytes1) <= Offset0); } // Otherwise, if we know what the bases are, and they aren't identical, then // we know they cannot alias. - if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) + if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1)) return false; // If we know required SrcValue1 and SrcValue2 have relatively large alignment // compared to the size and offset of the access, we may be able to prove they - // do not alias. This check is conservative for now to catch cases created by + // do not alias. This check is conservative for now to catch cases created by // splitting vector types. - if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && - (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && - (Op0->getMemoryVT().getSizeInBits() >> 3 == - Op1->getMemoryVT().getSizeInBits() >> 3) && - (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) { - int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); - int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); + int64_t SrcValOffset0 = Op0->getSrcValueOffset(); + int64_t SrcValOffset1 = Op1->getSrcValueOffset(); + unsigned OrigAlignment0 = Op0->getOriginalAlignment(); + unsigned OrigAlignment1 = Op1->getOriginalAlignment(); + if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && + NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) { + int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; + int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; // There is no overlap between these relatively aligned accesses of similar - // size, return no alias. - if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || - (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) + // size. Return no alias. + if ((OffAlign0 + NumBytes0) <= OffAlign1 || + (OffAlign1 + NumBytes1) <= OffAlign0) return false; } @@ -16115,19 +16150,17 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) UseAA = false; #endif + if (UseAA && Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { // Use alias analysis information. - int64_t MinOffset = std::min(Op0->getSrcValueOffset(), - Op1->getSrcValueOffset()); - int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + - Op0->getSrcValueOffset() - MinOffset; - int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + - Op1->getSrcValueOffset() - MinOffset; + int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); + int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset; + int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset; AliasResult AAResult = - AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1, + AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0, UseTBAA ? Op0->getAAInfo() : AAMDNodes()), - MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2, + MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1, UseTBAA ? Op1->getAAInfo() : AAMDNodes())); if (AAResult == NoAlias) return false; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 0584ab9f60d1..6fb26fc3b73d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1164,9 +1164,11 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { "Expected inlined-at fields to agree"); if (Op->isReg()) { Op->setIsDebug(true); + // A dbg.declare describes the address of a source variable, so lower it + // into an indirect DBG_VALUE. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, - DI->getVariable(), DI->getExpression()); + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, + Op->getReg(), 0, DI->getVariable(), DI->getExpression()); } else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fc7cd020fe2e..3bae3bf9ab7c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1343,7 +1343,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, // Convert to an integer of the same size. if (TLI.isTypeLegal(IVT)) { State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); - State.SignMask = APInt::getSignBit(NumBits); + State.SignMask = APInt::getSignMask(NumBits); State.SignBit = NumBits - 1; return; } @@ -2984,7 +2984,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT NVT = Node->getValueType(0); APFloat apf(DAG.EVTToAPFloatSemantics(VT), APInt::getNullValue(VT.getSizeInBits())); - APInt x = APInt::getSignBit(NVT.getSizeInBits()); + APInt x = APInt::getSignMask(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6f2b1b94ce46..c1cb5d9b5235 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -72,7 +72,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; case ISD::EXTRACT_VECTOR_ELT: - R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; + R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; @@ -171,7 +171,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { } } -SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, keep the extracted value in register. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NewOp.getValueType().getVectorElementType(), diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 0a2b680e1c66..154af46c9446 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -925,9 +925,9 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { assert(Op.getValueType().isVector() && "Only applies to vectors!"); unsigned EltWidth = Op.getScalarValueSizeInBits(); EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); - unsigned NumElts = Op.getValueType().getVectorNumElements(); + auto EltCnt = Op.getValueType().getVectorElementCount(); return DAG.getNode(ISD::BITCAST, SDLoc(Op), - EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); + EVT::getVectorVT(*DAG.getContext(), EltNVT, EltCnt), Op); } SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 80c939700518..af55a22972a6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -428,7 +428,7 @@ private: SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 78fddb5ce8f5..1a7d7b7af5fa 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1293,12 +1293,9 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, if ((NumElements & 1) == 0 && SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { LLVMContext &Ctx = *DAG.getContext(); - EVT NewSrcVT = EVT::getVectorVT( - Ctx, EVT::getIntegerVT( - Ctx, SrcVT.getScalarSizeInBits() * 2), - NumElements); - EVT SplitSrcVT = - EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); + EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx); + EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx); + EVT SplitLoVT, SplitHiVT; std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && @@ -3012,8 +3009,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { // Don't touch if this will be scalarized. EVT FinalVT = VSelVT; while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector) - FinalVT = EVT::getVectorVT(Ctx, FinalVT.getVectorElementType(), - FinalVT.getVectorNumElements() / 2); + FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx); + if (FinalVT.getVectorNumElements() == 1) return SDValue(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 003ea5030bfc..523f409e6b2c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -639,12 +639,15 @@ void SelectionDAG::DeallocateNode(SDNode *N) { // If we have operands, deallocate them. removeOperands(N); + NodeAllocator.Deallocate(AllNodes.remove(N)); + // Set the opcode to DELETED_NODE to help catch bugs when node // memory is reallocated. + // FIXME: There are places in SDag that have grown a dependency on the opcode + // value in the released node. + __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType)); N->NodeType = ISD::DELETED_NODE; - NodeAllocator.Deallocate(AllNodes.remove(N)); - // If any of the SDDbgValue nodes refer to this SDNode, invalidate // them and forget about that node. DbgInfo->erase(N); @@ -1826,7 +1829,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { @@ -1839,7 +1842,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); int FrameIdx = MFI.CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, @@ -1955,7 +1958,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, /// use this predicate to simplify operations downstream. bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { unsigned BitWidth = Op.getScalarValueSizeInBits(); - return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); + return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth); } /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use @@ -2330,8 +2333,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, Depth + 1); - KnownZero = KnownZero.lshr(*ShAmt); - KnownOne = KnownOne.lshr(*ShAmt); + KnownZero.lshrInPlace(*ShAmt); + KnownOne.lshrInPlace(*ShAmt); // High bits are known zero. KnownZero.setHighBits(ShAmt->getZExtValue()); } @@ -2340,15 +2343,15 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, Depth + 1); - KnownZero = KnownZero.lshr(*ShAmt); - KnownOne = KnownOne.lshr(*ShAmt); + KnownZero.lshrInPlace(*ShAmt); + KnownOne.lshrInPlace(*ShAmt); // If we know the value of the sign bit, then we know it is copied across // the high bits by the shift amount. - APInt SignBit = APInt::getSignBit(BitWidth); - SignBit = SignBit.lshr(*ShAmt); // Adjust to where it is now in the mask. - if (KnownZero.intersects(SignBit)) { + APInt SignMask = APInt::getSignMask(BitWidth); + SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask. + if (KnownZero.intersects(SignMask)) { KnownZero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. - } else if (KnownOne.intersects(SignBit)) { + } else if (KnownOne.intersects(SignMask)) { KnownOne.setHighBits(ShAmt->getZExtValue()); // New bits are known one. } } @@ -2361,14 +2364,14 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); - APInt InSignBit = APInt::getSignBit(EBits); + APInt InSignMask = APInt::getSignMask(EBits); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); // If the sign extended bits are demanded, we know that the sign // bit is demanded. - InSignBit = InSignBit.zext(BitWidth); + InSignMask = InSignMask.zext(BitWidth); if (NewBits.getBoolValue()) - InputDemandedBits |= InSignBit; + InputDemandedBits |= InSignMask; computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, Depth + 1); @@ -2377,10 +2380,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // If the sign bit of the input is known set or clear, then we know the // top bits of the result. - if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear + if (KnownZero.intersects(InSignMask)) { // Input sign bit known clear KnownZero |= NewBits; KnownOne &= ~NewBits; - } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + } else if (KnownOne.intersects(InSignMask)) { // Input sign bit known set KnownOne |= NewBits; KnownZero &= ~NewBits; } else { // Input sign bit unknown @@ -2745,7 +2748,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // a set bit that isn't the sign bit (otherwise it could be INT_MIN). KnownOne2.clearBit(BitWidth - 1); if (KnownOne2.getBoolValue()) { - KnownZero = APInt::getSignBit(BitWidth); + KnownZero = APInt::getSignMask(BitWidth); break; } break; @@ -2833,7 +2836,7 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, computeKnownBits(N0, N0Zero, N0One); bool overflow; - (~N0Zero).uadd_ov(~N1Zero, overflow); + (void)(~N0Zero).uadd_ov(~N1Zero, overflow); if (!overflow) return OFK_Never; } @@ -2874,7 +2877,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // one bit set. if (Val.getOpcode() == ISD::SRL) { auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0)); - if (C && C->getAPIntValue().isSignBit()) + if (C && C->getAPIntValue().isSignMask()) return true; } @@ -2967,7 +2970,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::max(Tmp, Tmp2); case ISD::SRA: - Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); // SRA X, C -> adds C sign bits. if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { APInt ShiftVal = C->getAPIntValue(); @@ -3130,40 +3133,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // result. Otherwise it gives either negative or > bitwidth result return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } - case ISD::INSERT_VECTOR_ELT: { - SDValue InVec = Op.getOperand(0); - SDValue InVal = Op.getOperand(1); - SDValue EltNo = Op.getOperand(2); - unsigned NumElts = InVec.getValueType().getVectorNumElements(); - - ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); - if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { - // If we know the element index, split the demand between the - // source vector and the inserted element. - unsigned EltIdx = CEltNo->getZExtValue(); - - // If we demand the inserted element then get its sign bits. - Tmp = UINT_MAX; - if (DemandedElts[EltIdx]) - Tmp = ComputeNumSignBits(InVal, Depth + 1); - - // If we demand the source vector then get its sign bits, and determine - // the minimum. - APInt VectorElts = DemandedElts; - VectorElts.clearBit(EltIdx); - if (!!VectorElts) { - Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1); - Tmp = std::min(Tmp, Tmp2); - } - } else { - // Unknown element index, so ignore DemandedElts and demand them all. - Tmp = ComputeNumSignBits(InVec, Depth + 1); - Tmp2 = ComputeNumSignBits(InVal, Depth + 1); - Tmp = std::min(Tmp, Tmp2); - } - assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); - return Tmp; - } case ISD::EXTRACT_VECTOR_ELT: { SDValue InVec = Op.getOperand(0); SDValue EltNo = Op.getOperand(1); @@ -7607,14 +7576,11 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { // Currently all types are split in half. EVT LoVT, HiVT; - if (!VT.isVector()) { + if (!VT.isVector()) LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); - } else { - unsigned NumElements = VT.getVectorNumElements(); - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), - NumElements/2); - } + else + LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext()); + return std::make_pair(LoVT, HiVT); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8708f58f1e63..2c58953ee908 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1151,7 +1151,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) return DAG.getFrameIndex(SI->second, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFrameIndexTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. @@ -4674,7 +4674,7 @@ static unsigned getUnderlyingArgReg(const SDValue &N) { /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, - DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { + DILocation *DL, int64_t Offset, bool IsDbgDeclare, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -4688,6 +4688,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) return false; + bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) @@ -4701,15 +4702,19 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (PR) Reg = PR; } - if (Reg) + if (Reg) { Op = MachineOperand::CreateReg(Reg, false); + IsIndirect = IsDbgDeclare; + } } if (!Op) { // Check if ValueMap has reg number. DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); - if (VMI != FuncInfo.ValueMap.end()) + if (VMI != FuncInfo.ValueMap.end()) { Op = MachineOperand::CreateReg(VMI->second, false); + IsIndirect = IsDbgDeclare; + } } if (!Op && N.getNode()) @@ -4955,8 +4960,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else if (isa<Argument>(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, - N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N); return nullptr; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), @@ -4966,7 +4970,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N)) { // If variable is pinned by a alloca in dominating bb then // use StaticAllocaMap. @@ -5613,7 +5617,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = - DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); + DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -6626,7 +6630,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL)); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI)); OpInfo.CallOperand = StackSlot; @@ -7389,7 +7393,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back(Builder.DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); + FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } @@ -7657,7 +7661,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); - DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index c6acc09b6602..9e34590cc39c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -928,7 +928,7 @@ private: /// instruction selection, they will be inserted to the entry BB. bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable, DIExpression *Expr, DILocation *DL, - int64_t Offset, bool IsIndirect, + int64_t Offset, bool IsDbgDeclare, const SDValue &N); /// Return the next block after MBB, or nullptr if there is none. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 2756e276c6a9..93c6738f650d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -574,7 +574,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // using the bits from the RHS. Below, we use knowledge about the RHS to // simplify the LHS, here we're using information from the LHS to simplify // the RHS. - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) { SDValue Op0 = Op.getOperand(0); APInt LHSZero, LHSOne; // Do not increment Depth here; that can cause an infinite loop. @@ -715,7 +715,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the RHS is a constant, see if we can simplify it. // for XOR, we prefer to force bits to 1 if they will make a -1. // If we can't force bits, try to shrink the constant. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { APInt Expanded = C->getAPIntValue() | (~NewMask); // If we can expand it to have all bits set, do it. if (Expanded.isAllOnesValue()) { @@ -778,7 +778,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If (1) we only need the sign-bit, (2) the setcc operands are the same // width as the setcc result, and (3) the result of a setcc conforms to 0 or // -1, we may be able to bypass the setcc. - if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth && + if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && getBooleanContents(Op.getValueType()) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! @@ -839,7 +839,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue InnerOp = InOp.getNode()->getOperand(0); EVT InnerVT = InnerOp.getValueType(); unsigned InnerBits = InnerVT.getSizeInBits(); - if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && + if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits && isTypeDesirableForOp(ISD::SHL, InnerVT)) { EVT ShTy = getShiftAmountTy(InnerVT, DL); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) @@ -861,12 +861,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, InnerOp.getOpcode() == ISD::SRL && InnerOp.hasOneUse() && isa<ConstantSDNode>(InnerOp.getOperand(1))) { - uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1)) + unsigned InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1)) ->getZExtValue(); if (InnerShAmt < ShAmt && InnerShAmt < InnerBits && - NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 && - NewMask.trunc(ShAmt) == 0) { + NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) && + NewMask.countTrailingZeros() >= ShAmt) { SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, Op.getOperand(1).getValueType()); @@ -929,8 +929,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero, KnownOne, TLO, Depth+1)) return true; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero = KnownZero.lshr(ShAmt); - KnownOne = KnownOne.lshr(ShAmt); + KnownZero.lshrInPlace(ShAmt); + KnownOne.lshrInPlace(ShAmt); KnownZero.setHighBits(ShAmt); // High bits known zero. } @@ -964,21 +964,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // demand the input sign bit. APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); if (HighBits.intersects(NewMask)) - InDemandedMask |= APInt::getSignBit(VT.getScalarSizeInBits()); + InDemandedMask |= APInt::getSignMask(VT.getScalarSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, TLO, Depth+1)) return true; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero = KnownZero.lshr(ShAmt); - KnownOne = KnownOne.lshr(ShAmt); + KnownZero.lshrInPlace(ShAmt); + KnownOne.lshrInPlace(ShAmt); // Handle the sign bit, adjusted to where it is now in the mask. - APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); + APInt SignMask = APInt::getSignMask(BitWidth).lshr(ShAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + if (KnownZero.intersects(SignMask) || (HighBits & ~NewMask) == HighBits) { SDNodeFlags Flags; Flags.setExact(cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()); return TLO.CombineTo(Op, @@ -996,7 +996,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0), NewSA)); } - if (KnownOne.intersects(SignBit)) + if (KnownOne.intersects(SignMask)) // New bits are known one. KnownOne |= HighBits; } @@ -1040,7 +1040,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(ExVT.getScalarSizeInBits()).zext(BitWidth); + APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, ExVT.getScalarSizeInBits()) & @@ -1205,20 +1205,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, getShiftAmountTy(Op.getValueType(), DL)); } - APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, - OperandBitWidth - BitWidth); - HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); - - if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { - // None of the shifted in bits are needed. Add a truncate of the - // shift input, then shift it. - SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), - In.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, - Op.getValueType(), - NewTrunc, - Shift)); + if (ShAmt->getZExtValue() < BitWidth) { + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, + OperandBitWidth - BitWidth); + HighBits.lshrInPlace(ShAmt->getZExtValue()); + HighBits = HighBits.trunc(BitWidth); + + if (!(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + Shift)); + } } break; } @@ -1247,7 +1250,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!TLO.LegalOperations() && !Op.getValueType().isVector() && !Op.getOperand(0).getValueType().isVector() && - NewMask == APInt::getSignBit(Op.getValueSizeInBits()) && + NewMask == APInt::getSignMask(Op.getValueSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); @@ -2055,7 +2058,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } else { ShiftBits = C1.countTrailingZeros(); } - NewC = NewC.lshr(ShiftBits); + NewC.lshrInPlace(ShiftBits); if (ShiftBits && NewC.getMinSignedBits() <= 64 && isLegalICmpImmediate(NewC.getSExtValue())) { auto &DL = DAG.getDataLayout(); @@ -3353,7 +3356,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT); SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT); SDValue Bias = DAG.getConstant(127, dl, IntVT); - SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl, + SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl, IntVT); SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT); SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT); diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index cbce2dc89deb..bbb19b5e998d 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -579,7 +579,7 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address, return InliningInfo; } - uint32_t CallFile = 0, CallLine = 0, CallColumn = 0; + uint32_t CallFile = 0, CallLine = 0, CallColumn = 0, CallDiscriminator = 0; for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) { DWARFDie &FunctionDIE = InlinedChain[i]; DILineInfo Frame; @@ -605,10 +605,12 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address, Spec.FLIKind, Frame.FileName); Frame.Line = CallLine; Frame.Column = CallColumn; + Frame.Discriminator = CallDiscriminator; } // Get call file/line/column of a current DIE. if (i + 1 < n) { - FunctionDIE.getCallerFrame(CallFile, CallLine, CallColumn); + FunctionDIE.getCallerFrame(CallFile, CallLine, CallColumn, + CallDiscriminator); } } InliningInfo.addFrame(Frame); diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 4308cc2e2639..24039eb35209 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -290,10 +290,12 @@ uint64_t DWARFDie::getDeclLine() const { } void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, - uint32_t &CallColumn) const { + uint32_t &CallColumn, + uint32_t &CallDiscriminator) const { CallFile = toUnsigned(find(DW_AT_call_file), 0); CallLine = toUnsigned(find(DW_AT_call_line), 0); CallColumn = toUnsigned(find(DW_AT_call_column), 0); + CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0); } void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, @@ -350,32 +352,6 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, } } -void DWARFDie::getInlinedChainForAddress( - const uint64_t Address, SmallVectorImpl<DWARFDie> &InlinedChain) const { - if (isNULL()) - return; - DWARFDie DIE(*this); - while (DIE) { - // Append current DIE to inlined chain only if it has correct tag - // (e.g. it is not a lexical block). - if (DIE.isSubroutineDIE()) - InlinedChain.push_back(DIE); - - // Try to get child which also contains provided address. - DWARFDie Child = DIE.getFirstChild(); - while (Child) { - if (Child.addressRangeContainsAddress(Address)) { - // Assume there is only one such child. - break; - } - Child = Child.getSibling(); - } - DIE = Child; - } - // Reverse the obtained chain to make the root of inlined chain last. - std::reverse(InlinedChain.begin(), InlinedChain.end()); -} - DWARFDie DWARFDie::getParent() const { if (isValid()) return U->getParent(Die); diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index 4ee8e8f46d2e..c3f467745402 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -343,37 +343,63 @@ void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) { clearDIEs(true); } -DWARFDie -DWARFUnit::getSubprogramForAddress(uint64_t Address) { - extractDIEsIfNeeded(false); - for (const DWARFDebugInfoEntry &D : DieArray) { - DWARFDie DIE(this, &D); - if (DIE.isSubprogramDIE() && - DIE.addressRangeContainsAddress(Address)) { - return DIE; +void DWARFUnit::updateAddressDieMap(DWARFDie Die) { + if (Die.isSubroutineDIE()) { + for (const auto &R : Die.getAddressRanges()) { + // Ignore 0-sized ranges. + if (R.first == R.second) + continue; + auto B = AddrDieMap.upper_bound(R.first); + if (B != AddrDieMap.begin() && R.first < (--B)->second.first) { + // The range is a sub-range of existing ranges, we need to split the + // existing range. + if (R.second < B->second.first) + AddrDieMap[R.second] = B->second; + if (R.first > B->first) + AddrDieMap[B->first].first = R.first; + } + AddrDieMap[R.first] = std::make_pair(R.second, Die); } } - return DWARFDie(); + // Parent DIEs are added to the AddrDieMap prior to the Children DIEs to + // simplify the logic to update AddrDieMap. The child's range will always + // be equal or smaller than the parent's range. With this assumption, when + // adding one range into the map, it will at most split a range into 3 + // sub-ranges. + for (DWARFDie Child = Die.getFirstChild(); Child; Child = Child.getSibling()) + updateAddressDieMap(Child); +} + +DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) { + extractDIEsIfNeeded(false); + if (AddrDieMap.empty()) + updateAddressDieMap(getUnitDIE()); + auto R = AddrDieMap.upper_bound(Address); + if (R == AddrDieMap.begin()) + return DWARFDie(); + // upper_bound's previous item contains Address. + --R; + if (Address >= R->second.first) + return DWARFDie(); + return R->second.second; } void DWARFUnit::getInlinedChainForAddress(uint64_t Address, SmallVectorImpl<DWARFDie> &InlinedChain) { - // First, find a subprogram that contains the given address (the root - // of inlined chain). - DWARFDie SubprogramDIE; + assert(InlinedChain.empty()); // Try to look for subprogram DIEs in the DWO file. parseDWO(); - if (DWO) - SubprogramDIE = DWO->getUnit()->getSubprogramForAddress(Address); - else - SubprogramDIE = getSubprogramForAddress(Address); - - // Get inlined chain rooted at this subprogram DIE. - if (SubprogramDIE) - SubprogramDIE.getInlinedChainForAddress(Address, InlinedChain); - else - InlinedChain.clear(); + // First, find the subroutine that contains the given address (the leaf + // of inlined chain). + DWARFDie SubroutineDIE = + (DWO ? DWO->getUnit() : this)->getSubroutineForAddress(Address); + + while (SubroutineDIE) { + if (SubroutineDIE.isSubroutineDIE()) + InlinedChain.push_back(SubroutineDIE); + SubroutineDIE = SubroutineDIE.getParent(); + } } const DWARFUnitIndex &llvm::getDWARFUnitIndex(DWARFContext &Context, diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp index e29e9fc2c702..10b4e98b6079 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -1580,7 +1580,7 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy, GenericValue Elt; Elt.IntVal = Elt.IntVal.zext(SrcBitSize); Elt.IntVal = TempSrc.AggregateVal[i].IntVal; - Elt.IntVal = Elt.IntVal.lshr(ShiftAmt); + Elt.IntVal.lshrInPlace(ShiftAmt); // it could be DstBitSize == SrcBitSize, so check it if (DstBitSize < SrcBitSize) Elt.IntVal = Elt.IntVal.trunc(DstBitSize); diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp index 2b7359dab807..d690111ef210 100644 --- a/contrib/llvm/lib/IR/Attributes.cpp +++ b/contrib/llvm/lib/IR/Attributes.cpp @@ -984,20 +984,23 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, } AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, - AttributeSet AS) const { - if (!AS.hasAttributes()) + const AttrBuilder &B) const { + if (!B.hasAttributes()) return *this; + if (!pImpl) + return AttributeList::get(C, {{Index, AttributeSet::get(C, B)}}); + #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. For now, say // we can't change a known alignment. unsigned OldAlign = getParamAlignment(Index); - unsigned NewAlign = AS.getAlignment(); + unsigned NewAlign = B.getAlignment(); assert((!OldAlign || !NewAlign || OldAlign == NewAlign) && "Attempt to change alignment!"); #endif - SmallVector<std::pair<unsigned, AttributeSet>, 4> AttrSet; + SmallVector<IndexAttrPair, 4> AttrVec; uint64_t NumAttrs = pImpl->getNumSlots(); unsigned I; @@ -1005,31 +1008,25 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, for (I = 0; I < NumAttrs; ++I) { if (getSlotIndex(I) >= Index) break; - AttrSet.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); + AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); } + AttrBuilder NewAttrs; if (I < NumAttrs && getSlotIndex(I) == Index) { - // We need to merge two AttributeSets. - AttributeSet Merged = AttributeSet::get( - C, AttrBuilder(pImpl->getSlotNode(I)).merge(AttrBuilder(AS))); - AttrSet.emplace_back(Index, Merged); + // We need to merge the attribute sets. + NewAttrs.merge(pImpl->getSlotNode(I)); ++I; - } else { - // Otherwise, there were no attributes at this position in the original - // list. Add the set as is. - AttrSet.emplace_back(Index, AS); } + NewAttrs.merge(B); + + // Add the new or merged attribute set at this index. + AttrVec.emplace_back(Index, AttributeSet::get(C, NewAttrs)); // Add the remaining entries. for (; I < NumAttrs; ++I) - AttrSet.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); - - return get(C, AttrSet); -} + AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); -AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, - const AttrBuilder &B) const { - return get(C, Index, AttributeSet::get(C, B)); + return get(C, AttrVec); } AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, @@ -1046,46 +1043,7 @@ AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, AttributeList Attrs) const { - if (!pImpl) - return AttributeList(); - if (!Attrs.pImpl) return *this; - - // FIXME it is not obvious how this should work for alignment. - // For now, say we can't pass in alignment, which no current use does. - assert(!Attrs.hasAttribute(Index, Attribute::Alignment) && - "Attempt to change alignment!"); - - // Add the attribute slots before the one we're trying to add. - SmallVector<AttributeList, 4> AttrSet; - uint64_t NumAttrs = pImpl->getNumSlots(); - AttributeList AL; - uint64_t LastIndex = 0; - for (unsigned I = 0, E = NumAttrs; I != E; ++I) { - if (getSlotIndex(I) >= Index) { - if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++); - break; - } - LastIndex = I + 1; - AttrSet.push_back(getSlotAttributes(I)); - } - - // Now remove the attribute from the correct slot. There may already be an - // AttributeList there. - AttrBuilder B(AL, Index); - - for (unsigned I = 0, E = Attrs.pImpl->getNumSlots(); I != E; ++I) - if (Attrs.getSlotIndex(I) == Index) { - B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Index); - break; - } - - AttrSet.push_back(AttributeList::get(C, Index, B)); - - // Add the remaining attribute slots. - for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSet.push_back(getSlotAttributes(I)); - - return get(C, AttrSet); + return removeAttributes(C, Index, AttrBuilder(Attrs.getAttributes(Index))); } AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, @@ -1098,31 +1056,30 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!"); // Add the attribute slots before the one we're trying to add. - SmallVector<AttributeList, 4> AttrSet; + SmallVector<IndexAttrPair, 4> AttrSets; uint64_t NumAttrs = pImpl->getNumSlots(); - AttributeList AL; + AttrBuilder B; uint64_t LastIndex = 0; for (unsigned I = 0, E = NumAttrs; I != E; ++I) { if (getSlotIndex(I) >= Index) { - if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++); + if (getSlotIndex(I) == Index) + B = AttrBuilder(pImpl->getSlotNode(LastIndex++)); break; } LastIndex = I + 1; - AttrSet.push_back(getSlotAttributes(I)); + AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)}); } - // Now remove the attribute from the correct slot. There may already be an - // AttributeList there. - AttrBuilder B(AL, Index); + // Remove the attributes from the existing set and add them. B.remove(Attrs); - - AttrSet.push_back(AttributeList::get(C, Index, B)); + if (B.hasAttributes()) + AttrSets.push_back({Index, AttributeSet::get(C, B)}); // Add the remaining attribute slots. for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSet.push_back(getSlotAttributes(I)); + AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)}); - return get(C, AttrSet); + return get(C, AttrSets); } AttributeList AttributeList::removeAttributes(LLVMContext &C, @@ -1406,18 +1363,7 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { } AttrBuilder &AttrBuilder::removeAttributes(AttributeList A, uint64_t Index) { - unsigned Slot = ~0U; - for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) - if (A.getSlotIndex(I) == Index) { - Slot = I; - break; - } - - assert(Slot != ~0U && "Couldn't find index in AttributeList!"); - - for (AttributeList::iterator I = A.begin(Slot), E = A.end(Slot); I != E; - ++I) { - Attribute Attr = *I; + for (Attribute Attr : A.getAttributes(Index)) { if (Attr.isEnumAttribute() || Attr.isIntAttribute()) { removeAttribute(Attr.getKindAsEnum()); } else { diff --git a/contrib/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm/lib/IR/ConstantFold.cpp index bba230677ebf..80b117015ede 100644 --- a/contrib/llvm/lib/IR/ConstantFold.cpp +++ b/contrib/llvm/lib/IR/ConstantFold.cpp @@ -223,7 +223,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { APInt V = CI->getValue(); if (ByteStart) - V = V.lshr(ByteStart*8); + V.lshrInPlace(ByteStart*8); V = V.trunc(ByteSize*8); return ConstantInt::get(CI->getContext(), V); } diff --git a/contrib/llvm/lib/IR/ConstantRange.cpp b/contrib/llvm/lib/IR/ConstantRange.cpp index 8dfd6c8036c4..0cc38b025209 100644 --- a/contrib/llvm/lib/IR/ConstantRange.cpp +++ b/contrib/llvm/lib/IR/ConstantRange.cpp @@ -29,8 +29,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -/// Initialize a full (the default) or empty set for the specified type. -/// ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) { if (Full) Lower = Upper = APInt::getMaxValue(BitWidth); @@ -38,8 +36,6 @@ ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) { Lower = Upper = APInt::getMinValue(BitWidth); } -/// Initialize a range to hold the single specified value. -/// ConstantRange::ConstantRange(APInt V) : Lower(std::move(V)), Upper(Lower + 1) {} @@ -232,35 +228,23 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, return Result; } -/// isFullSet - Return true if this set contains all of the elements possible -/// for this data-type bool ConstantRange::isFullSet() const { return Lower == Upper && Lower.isMaxValue(); } -/// isEmptySet - Return true if this set contains no members. -/// bool ConstantRange::isEmptySet() const { return Lower == Upper && Lower.isMinValue(); } -/// isWrappedSet - Return true if this set wraps around the top of the range, -/// for example: [100, 8) -/// bool ConstantRange::isWrappedSet() const { return Lower.ugt(Upper); } -/// isSignWrappedSet - Return true if this set wraps around the INT_MIN of -/// its bitwidth, for example: i8 [120, 140). -/// bool ConstantRange::isSignWrappedSet() const { return contains(APInt::getSignedMaxValue(getBitWidth())) && contains(APInt::getSignedMinValue(getBitWidth())); } -/// getSetSize - Return the number of elements in this set. -/// APInt ConstantRange::getSetSize() const { if (isFullSet()) { APInt Size(getBitWidth()+1, 0); @@ -272,12 +256,6 @@ APInt ConstantRange::getSetSize() const { return (Upper - Lower).zext(getBitWidth()+1); } -/// isSizeStrictlySmallerThanOf - Compare set size of this range with the range -/// CR. -/// This function is faster than comparing results of getSetSize for the two -/// ranges, because we don't need to extend bitwidth of APInts we're operating -/// with. -/// bool ConstantRange::isSizeStrictlySmallerThanOf(const ConstantRange &Other) const { assert(getBitWidth() == Other.getBitWidth()); @@ -288,58 +266,44 @@ ConstantRange::isSizeStrictlySmallerThanOf(const ConstantRange &Other) const { return (Upper - Lower).ult(Other.Upper - Other.Lower); } -/// getUnsignedMax - Return the largest unsigned value contained in the -/// ConstantRange. -/// APInt ConstantRange::getUnsignedMax() const { if (isFullSet() || isWrappedSet()) return APInt::getMaxValue(getBitWidth()); return getUpper() - 1; } -/// getUnsignedMin - Return the smallest unsigned value contained in the -/// ConstantRange. -/// APInt ConstantRange::getUnsignedMin() const { if (isFullSet() || (isWrappedSet() && getUpper() != 0)) return APInt::getMinValue(getBitWidth()); return getLower(); } -/// getSignedMax - Return the largest signed value contained in the -/// ConstantRange. -/// APInt ConstantRange::getSignedMax() const { APInt SignedMax(APInt::getSignedMaxValue(getBitWidth())); if (!isWrappedSet()) { - if (getLower().sle(getUpper() - 1)) - return getUpper() - 1; - return SignedMax; + APInt UpperMinusOne = getUpper() - 1; + if (getLower().sle(UpperMinusOne)) + return UpperMinusOne; + return APInt::getSignedMaxValue(getBitWidth()); } if (getLower().isNegative() == getUpper().isNegative()) - return SignedMax; + return APInt::getSignedMaxValue(getBitWidth()); return getUpper() - 1; } -/// getSignedMin - Return the smallest signed value contained in the -/// ConstantRange. -/// APInt ConstantRange::getSignedMin() const { - APInt SignedMin(APInt::getSignedMinValue(getBitWidth())); if (!isWrappedSet()) { if (getLower().sle(getUpper() - 1)) return getLower(); - return SignedMin; + return APInt::getSignedMinValue(getBitWidth()); } if ((getUpper() - 1).slt(getLower())) { - if (getUpper() != SignedMin) - return SignedMin; + if (!getUpper().isMinSignedValue()) + return APInt::getSignedMinValue(getBitWidth()); } return getLower(); } -/// contains - Return true if the specified value is in the set. -/// bool ConstantRange::contains(const APInt &V) const { if (Lower == Upper) return isFullSet(); @@ -349,10 +313,6 @@ bool ConstantRange::contains(const APInt &V) const { return Lower.ule(V) || V.ult(Upper); } -/// contains - Return true if the argument is a subset of this range. -/// Two equal sets contain each other. The empty set contained by all other -/// sets. -/// bool ConstantRange::contains(const ConstantRange &Other) const { if (isFullSet() || Other.isEmptySet()) return true; if (isEmptySet() || Other.isFullSet()) return false; @@ -371,8 +331,6 @@ bool ConstantRange::contains(const ConstantRange &Other) const { return Other.getUpper().ule(Upper) && Lower.ule(Other.getLower()); } -/// subtract - Subtract the specified constant from the endpoints of this -/// constant range. ConstantRange ConstantRange::subtract(const APInt &Val) const { assert(Val.getBitWidth() == getBitWidth() && "Wrong bit width"); // If the set is empty or full, don't modify the endpoints. @@ -381,17 +339,10 @@ ConstantRange ConstantRange::subtract(const APInt &Val) const { return ConstantRange(Lower - Val, Upper - Val); } -/// \brief Subtract the specified range from this range (aka relative complement -/// of the sets). ConstantRange ConstantRange::difference(const ConstantRange &CR) const { return intersectWith(CR.inverse()); } -/// intersectWith - Return the range that results from the intersection of this -/// range with another range. The resultant range is guaranteed to include all -/// elements contained in both input ranges, and to have the smallest possible -/// set size that does so. Because there may be two intersections with the -/// same set size, A.intersectWith(B) might not be equal to B.intersectWith(A). ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { assert(getBitWidth() == CR.getBitWidth() && "ConstantRange types don't agree!"); @@ -466,13 +417,6 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { return CR; } - -/// unionWith - Return the range that results from the union of this range with -/// another range. The resultant range is guaranteed to include the elements of -/// both sets, but may contain more. For example, [3, 9) union [12,15) is -/// [3, 15), which includes 9, 10, and 11, which were not included in either -/// set before. -/// ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const { assert(getBitWidth() == CR.getBitWidth() && "ConstantRange types don't agree!"); @@ -593,10 +537,6 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp, }; } -/// zeroExtend - Return a new range in the specified integer type, which must -/// be strictly larger than the current type. The returned range will -/// correspond to the possible range of values as if the source range had been -/// zero extended. ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const { if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false); @@ -613,10 +553,6 @@ ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const { return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize)); } -/// signExtend - Return a new range in the specified integer type, which must -/// be strictly larger than the current type. The returned range will -/// correspond to the possible range of values as if the source range had been -/// sign extended. ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const { if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false); @@ -635,10 +571,6 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const { return ConstantRange(Lower.sext(DstTySize), Upper.sext(DstTySize)); } -/// truncate - Return a new range in the specified integer type, which must be -/// strictly smaller than the current type. The returned range will -/// correspond to the possible range of values as if the source range had been -/// truncated to the specified type. ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { assert(getBitWidth() > DstTySize && "Not a value truncation"); if (isEmptySet()) @@ -690,8 +622,6 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { return ConstantRange(DstTySize, /*isFullSet=*/true); } -/// zextOrTrunc - make this range have the bit width given by \p DstTySize. The -/// value is zero extended, truncated, or left alone to make it that width. ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const { unsigned SrcTySize = getBitWidth(); if (SrcTySize > DstTySize) @@ -701,8 +631,6 @@ ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const { return *this; } -/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The -/// value is sign extended, truncated, or left alone to make it that width. ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const { unsigned SrcTySize = getBitWidth(); if (SrcTySize > DstTySize) @@ -999,8 +927,6 @@ ConstantRange ConstantRange::inverse() const { return ConstantRange(Upper, Lower); } -/// print - Print out the bounds to a stream... -/// void ConstantRange::print(raw_ostream &OS) const { if (isFullSet()) OS << "full-set"; @@ -1011,8 +937,6 @@ void ConstantRange::print(raw_ostream &OS) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -/// dump - Allow printing from a debugger easily... -/// LLVM_DUMP_METHOD void ConstantRange::dump() const { print(dbgs()); } diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp index c5f93c9f4db0..ffc8f2e4303b 100644 --- a/contrib/llvm/lib/IR/Constants.cpp +++ b/contrib/llvm/lib/IR/Constants.cpp @@ -518,27 +518,19 @@ ConstantInt *ConstantInt::getFalse(LLVMContext &Context) { } Constant *ConstantInt::getTrue(Type *Ty) { - VectorType *VTy = dyn_cast<VectorType>(Ty); - if (!VTy) { - assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1."); - return ConstantInt::getTrue(Ty->getContext()); - } - assert(VTy->getElementType()->isIntegerTy(1) && - "True must be vector of i1 or i1."); - return ConstantVector::getSplat(VTy->getNumElements(), - ConstantInt::getTrue(Ty->getContext())); + assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1."); + ConstantInt *TrueC = ConstantInt::getTrue(Ty->getContext()); + if (auto *VTy = dyn_cast<VectorType>(Ty)) + return ConstantVector::getSplat(VTy->getNumElements(), TrueC); + return TrueC; } Constant *ConstantInt::getFalse(Type *Ty) { - VectorType *VTy = dyn_cast<VectorType>(Ty); - if (!VTy) { - assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1."); - return ConstantInt::getFalse(Ty->getContext()); - } - assert(VTy->getElementType()->isIntegerTy(1) && - "False must be vector of i1 or i1."); - return ConstantVector::getSplat(VTy->getNumElements(), - ConstantInt::getFalse(Ty->getContext())); + assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1."); + ConstantInt *FalseC = ConstantInt::getFalse(Ty->getContext()); + if (auto *VTy = dyn_cast<VectorType>(Ty)) + return ConstantVector::getSplat(VTy->getNumElements(), FalseC); + return FalseC; } // Get a ConstantInt from an APInt. diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp index b5ed30b85c8a..50292b6e20bf 100644 --- a/contrib/llvm/lib/IR/Core.cpp +++ b/contrib/llvm/lib/IR/Core.cpp @@ -863,6 +863,19 @@ LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) { return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count); } +LLVMValueRef LLVMMetadataAsValue(LLVMContextRef C, LLVMMetadataRef MD) { + return wrap(MetadataAsValue::get(*unwrap(C), unwrap(MD))); +} + +LLVMMetadataRef LLVMValueAsMetadata(LLVMValueRef Val) { + auto *V = unwrap(Val); + if (auto *C = dyn_cast<Constant>(V)) + return wrap(ConstantAsMetadata::get(C)); + if (auto *MAV = dyn_cast<MetadataAsValue>(V)) + return wrap(MAV->getMetadata()); + return wrap(ValueAsMetadata::get(V)); +} + const char *LLVMGetMDString(LLVMValueRef V, unsigned *Length) { if (const auto *MD = dyn_cast<MetadataAsValue>(unwrap(V))) if (const MDString *S = dyn_cast<MDString>(MD->getMetadata())) { @@ -1883,13 +1896,8 @@ void LLVMRemoveStringAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A, const char *V) { Function *Func = unwrap<Function>(Fn); - AttributeList::AttrIndex Idx = - AttributeList::AttrIndex(AttributeList::FunctionIndex); - AttrBuilder B; - - B.addAttribute(A, V); - AttributeList Set = AttributeList::get(Func->getContext(), Idx, B); - Func->addAttributes(Idx, Set); + Attribute Attr = Attribute::get(Func->getContext(), A, V); + Func->addAttribute(AttributeList::FunctionIndex, Attr); } /*--.. Operations on parameters ............................................--*/ @@ -1949,9 +1957,7 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) { void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) { Argument *A = unwrap<Argument>(Arg); - AttrBuilder B; - B.addAlignmentAttr(align); - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::getWithAlignment(A->getContext(), align)); } /*--.. Operations on basic blocks ..........................................--*/ @@ -2158,11 +2164,8 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) { void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, unsigned align) { CallSite Call = CallSite(unwrap<Instruction>(Instr)); - AttrBuilder B; - B.addAlignmentAttr(align); - Call.setAttributes(Call.getAttributes().addAttributes( - Call->getContext(), index, - AttributeList::get(Call->getContext(), index, B))); + Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), align); + Call.addAttribute(index, AlignAttr); } void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp index 6f90ce598568..93bacdd2e80f 100644 --- a/contrib/llvm/lib/IR/DataLayout.cpp +++ b/contrib/llvm/lib/IR/DataLayout.cpp @@ -608,11 +608,8 @@ unsigned DataLayout::getPointerSize(unsigned AS) const { unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const { assert(Ty->isPtrOrPtrVectorTy() && "This should only be called with a pointer or pointer vector type"); - - if (Ty->isPointerTy()) - return getTypeSizeInBits(Ty); - - return getTypeSizeInBits(Ty->getScalarType()); + Ty = Ty->getScalarType(); + return getPointerSizeInBits(cast<PointerType>(Ty)->getAddressSpace()); } /*! @@ -624,7 +621,7 @@ unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const { == false) for the requested type \a Ty. */ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { - int AlignType = -1; + AlignTypeEnum AlignType; assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); switch (Ty->getTypeID()) { @@ -673,8 +670,7 @@ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { llvm_unreachable("Bad type for getAlignment!!!"); } - return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty), - abi_or_pref, Ty); + return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty), abi_or_pref, Ty); } unsigned DataLayout::getABITypeAlignment(Type *Ty) const { diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp index c4bb9e83acd7..e1f5fdea44e4 100644 --- a/contrib/llvm/lib/IR/Function.cpp +++ b/contrib/llvm/lib/IR/Function.cpp @@ -138,13 +138,18 @@ bool Argument::onlyReadsMemory() const { Attrs.hasParamAttribute(getArgNo(), Attribute::ReadNone); } -void Argument::addAttr(AttributeList AS) { - assert(AS.getNumSlots() <= 1 && - "Trying to add more than one attribute set to an argument!"); - AttrBuilder B(AS, AS.getSlotIndex(0)); - getParent()->addAttributes( - getArgNo() + 1, - AttributeList::get(Parent->getContext(), getArgNo() + 1, B)); +void Argument::addAttrs(AttrBuilder &B) { + AttributeList AL = getParent()->getAttributes(); + AL = AL.addAttributes(Parent->getContext(), getArgNo() + 1, B); + getParent()->setAttributes(AL); +} + +void Argument::addAttr(Attribute::AttrKind Kind) { + getParent()->addAttribute(getArgNo() + 1, Kind); +} + +void Argument::addAttr(Attribute Attr) { + getParent()->addAttribute(getArgNo() + 1, Attr); } void Argument::removeAttr(AttributeList AS) { @@ -156,6 +161,10 @@ void Argument::removeAttr(AttributeList AS) { AttributeList::get(Parent->getContext(), getArgNo() + 1, B)); } +void Argument::removeAttr(Attribute::AttrKind Kind) { + getParent()->removeAttribute(getArgNo() + 1, Kind); +} + bool Argument::hasAttribute(Attribute::AttrKind Kind) const { return getParent()->hasParamAttribute(getArgNo(), Kind); } diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp index c10c144122e2..76582e334d1f 100644 --- a/contrib/llvm/lib/IR/Instructions.cpp +++ b/contrib/llvm/lib/IR/Instructions.cpp @@ -1855,7 +1855,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, return false; // Mask must be vector of i32. - VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType()); + auto *MaskTy = dyn_cast<VectorType>(Mask->getType()); if (!MaskTy || !MaskTy->getElementType()->isIntegerTy(32)) return false; @@ -1863,10 +1863,10 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, if (isa<UndefValue>(Mask) || isa<ConstantAggregateZero>(Mask)) return true; - if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) { + if (const auto *MV = dyn_cast<ConstantVector>(Mask)) { unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements(); for (Value *Op : MV->operands()) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + if (auto *CI = dyn_cast<ConstantInt>(Op)) { if (CI->uge(V1Size*2)) return false; } else if (!isa<UndefValue>(Op)) { @@ -1876,8 +1876,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, return true; } - if (const ConstantDataSequential *CDS = - dyn_cast<ConstantDataSequential>(Mask)) { + if (const auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) { unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements(); for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i) if (CDS->getElementAsInteger(i) >= V1Size*2) @@ -1889,7 +1888,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, // used as the shuffle mask. When this occurs, the shuffle mask will // fall into this case and fail. To avoid this error, do this bit of // ugliness to allow such a mask pass. - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Mask)) + if (const auto *CE = dyn_cast<ConstantExpr>(Mask)) if (CE->getOpcode() == Instruction::UserOp1) return true; @@ -1898,7 +1897,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, int ShuffleVectorInst::getMaskValue(Constant *Mask, unsigned i) { assert(i < Mask->getType()->getVectorNumElements() && "Index out of range"); - if (ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(Mask)) + if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) return CDS->getElementAsInteger(i); Constant *C = Mask->getAggregateElement(i); if (isa<UndefValue>(C)) @@ -1910,7 +1909,7 @@ void ShuffleVectorInst::getShuffleMask(Constant *Mask, SmallVectorImpl<int> &Result) { unsigned NumElts = Mask->getType()->getVectorNumElements(); - if (ConstantDataSequential *CDS=dyn_cast<ConstantDataSequential>(Mask)) { + if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) { for (unsigned i = 0; i != NumElts; ++i) Result.push_back(CDS->getElementAsInteger(i)); return; diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp index cc32e90ad36e..1a320b0165fa 100644 --- a/contrib/llvm/lib/MC/MCDwarf.cpp +++ b/contrib/llvm/lib/MC/MCDwarf.cpp @@ -168,7 +168,7 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, MCSection *Section, // and the current Label. const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo(); MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label, - asmInfo->getPointerSize()); + asmInfo->getCodePointerSize()); Discriminator = 0; LastLine = LineEntry.getLine(); @@ -188,7 +188,7 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, MCSection *Section, const MCAsmInfo *AsmInfo = Ctx.getAsmInfo(); MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd, - AsmInfo->getPointerSize()); + AsmInfo->getCodePointerSize()); } // @@ -594,7 +594,7 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, // Figure the padding after the header before the table of address and size // pairs who's values are PointerSize'ed. const MCAsmInfo *asmInfo = context.getAsmInfo(); - int AddrSize = asmInfo->getPointerSize(); + int AddrSize = asmInfo->getCodePointerSize(); int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1)); if (Pad == 2 * AddrSize) Pad = 0; @@ -677,7 +677,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, // The DWARF v5 header has unit type, address size, abbrev offset. // Earlier versions have abbrev offset, address size. const MCAsmInfo &AsmInfo = *context.getAsmInfo(); - int AddrSize = AsmInfo.getPointerSize(); + int AddrSize = AsmInfo.getCodePointerSize(); if (context.getDwarfVersion() >= 5) { MCOS->EmitIntValue(dwarf::DW_UT_compile, 1); MCOS->EmitIntValue(AddrSize, 1); @@ -823,7 +823,7 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) { auto &Sections = context.getGenDwarfSectionSyms(); const MCAsmInfo *AsmInfo = context.getAsmInfo(); - int AddrSize = AsmInfo->getPointerSize(); + int AddrSize = AsmInfo->getCodePointerSize(); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); @@ -981,7 +981,7 @@ static unsigned getSizeForEncoding(MCStreamer &streamer, default: llvm_unreachable("Unknown Encoding"); case dwarf::DW_EH_PE_absptr: case dwarf::DW_EH_PE_signed: - return context.getAsmInfo()->getPointerSize(); + return context.getAsmInfo()->getCodePointerSize(); case dwarf::DW_EH_PE_udata2: case dwarf::DW_EH_PE_sdata2: return 2; @@ -1318,7 +1318,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCSymbol *personality, if (CIEVersion >= 4) { // Address Size - Streamer.EmitIntValue(context.getAsmInfo()->getPointerSize(), 1); + Streamer.EmitIntValue(context.getAsmInfo()->getCodePointerSize(), 1); // Segment Descriptor Size Streamer.EmitIntValue(0, 1); @@ -1384,7 +1384,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCSymbol *personality, InitialCFAOffset = CFAOffset; // Padding - Streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize()); + Streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getCodePointerSize()); Streamer.EmitLabel(sectionEnd); return *sectionStart; @@ -1453,7 +1453,7 @@ void FrameEmitterImpl::EmitFDE(const MCSymbol &cieStart, // The size of a .eh_frame section has to be a multiple of the alignment // since a null CIE is interpreted as the end. Old systems overaligned // .eh_frame, so we do too and account for it in the last FDE. - unsigned Align = LastInSection ? asmInfo->getPointerSize() : PCSize; + unsigned Align = LastInSection ? asmInfo->getCodePointerSize() : PCSize; Streamer.EmitValueToAlignment(Align); Streamer.EmitLabel(fdeEnd); @@ -1514,6 +1514,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, MCContext &Context = Streamer.getContext(); const MCObjectFileInfo *MOFI = Context.getObjectFileInfo(); + const MCAsmInfo *AsmInfo = Context.getAsmInfo(); FrameEmitterImpl Emitter(IsEH, Streamer); ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getDwarfFrameInfos(); @@ -1525,7 +1526,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, if (Frame.CompactUnwindEncoding == 0) continue; if (!SectionEmitted) { Streamer.SwitchSection(MOFI->getCompactUnwindSection()); - Streamer.EmitValueToAlignment(Context.getAsmInfo()->getPointerSize()); + Streamer.EmitValueToAlignment(AsmInfo->getCodePointerSize()); SectionEmitted = true; } NeedsEHFrameSection |= diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp index e65ce9f0b936..42e8ad340281 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp @@ -1755,8 +1755,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, case DK_8BYTE: return parseDirectiveValue(IDVal, 8); case DK_DC_A: - return parseDirectiveValue(IDVal, - getContext().getAsmInfo()->getPointerSize()); + return parseDirectiveValue( + IDVal, getContext().getAsmInfo()->getCodePointerSize()); case DK_OCTA: return parseDirectiveOctaValue(IDVal); case DK_SINGLE: diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp index f2021f796d12..c4924f85a907 100644 --- a/contrib/llvm/lib/Object/Archive.cpp +++ b/contrib/llvm/lib/Object/Archive.cpp @@ -1,4 +1,4 @@ -//===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===// +//===- Archive.cpp - ar File Format implementation ------------------------===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,29 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/Archive.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Chrono.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <memory> +#include <string> +#include <system_error> using namespace llvm; using namespace object; @@ -25,7 +42,7 @@ using namespace llvm::support::endian; static const char *const Magic = "!<arch>\n"; static const char *const ThinMagic = "!<thin>\n"; -void Archive::anchor() { } +void Archive::anchor() {} static Error malformedError(Twine Msg) { @@ -61,8 +78,8 @@ ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, if (Err) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->Terminator, - sizeof(ArMemHdr->Terminator))); + OS.write_escaped(StringRef(ArMemHdr->Terminator, + sizeof(ArMemHdr->Terminator))); OS.flush(); std::string Msg("terminator characters in archive member \"" + Buf + "\" not the correct \"`\\n\" values for the archive " @@ -97,13 +114,13 @@ Expected<StringRef> ArchiveMemberHeader::getRawName() const { EndCond = ' '; else EndCond = '/'; - llvm::StringRef::size_type end = - llvm::StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); - if (end == llvm::StringRef::npos) + StringRef::size_type end = + StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); + if (end == StringRef::npos) end = sizeof(ArMemHdr->Name); assert(end <= sizeof(ArMemHdr->Name) && end > 0); // Don't include the EndCond if there is one. - return llvm::StringRef(ArMemHdr->Name, end); + return StringRef(ArMemHdr->Name, end); } // This gets the name looking up long names. Size is the size of the archive @@ -205,12 +222,12 @@ Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { Expected<uint32_t> ArchiveMemberHeader::getSize() const { uint32_t Ret; - if (llvm::StringRef(ArMemHdr->Size, - sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { + if (StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->Size, - sizeof(ArMemHdr->Size)).rtrim(" ")); + OS.write_escaped(StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ")); OS.flush(); uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); @@ -227,8 +244,8 @@ Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const { sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->AccessMode, - sizeof(ArMemHdr->AccessMode)).rtrim(" ")); + OS.write_escaped(StringRef(ArMemHdr->AccessMode, + sizeof(ArMemHdr->AccessMode)).rtrim(" ")); OS.flush(); uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); @@ -247,8 +264,8 @@ ArchiveMemberHeader::getLastModified() const { .getAsInteger(10, Seconds)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->LastModified, - sizeof(ArMemHdr->LastModified)).rtrim(" ")); + OS.write_escaped(StringRef(ArMemHdr->LastModified, + sizeof(ArMemHdr->LastModified)).rtrim(" ")); OS.flush(); uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp index 8467d349cd95..2b44c4a82d2c 100644 --- a/contrib/llvm/lib/Object/Binary.cpp +++ b/contrib/llvm/lib/Object/Binary.cpp @@ -1,4 +1,4 @@ -//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===// +//===- Binary.cpp - A generic binary file ---------------------------------===// // // The LLVM Compiler Infrastructure // @@ -11,21 +11,25 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/Binary.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" - -// Include headers for createBinary. #include "llvm/Object/Archive.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <memory> +#include <system_error> using namespace llvm; using namespace object; -Binary::~Binary() {} +Binary::~Binary() = default; Binary::Binary(unsigned int Type, MemoryBufferRef Source) : TypeID(Type), Data(Source) {} diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp index a2d8f12449e6..1866aba9b21a 100644 --- a/contrib/llvm/lib/Object/COFFObjectFile.cpp +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -1,4 +1,4 @@ -//===- COFFObjectFile.cpp - COFF object file implementation -----*- C++ -*-===// +//===- COFFObjectFile.cpp - COFF object file implementation ---------------===// // // The LLVM Compiler Infrastructure // @@ -11,16 +11,28 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/COFF.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/COFF.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <cctype> +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> #include <limits> +#include <memory> +#include <system_error> using namespace llvm; using namespace object; @@ -116,7 +128,7 @@ const coff_symbol_type *COFFObjectFile::toSymb(DataRefImpl Ref) const { const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const { const coff_section *Addr = reinterpret_cast<const coff_section*>(Ref.p); -# ifndef NDEBUG +#ifndef NDEBUG // Verify that the section points to a valid entry in the section table. if (Addr < SectionTable || Addr >= (SectionTable + getNumberOfSections())) report_fatal_error("Section was outside of section table."); @@ -124,7 +136,7 @@ const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const { uintptr_t Offset = uintptr_t(Addr) - uintptr_t(SectionTable); assert(Offset % sizeof(coff_section) == 0 && "Section did not point to the beginning of a section"); -# endif +#endif return Addr; } @@ -985,7 +997,7 @@ COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const { if (Symbol.getNumberOfAuxSymbols() > 0) { // AUX data comes immediately after the symbol in COFF Aux = reinterpret_cast<const uint8_t *>(Symbol.getRawPtr()) + SymbolSize; -# ifndef NDEBUG +#ifndef NDEBUG // Verify that the Aux symbol points to a valid entry in the symbol table. uintptr_t Offset = uintptr_t(Aux) - uintptr_t(base()); if (Offset < getPointerToSymbolTable() || @@ -995,7 +1007,7 @@ COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const { assert((Offset - getPointerToSymbolTable()) % SymbolSize == 0 && "Aux Symbol data did not point to the beginning of a symbol"); -# endif +#endif } return makeArrayRef(Aux, Symbol.getNumberOfAuxSymbols() * SymbolSize); } diff --git a/contrib/llvm/lib/Object/IRSymtab.cpp b/contrib/llvm/lib/Object/IRSymtab.cpp index da1ef9946b50..bb3d1b2cf695 100644 --- a/contrib/llvm/lib/Object/IRSymtab.cpp +++ b/contrib/llvm/lib/Object/IRSymtab.cpp @@ -28,14 +28,12 @@ struct Builder { Builder(SmallVector<char, 0> &Symtab, SmallVector<char, 0> &Strtab) : Symtab(Symtab), Strtab(Strtab) {} - StringTableBuilder StrtabBuilder{StringTableBuilder::ELF}; + StringTableBuilder StrtabBuilder{StringTableBuilder::RAW}; BumpPtrAllocator Alloc; StringSaver Saver{Alloc}; DenseMap<const Comdat *, unsigned> ComdatMap; - ModuleSymbolTable Msymtab; - SmallPtrSet<GlobalValue *, 8> Used; Mangler Mang; Triple TT; @@ -49,6 +47,7 @@ struct Builder { void setStr(storage::Str &S, StringRef Value) { S.Offset = StrtabBuilder.add(Value); + S.Size = Value.size(); } template <typename T> void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) { @@ -59,18 +58,24 @@ struct Builder { } Error addModule(Module *M); - Error addSymbol(ModuleSymbolTable::Symbol Sym); + Error addSymbol(const ModuleSymbolTable &Msymtab, + const SmallPtrSet<GlobalValue *, 8> &Used, + ModuleSymbolTable::Symbol Sym); Error build(ArrayRef<Module *> Mods); }; Error Builder::addModule(Module *M) { + SmallPtrSet<GlobalValue *, 8> Used; collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false); - storage::Module Mod; - Mod.Begin = Msymtab.symbols().size(); + ModuleSymbolTable Msymtab; Msymtab.addModule(M); - Mod.End = Msymtab.symbols().size(); + + storage::Module Mod; + Mod.Begin = Syms.size(); + Mod.End = Syms.size() + Msymtab.symbols().size(); + Mod.UncBegin = Uncommons.size(); Mods.push_back(Mod); if (TT.isOSBinFormatCOFF()) { @@ -84,20 +89,25 @@ Error Builder::addModule(Module *M) { } } + for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) + if (Error Err = addSymbol(Msymtab, Used, Msym)) + return Err; + return Error::success(); } -Error Builder::addSymbol(ModuleSymbolTable::Symbol Msym) { +Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, + const SmallPtrSet<GlobalValue *, 8> &Used, + ModuleSymbolTable::Symbol Msym) { Syms.emplace_back(); storage::Symbol &Sym = Syms.back(); Sym = {}; - Sym.UncommonIndex = -1; storage::Uncommon *Unc = nullptr; auto Uncommon = [&]() -> storage::Uncommon & { if (Unc) return *Unc; - Sym.UncommonIndex = Uncommons.size(); + Sym.Flags |= 1 << storage::Symbol::FB_has_uncommon; Uncommons.emplace_back(); Unc = &Uncommons.back(); *Unc = {}; @@ -194,15 +204,10 @@ Error Builder::build(ArrayRef<Module *> IRMods) { setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); TT = Triple(IRMods[0]->getTargetTriple()); - // This adds the symbols for each module to Msymtab. for (auto *M : IRMods) if (Error Err = addModule(M)) return Err; - for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) - if (Error Err = addSymbol(Msym)) - return Err; - COFFLinkerOptsOS.flush(); setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts); diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp index f36388b677f3..1f60e7157bd9 100644 --- a/contrib/llvm/lib/Object/ObjectFile.cpp +++ b/contrib/llvm/lib/Object/ObjectFile.cpp @@ -1,4 +1,4 @@ -//===- ObjectFile.cpp - File format independent object file -----*- C++ -*-===// +//===- ObjectFile.cpp - File format independent object file ---------------===// // // The LLVM Compiler Infrastructure // @@ -11,20 +11,28 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/ObjectFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/Error.h" #include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Object/Wasm.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <memory> #include <system_error> using namespace llvm; using namespace object; -void ObjectFile::anchor() { } +void ObjectFile::anchor() {} ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source) : SymbolicFile(Type, Source) {} diff --git a/contrib/llvm/lib/Object/SymbolicFile.cpp b/contrib/llvm/lib/Object/SymbolicFile.cpp index 4b51a49cf342..16cff5c228bd 100644 --- a/contrib/llvm/lib/Object/SymbolicFile.cpp +++ b/contrib/llvm/lib/Object/SymbolicFile.cpp @@ -1,4 +1,4 @@ -//===- SymbolicFile.cpp - Interface that only provides symbols --*- C++ -*-===// +//===- SymbolicFile.cpp - Interface that only provides symbols ------------===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,20 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/COFF.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <memory> using namespace llvm; using namespace object; @@ -24,7 +32,7 @@ using namespace object; SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source) : Binary(Type, Source) {} -SymbolicFile::~SymbolicFile() {} +SymbolicFile::~SymbolicFile() = default; Expected<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile( MemoryBufferRef Object, sys::fs::file_magic Type, LLVMContext *Context) { diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp index 9778628911cd..c4c892f0352a 100644 --- a/contrib/llvm/lib/Support/APFloat.cpp +++ b/contrib/llvm/lib/Support/APFloat.cpp @@ -3442,7 +3442,7 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, // Ignore trailing binary zeros. int trailingZeros = significand.countTrailingZeros(); exp += trailingZeros; - significand = significand.lshr(trailingZeros); + significand.lshrInPlace(trailingZeros); // Change the exponent from 2^e to 10^e. if (exp == 0) { diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp index 0c7da1dad0d2..2d049a1cff85 100644 --- a/contrib/llvm/lib/Support/APInt.cpp +++ b/contrib/llvm/lib/Support/APInt.cpp @@ -125,16 +125,16 @@ APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) fromString(numbits, Str, radix); } -APInt& APInt::AssignSlowCase(const APInt& RHS) { +void APInt::AssignSlowCase(const APInt& RHS) { // Don't do anything for X = X if (this == &RHS) - return *this; + return; if (BitWidth == RHS.getBitWidth()) { // assume same bit-width single-word case is already handled assert(!isSingleWord()); memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE); - return *this; + return; } if (isSingleWord()) { @@ -154,7 +154,7 @@ APInt& APInt::AssignSlowCase(const APInt& RHS) { memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); } BitWidth = RHS.BitWidth; - return clearUnusedBits(); + clearUnusedBits(); } /// This method 'profiles' an APInt for use with FoldingSet. @@ -339,19 +339,16 @@ APInt& APInt::operator*=(const APInt& RHS) { return *this; } -APInt& APInt::AndAssignSlowCase(const APInt& RHS) { +void APInt::AndAssignSlowCase(const APInt& RHS) { tcAnd(pVal, RHS.pVal, getNumWords()); - return *this; } -APInt& APInt::OrAssignSlowCase(const APInt& RHS) { +void APInt::OrAssignSlowCase(const APInt& RHS) { tcOr(pVal, RHS.pVal, getNumWords()); - return *this; } -APInt& APInt::XorAssignSlowCase(const APInt& RHS) { +void APInt::XorAssignSlowCase(const APInt& RHS) { tcXor(pVal, RHS.pVal, getNumWords()); - return *this; } APInt APInt::operator*(const APInt& RHS) const { @@ -367,14 +364,6 @@ bool APInt::EqualSlowCase(const APInt& RHS) const { return std::equal(pVal, pVal + getNumWords(), RHS.pVal); } -bool APInt::EqualSlowCase(uint64_t Val) const { - unsigned n = getActiveBits(); - if (n <= APINT_BITS_PER_WORD) - return pVal[0] == Val; - else - return false; -} - bool APInt::ult(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) @@ -733,6 +722,22 @@ unsigned APInt::countPopulationSlowCase() const { return Count; } +bool APInt::intersectsSlowCase(const APInt &RHS) const { + for (unsigned i = 0, e = getNumWords(); i != e; ++i) + if ((pVal[i] & RHS.pVal[i]) != 0) + return true; + + return false; +} + +bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { + for (unsigned i = 0, e = getNumWords(); i != e; ++i) + if ((pVal[i] & ~RHS.pVal[i]) != 0) + return false; + + return true; +} + APInt APInt::byteSwap() const { assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); if (BitWidth == 16) @@ -774,14 +779,12 @@ APInt APInt::reverseBits() const { } APInt Val(*this); - APInt Reversed(*this); - int S = BitWidth - 1; - - const APInt One(BitWidth, 1); + APInt Reversed(BitWidth, 0); + unsigned S = BitWidth; - for ((Val = Val.lshr(1)); Val != 0; (Val = Val.lshr(1))) { + for (; Val != 0; Val.lshrInPlace(1)) { Reversed <<= 1; - Reversed |= (Val & One); + Reversed |= Val[0]; --S; } @@ -1136,63 +1139,14 @@ APInt APInt::ashr(unsigned shiftAmt) const { /// Logical right-shift this APInt by shiftAmt. /// @brief Logical right-shift function. -APInt APInt::lshr(const APInt &shiftAmt) const { - return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth)); -} - -/// Perform a logical right-shift from Src to Dst of Words words, by Shift, -/// which must be less than 64. If the source and destination ranges overlap, -/// we require that Src >= Dst (put another way, we require that the overall -/// operation is a right shift on the combined range). -static void lshrWords(APInt::WordType *Dst, APInt::WordType *Src, - unsigned Words, unsigned Shift) { - assert(Shift < APInt::APINT_BITS_PER_WORD); - - if (!Words) - return; - - if (Shift == 0) { - std::memmove(Dst, Src, Words * APInt::APINT_WORD_SIZE); - return; - } - - uint64_t Low = Src[0]; - for (unsigned I = 1; I != Words; ++I) { - uint64_t High = Src[I]; - Dst[I - 1] = - (Low >> Shift) | (High << (APInt::APINT_BITS_PER_WORD - Shift)); - Low = High; - } - Dst[Words - 1] = Low >> Shift; +void APInt::lshrInPlace(const APInt &shiftAmt) { + lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); } /// Logical right-shift this APInt by shiftAmt. /// @brief Logical right-shift function. -void APInt::lshrInPlace(unsigned shiftAmt) { - if (isSingleWord()) { - if (shiftAmt >= BitWidth) - VAL = 0; - else - VAL >>= shiftAmt; - return; - } - - // Don't bother performing a no-op shift. - if (!shiftAmt) - return; - - // Find number of complete words being shifted out and zeroed. - const unsigned Words = getNumWords(); - const unsigned ShiftFullWords = - std::min(shiftAmt / APINT_BITS_PER_WORD, Words); - - // Fill in first Words - ShiftFullWords by shifting. - lshrWords(pVal, pVal + ShiftFullWords, Words - ShiftFullWords, - shiftAmt % APINT_BITS_PER_WORD); - - // The remaining high words are all zero. - for (unsigned I = Words - ShiftFullWords; I != Words; ++I) - pVal[I] = 0; +void APInt::lshrSlowCase(unsigned ShiftAmt) { + tcShiftRight(pVal, getNumWords(), ShiftAmt); } /// Left-shift this APInt by shiftAmt. @@ -1202,60 +1156,9 @@ APInt APInt::shl(const APInt &shiftAmt) const { return shl((unsigned)shiftAmt.getLimitedValue(BitWidth)); } -APInt APInt::shlSlowCase(unsigned shiftAmt) const { - // If all the bits were shifted out, the result is 0. This avoids issues - // with shifting by the size of the integer type, which produces undefined - // results. We define these "undefined results" to always be 0. - if (shiftAmt == BitWidth) - return APInt(BitWidth, 0); - - // If none of the bits are shifted out, the result is *this. This avoids a - // lshr by the words size in the loop below which can produce incorrect - // results. It also avoids the expensive computation below for a common case. - if (shiftAmt == 0) - return *this; - - // Create some space for the result. - uint64_t * val = new uint64_t[getNumWords()]; - - // If we are shifting less than a word, do it the easy way - if (shiftAmt < APINT_BITS_PER_WORD) { - uint64_t carry = 0; - for (unsigned i = 0; i < getNumWords(); i++) { - val[i] = pVal[i] << shiftAmt | carry; - carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt); - } - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; - } - - // Compute some values needed by the remaining shift algorithms - unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; - unsigned offset = shiftAmt / APINT_BITS_PER_WORD; - - // If we are shifting whole words, just move whole words - if (wordShift == 0) { - for (unsigned i = 0; i < offset; i++) - val[i] = 0; - for (unsigned i = offset; i < getNumWords(); i++) - val[i] = pVal[i-offset]; - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; - } - - // Copy whole words from this to Result. - unsigned i = getNumWords() - 1; - for (; i > offset; --i) - val[i] = pVal[i-offset] << wordShift | - pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift); - val[offset] = pVal[0] << wordShift; - for (i = 0; i < offset; ++i) - val[i] = 0; - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; +void APInt::shlSlowCase(unsigned ShiftAmt) { + tcShiftLeft(pVal, getNumWords(), ShiftAmt); + clearUnusedBits(); } // Calculate the rotate amount modulo the bit width. @@ -2239,7 +2142,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, while (Tmp != 0) { unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; Str.push_back(Digits[Digit]); - Tmp = Tmp.lshr(ShiftAmt); + Tmp.lshrInPlace(ShiftAmt); } } else { APInt divisor(Radix == 10? 4 : 8, Radix); @@ -2698,63 +2601,58 @@ int APInt::tcDivide(WordType *lhs, const WordType *rhs, return false; } -/* Shift a bignum left COUNT bits in-place. Shifted in bits are zero. - There are no restrictions on COUNT. */ -void APInt::tcShiftLeft(WordType *dst, unsigned parts, unsigned count) { - if (count) { - /* Jump is the inter-part jump; shift is is intra-part shift. */ - unsigned jump = count / APINT_BITS_PER_WORD; - unsigned shift = count % APINT_BITS_PER_WORD; - - while (parts > jump) { - WordType part; +/// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are +/// no restrictions on Count. +void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { + // Don't bother performing a no-op shift. + if (!Count) + return; - parts--; + /* WordShift is the inter-part shift; BitShift is is intra-part shift. */ + unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); + unsigned BitShift = Count % APINT_BITS_PER_WORD; - /* dst[i] comes from the two parts src[i - jump] and, if we have - an intra-part shift, src[i - jump - 1]. */ - part = dst[parts - jump]; - if (shift) { - part <<= shift; - if (parts >= jump + 1) - part |= dst[parts - jump - 1] >> (APINT_BITS_PER_WORD - shift); - } - - dst[parts] = part; + // Fastpath for moving by whole words. + if (BitShift == 0) { + std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); + } else { + while (Words-- > WordShift) { + Dst[Words] = Dst[Words - WordShift] << BitShift; + if (Words > WordShift) + Dst[Words] |= + Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); } - - while (parts > 0) - dst[--parts] = 0; } + + // Fill in the remainder with 0s. + std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); } -/* Shift a bignum right COUNT bits in-place. Shifted in bits are - zero. There are no restrictions on COUNT. */ -void APInt::tcShiftRight(WordType *dst, unsigned parts, unsigned count) { - if (count) { - /* Jump is the inter-part jump; shift is is intra-part shift. */ - unsigned jump = count / APINT_BITS_PER_WORD; - unsigned shift = count % APINT_BITS_PER_WORD; +/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There +/// are no restrictions on Count. +void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { + // Don't bother performing a no-op shift. + if (!Count) + return; - /* Perform the shift. This leaves the most significant COUNT bits - of the result at zero. */ - for (unsigned i = 0; i < parts; i++) { - WordType part; + // WordShift is the inter-part shift; BitShift is is intra-part shift. + unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); + unsigned BitShift = Count % APINT_BITS_PER_WORD; - if (i + jump >= parts) { - part = 0; - } else { - part = dst[i + jump]; - if (shift) { - part >>= shift; - if (i + jump + 1 < parts) - part |= dst[i + jump + 1] << (APINT_BITS_PER_WORD - shift); - } - } - - dst[i] = part; + unsigned WordsToMove = Words - WordShift; + // Fastpath for moving by whole words. + if (BitShift == 0) { + std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); + } else { + for (unsigned i = 0; i != WordsToMove; ++i) { + Dst[i] = Dst[i + WordShift] >> BitShift; + if (i + 1 != WordsToMove) + Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); } } + + // Fill in the remainder with 0s. + std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); } /* Bitwise and of two bignums. */ diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp index f4a9108b8544..34345901eab1 100644 --- a/contrib/llvm/lib/Support/CommandLine.cpp +++ b/contrib/llvm/lib/Support/CommandLine.cpp @@ -2069,12 +2069,15 @@ public: #ifndef NDEBUG OS << " with assertions"; #endif +#if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO std::string CPU = sys::getHostCPUName(); if (CPU == "generic") CPU = "(unknown)"; OS << ".\n" << " Default target: " << sys::getDefaultTargetTriple() << '\n' - << " Host CPU: " << CPU << '\n'; + << " Host CPU: " << CPU; +#endif + OS << '\n'; } void operator=(bool OptionWasSpecified) { if (!OptionWasSpecified) diff --git a/contrib/llvm/lib/Support/Dwarf.cpp b/contrib/llvm/lib/Support/Dwarf.cpp index f13da62e4a87..200546857de7 100644 --- a/contrib/llvm/lib/Support/Dwarf.cpp +++ b/contrib/llvm/lib/Support/Dwarf.cpp @@ -22,7 +22,7 @@ StringRef llvm::dwarf::TagString(unsigned Tag) { switch (Tag) { default: return StringRef(); -#define HANDLE_DW_TAG(ID, NAME) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ case DW_TAG_##NAME: \ return "DW_TAG_" #NAME; #include "llvm/Support/Dwarf.def" @@ -31,11 +31,34 @@ StringRef llvm::dwarf::TagString(unsigned Tag) { unsigned llvm::dwarf::getTag(StringRef TagString) { return StringSwitch<unsigned>(TagString) -#define HANDLE_DW_TAG(ID, NAME) .Case("DW_TAG_" #NAME, DW_TAG_##NAME) +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + .Case("DW_TAG_" #NAME, DW_TAG_##NAME) #include "llvm/Support/Dwarf.def" .Default(DW_TAG_invalid); } +unsigned llvm::dwarf::TagVersion(dwarf::Tag Tag) { + switch (Tag) { + default: + return 0; +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + case DW_TAG_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::TagVendor(dwarf::Tag Tag) { + switch (Tag) { + default: + return 0; +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + case DW_TAG_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::ChildrenString(unsigned Children) { switch (Children) { case DW_CHILDREN_no: return "DW_CHILDREN_no"; @@ -48,29 +71,73 @@ StringRef llvm::dwarf::AttributeString(unsigned Attribute) { switch (Attribute) { default: return StringRef(); -#define HANDLE_DW_AT(ID, NAME) \ - case DW_AT_##NAME: \ +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ return "DW_AT_" #NAME; #include "llvm/Support/Dwarf.def" } } +unsigned llvm::dwarf::AttributeVersion(dwarf::Attribute Attribute) { + switch (Attribute) { + default: + return 0; +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::AttributeVendor(dwarf::Attribute Attribute) { + switch (Attribute) { + default: + return 0; +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::FormEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_FORM(ID, NAME) \ - case DW_FORM_##NAME: \ +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ return "DW_FORM_" #NAME; #include "llvm/Support/Dwarf.def" } } +unsigned llvm::dwarf::FormVersion(dwarf::Form Form) { + switch (Form) { + default: + return 0; +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::FormVendor(dwarf::Form Form) { + switch (Form) { + default: + return 0; +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_OP(ID, NAME) \ +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ case DW_OP_##NAME: \ return "DW_OP_" #NAME; #include "llvm/Support/Dwarf.def" @@ -81,17 +148,40 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) { return StringSwitch<unsigned>(OperationEncodingString) -#define HANDLE_DW_OP(ID, NAME) .Case("DW_OP_" #NAME, DW_OP_##NAME) +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + .Case("DW_OP_" #NAME, DW_OP_##NAME) #include "llvm/Support/Dwarf.def" .Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment) .Default(0); } +unsigned llvm::dwarf::OperationVersion(dwarf::LocationAtom Op) { + switch (Op) { + default: + return 0; +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + case DW_OP_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::OperationVendor(dwarf::LocationAtom Op) { + switch (Op) { + default: + return 0; +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + case DW_OP_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::AttributeEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_ATE(ID, NAME) \ +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ case DW_ATE_##NAME: \ return "DW_ATE_" #NAME; #include "llvm/Support/Dwarf.def" @@ -100,11 +190,34 @@ StringRef llvm::dwarf::AttributeEncodingString(unsigned Encoding) { unsigned llvm::dwarf::getAttributeEncoding(StringRef EncodingString) { return StringSwitch<unsigned>(EncodingString) -#define HANDLE_DW_ATE(ID, NAME) .Case("DW_ATE_" #NAME, DW_ATE_##NAME) +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + .Case("DW_ATE_" #NAME, DW_ATE_##NAME) #include "llvm/Support/Dwarf.def" .Default(0); } +unsigned llvm::dwarf::AttributeEncodingVersion(dwarf::TypeKind ATE) { + switch (ATE) { + default: + return 0; +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + case DW_ATE_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::AttributeEncodingVendor(dwarf::TypeKind ATE) { + switch (ATE) { + default: + return 0; +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + case DW_ATE_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::DecimalSignString(unsigned Sign) { switch (Sign) { case DW_DS_unsigned: return "DW_DS_unsigned"; @@ -169,7 +282,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) { switch (Language) { default: return StringRef(); -#define HANDLE_DW_LANG(ID, NAME) \ +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return "DW_LANG_" #NAME; #include "llvm/Support/Dwarf.def" @@ -178,11 +291,34 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) { unsigned llvm::dwarf::getLanguage(StringRef LanguageString) { return StringSwitch<unsigned>(LanguageString) -#define HANDLE_DW_LANG(ID, NAME) .Case("DW_LANG_" #NAME, DW_LANG_##NAME) +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + .Case("DW_LANG_" #NAME, DW_LANG_##NAME) #include "llvm/Support/Dwarf.def" .Default(0); } +unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) { + switch (Lang) { + default: + return 0; +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + case DW_LANG_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) { + switch (Lang) { + default: + return 0; +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + case DW_LANG_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::CaseString(unsigned Case) { switch (Case) { case DW_ID_case_sensitive: return "DW_ID_case_sensitive"; @@ -394,3 +530,12 @@ StringRef llvm::dwarf::AttributeValueString(uint16_t Attr, unsigned Val) { return StringRef(); } + +bool llvm::dwarf::isValidFormForVersion(Form F, unsigned Version, + bool ExtensionsOk) { + if (FormVendor(F) == DWARF_VENDOR_DWARF) { + unsigned FV = FormVersion(F); + return FV > 0 && FV <= Version; + } + return ExtensionsOk; +} diff --git a/contrib/llvm/lib/Support/LowLevelType.cpp b/contrib/llvm/lib/Support/LowLevelType.cpp index 4290d69cd197..0ee3f1d0119e 100644 --- a/contrib/llvm/lib/Support/LowLevelType.cpp +++ b/contrib/llvm/lib/Support/LowLevelType.cpp @@ -18,25 +18,25 @@ using namespace llvm; LLT::LLT(MVT VT) { if (VT.isVector()) { - SizeInBits = VT.getVectorElementType().getSizeInBits(); - ElementsOrAddrSpace = VT.getVectorNumElements(); - Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector; + init(/*isPointer=*/false, VT.getVectorNumElements() > 1, + VT.getVectorNumElements(), VT.getVectorElementType().getSizeInBits(), + /*AddressSpace=*/0); } else if (VT.isValid()) { // Aggregates are no different from real scalars as far as GlobalISel is // concerned. - Kind = Scalar; - SizeInBits = VT.getSizeInBits(); - ElementsOrAddrSpace = 1; - assert(SizeInBits != 0 && "invalid zero-sized type"); + assert(VT.getSizeInBits() != 0 && "invalid zero-sized type"); + init(/*isPointer=*/false, /*isVector=*/false, /*NumElements=*/0, + VT.getSizeInBits(), /*AddressSpace=*/0); } else { - Kind = Invalid; - SizeInBits = ElementsOrAddrSpace = 0; + IsPointer = false; + IsVector = false; + RawData = 0; } } void LLT::print(raw_ostream &OS) const { if (isVector()) - OS << "<" << ElementsOrAddrSpace << " x s" << SizeInBits << ">"; + OS << "<" << getNumElements() << " x " << getElementType() << ">"; else if (isPointer()) OS << "p" << getAddressSpace(); else if (isValid()) { @@ -45,3 +45,12 @@ void LLT::print(raw_ostream &OS) const { } else llvm_unreachable("trying to print an invalid type"); } + +const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo; +const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo; +const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo; +const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo; +const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo; +const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo; +const constexpr LLT::BitFieldInfo LLT::PointerVectorSizeFieldInfo; +const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo; diff --git a/contrib/llvm/lib/Support/Regex.cpp b/contrib/llvm/lib/Support/Regex.cpp index 68ba79e11766..b1087fd8853c 100644 --- a/contrib/llvm/lib/Support/Regex.cpp +++ b/contrib/llvm/lib/Support/Regex.cpp @@ -48,7 +48,7 @@ Regex::~Regex() { } } -bool Regex::isValid(std::string &Error) { +bool Regex::isValid(std::string &Error) const { if (!error) return true; diff --git a/contrib/llvm/lib/Support/TargetParser.cpp b/contrib/llvm/lib/Support/TargetParser.cpp index 639d2ece263a..bba7c6d0d604 100644 --- a/contrib/llvm/lib/Support/TargetParser.cpp +++ b/contrib/llvm/lib/Support/TargetParser.cpp @@ -210,7 +210,7 @@ bool llvm::ARM::getHWDivFeatures(unsigned HWDivKind, else Features.push_back("-hwdiv-arm"); - if (HWDivKind & ARM::AEK_HWDIV) + if (HWDivKind & ARM::AEK_HWDIVTHUMB) Features.push_back("+hwdiv"); else Features.push_back("-hwdiv"); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index ae01ea477bb9..7141e77fcd25 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1865,7 +1865,7 @@ static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); getUsefulBits(Op, OpUsefulBits, Depth + 1); // The interesting part was at zero in the argument - OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); + OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); } UsefulBits &= OpUsefulBits; @@ -1894,13 +1894,13 @@ static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); Mask = Mask.shl(ShiftAmt); getUsefulBits(Op, Mask, Depth + 1); - Mask = Mask.lshr(ShiftAmt); + Mask.lshrInPlace(ShiftAmt); } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { // Shift Right // We do not handle AArch64_AM::ASR, because the sign will change the // number of useful bits uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); - Mask = Mask.lshr(ShiftAmt); + Mask.lshrInPlace(ShiftAmt); getUsefulBits(Op, Mask, Depth + 1); Mask = Mask.shl(ShiftAmt); } else @@ -1954,7 +1954,7 @@ static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, if (Op.getOperand(1) == Orig) { // Copy the bits from the result to the zero bits. Mask = ResultUsefulBits & OpUsefulBits; - Mask = Mask.lshr(LSB); + Mask.lshrInPlace(LSB); } if (Op.getOperand(0) == Orig) diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0d3289ac84c3..4ddc95199d4c 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3239,30 +3239,26 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. - if (getTargetMachine().getCodeModel() == CodeModel::Large && - Subtarget->isTargetMachO()) { - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + auto GV = G->getGlobal(); + if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) == + AArch64II::MO_GOT) { + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); + } else { const GlobalValue *GV = G->getGlobal(); - bool InternalLinkage = GV->hasInternalLinkage(); - if (InternalLinkage) - Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); - else { - Callee = - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT); - Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); - } - } else if (ExternalSymbolSDNode *S = - dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); + } + } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + if (getTargetMachine().getCodeModel() == CodeModel::Large && + Subtarget->isTargetMachO()) { const char *Sym = S->getSymbol(); Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT); Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); + } else { + const char *Sym = S->getSymbol(); + Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0); } - } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); - } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - const char *Sym = S->getSymbol(); - Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0); } // We don't usually want to end the call-sequence here because we would tidy @@ -7130,7 +7126,7 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const { if (I->getOpcode() != Instruction::FMul) return true; - if (I->getNumUses() != 1) + if (!I->hasOneUse()) return true; Instruction *User = I->user_back(); @@ -10395,7 +10391,7 @@ bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N, // call. This will cause the optimizers to attempt to move, or duplicate, // return instructions to help enable tail call optimizations for this // instruction. -bool AArch64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { return CI->isTailCall(); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 2ad6c8b23df8..a023b4373835 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -593,7 +593,7 @@ private: } bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, bool &IsInc, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 4449412532f3..82e9c5a88e3b 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2586,6 +2586,11 @@ def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, Sched<[WriteF]>; } +// Similarly add aliases +def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>, + Requires<[HasFullFP16]>; +def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; +def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; //===----------------------------------------------------------------------===// // Floating point conversion instruction. diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 878dac6bff1e..5e01b6cd2b46 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -20,6 +20,7 @@ #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index 20a5979f9b4b..6f9021c4a030 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -482,7 +482,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands); for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { auto &MO = MI.getOperand(Idx); - if (!MO.isReg()) + if (!MO.isReg() || !MO.getReg()) continue; LLT Ty = MRI.getType(MO.getReg()); @@ -537,7 +537,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands}; SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands); for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { - if (MI.getOperand(Idx).isReg()) { + if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) { auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]); if (!Mapping->isValid()) return InstructionMapping(); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td index 6bce4ef6b652..4bd77d344488 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -265,6 +265,12 @@ def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4 // Arithmetic and Logical Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>; def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>; // SIMD Miscellaneous Instructions diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index b3aba4781db8..042755bd36d0 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -35,6 +35,11 @@ static cl::opt<bool> UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " "an address is ignored"), cl::init(false), cl::Hidden); +static cl::opt<bool> + UseNonLazyBind("aarch64-enable-nonlazybind", + cl::desc("Call nonlazybind functions via direct GOT load"), + cl::init(false), cl::Hidden); + AArch64Subtarget & AArch64Subtarget::initializeSubtargetDependencies(StringRef FS, StringRef CPUString) { @@ -155,6 +160,23 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, return AArch64II::MO_NO_FLAG; } +unsigned char AArch64Subtarget::classifyGlobalFunctionReference( + const GlobalValue *GV, const TargetMachine &TM) const { + // MachO large model always goes via a GOT, because we don't have the + // relocations available to do anything else.. + if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() && + !GV->hasInternalLinkage()) + return AArch64II::MO_GOT; + + // NonLazyBind goes via GOT unless we know it's available locally. + auto *F = dyn_cast<Function>(GV); + if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) && + !TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) + return AArch64II::MO_GOT; + + return AArch64II::MO_NO_FLAG; +} + /// This function returns the name of a function which has an interface /// like the non-standard bzero function, if such a function exists on /// the current subtarget and it is considered prefereable over diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h index 40ad9185012c..3d66a9ea8ce6 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -271,6 +271,9 @@ public: unsigned char ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const; + unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, + const TargetMachine &TM) const; + /// This function returns the name of a function which has an interface /// like the non-standard bzero function, if such a function exists on /// the current subtarget and it is considered prefereable over diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index cbab68979c56..d7bbc2bcd22c 100644 --- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2100,27 +2100,9 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) { bool isNegative = parseOptionalToken(AsmToken::Minus); const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Real)) { - APFloat RealVal(APFloat::IEEEdouble(), Tok.getString()); - if (isNegative) - RealVal.changeSign(); - - uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); - Parser.Lex(); // Eat the token. - // Check for out of range values. As an exception, we let Zero through, - // as we handle that special case in post-processing before matching in - // order to use the zero register for it. - if (Val == -1 && !RealVal.isPosZero()) { - TokError("expected compatible register or floating-point constant"); - return MatchOperand_ParseFail; - } - Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext())); - return MatchOperand_Success; - } - if (Tok.is(AsmToken::Integer)) { + if (Tok.is(AsmToken::Real) || Tok.is(AsmToken::Integer)) { int64_t Val; - if (!isNegative && Tok.getString().startswith("0x")) { + if (Tok.is(AsmToken::Integer) && !isNegative && Tok.getString().startswith("0x")) { Val = Tok.getIntVal(); if (Val > 255 || Val < 0) { TokError("encoded floating point value out of range"); @@ -2128,10 +2110,24 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) { } } else { APFloat RealVal(APFloat::IEEEdouble(), Tok.getString()); + if (isNegative) + RealVal.changeSign(); + uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); + + // Check for out of range values. As an exception we let Zero through, + // but as tokens instead of an FPImm so that it can be matched by the + // appropriate alias if one exists. + if (RealVal.isPosZero()) { + Parser.Lex(); // Eat the token. + Operands.push_back(AArch64Operand::CreateToken("#0", false, S, getContext())); + Operands.push_back(AArch64Operand::CreateToken(".0", false, S, getContext())); + return MatchOperand_Success; + } else if (Val == -1) { + TokError("expected compatible register or floating-point constant"); + return MatchOperand_ParseFail; + } } Parser.Lex(); // Eat the token. Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext())); @@ -3655,21 +3651,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } } - // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR. - if (NumOperands == 3 && Tok == "fmov") { - AArch64Operand &RegOp = static_cast<AArch64Operand &>(*Operands[1]); - AArch64Operand &ImmOp = static_cast<AArch64Operand &>(*Operands[2]); - if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) { - unsigned zreg = - !AArch64MCRegisterClasses[AArch64::FPR64RegClassID].contains( - RegOp.getReg()) - ? AArch64::WZR - : AArch64::XZR; - Operands[2] = AArch64Operand::CreateReg(zreg, false, Op.getStartLoc(), - Op.getEndLoc(), getContext()); - } - } - MCInst Inst; // First try to match against the secondary set of tables containing the // short-form NEON instructions (e.g. "fadd.2s v0, v1, v2"). diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 8fc822329595..94112849f84e 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -39,7 +39,7 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { PrivateLabelPrefix = "L"; SeparatorString = "%%"; CommentString = ";"; - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; AlignmentIsInBytes = false; UsesELFSectionDirectiveForBSS = true; @@ -71,7 +71,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) { // We prefer NEON instructions to be printed in the short form. AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant; - PointerSize = 8; + CodePointerSize = 8; // ".comm align is in bytes but .align is pow-2." AlignmentIsInBytes = false; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0446655830d1..a81bcb56dfdc 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -144,6 +144,10 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( } void AMDGPUAsmPrinter::EmitFunctionBodyStart() { + const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>(); + if (!MFI->isEntryFunction()) + return; + const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; amd_kernel_code_t KernelCode; @@ -184,9 +188,11 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>(); // The starting address of all shader programs must be 256 bytes aligned. - MF.setAlignment(8); + // Regular functions just need the basic required instruction alignment. + MF.setAlignment(MFI->isEntryFunction() ? 8 : 2); SetupMachineFunction(MF); @@ -220,13 +226,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->SwitchSection(CommentSection); if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - OutStreamer->emitRawComment(" Kernel info:", false); - OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen), - false); + if (MFI->isEntryFunction()) { + OutStreamer->emitRawComment(" Kernel info:", false); + } else { + OutStreamer->emitRawComment(" Function info:", false); + } + + OutStreamer->emitRawComment(" codeLenInByte = " + + Twine(getFunctionCodeSize(MF)), false); OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR), false); OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR), false); + OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode), false); OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode), @@ -236,6 +248,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) + " bytes/workgroup (compile time only)", false); + if (!MFI->isEntryFunction()) + return false; + OutStreamer->emitRawComment(" SGPRBlocks: " + Twine(KernelInfo.SGPRBlocks), false); OutStreamer->emitRawComment(" VGPRBlocks: " + @@ -317,7 +332,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { const MachineOperand &MO = MI.getOperand(op_idx); if (!MO.isReg()) continue; - unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff; + unsigned HWReg = RI->getHWRegIndex(MO.getReg()); // Register with value > 127 aren't GPR if (HWReg > 127) @@ -360,18 +375,12 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { } } -void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, - const MachineFunction &MF) const { +uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const { const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); - const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - uint64_t CodeSize = 0; - unsigned MaxSGPR = 0; - unsigned MaxVGPR = 0; - bool VCCUsed = false; - bool FlatUsed = false; - const SIRegisterInfo *RI = STM.getRegisterInfo(); const SIInstrInfo *TII = STM.getInstrInfo(); + uint64_t CodeSize = 0; + for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB) { // TODO: CodeSize should account for multiple functions. @@ -380,122 +389,86 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, if (MI.isDebugValue()) continue; - if (isVerbose()) - CodeSize += TII->getInstSizeInBytes(MI); + CodeSize += TII->getInstSizeInBytes(MI); + } + } - unsigned numOperands = MI.getNumOperands(); - for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { - const MachineOperand &MO = MI.getOperand(op_idx); - unsigned width = 0; - bool isSGPR = false; + return CodeSize; +} - if (!MO.isReg()) - continue; +static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, + const SIInstrInfo &TII, + unsigned Reg) { + for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) { + if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent())) + return true; + } - unsigned reg = MO.getReg(); - switch (reg) { - case AMDGPU::EXEC: - case AMDGPU::EXEC_LO: - case AMDGPU::EXEC_HI: - case AMDGPU::SCC: - case AMDGPU::M0: - case AMDGPU::SRC_SHARED_BASE: - case AMDGPU::SRC_SHARED_LIMIT: - case AMDGPU::SRC_PRIVATE_BASE: - case AMDGPU::SRC_PRIVATE_LIMIT: - continue; + return false; +} - case AMDGPU::VCC: - case AMDGPU::VCC_LO: - case AMDGPU::VCC_HI: - VCCUsed = true; - continue; +void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, + const MachineFunction &MF) const { + const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const SIInstrInfo *TII = STM.getInstrInfo(); + const SIRegisterInfo *RI = &TII->getRegisterInfo(); - case AMDGPU::FLAT_SCR: - case AMDGPU::FLAT_SCR_LO: - case AMDGPU::FLAT_SCR_HI: - // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat - // instructions aren't used to access the scratch buffer. - if (MFI->hasFlatScratchInit()) - FlatUsed = true; - continue; - case AMDGPU::TBA: - case AMDGPU::TBA_LO: - case AMDGPU::TBA_HI: - case AMDGPU::TMA: - case AMDGPU::TMA_LO: - case AMDGPU::TMA_HI: - llvm_unreachable("trap handler registers should not be used"); - - default: - break; - } - - if (AMDGPU::SReg_32RegClass.contains(reg)) { - assert(!AMDGPU::TTMP_32RegClass.contains(reg) && - "trap handler registers should not be used"); - isSGPR = true; - width = 1; - } else if (AMDGPU::VGPR_32RegClass.contains(reg)) { - isSGPR = false; - width = 1; - } else if (AMDGPU::SReg_64RegClass.contains(reg)) { - assert(!AMDGPU::TTMP_64RegClass.contains(reg) && - "trap handler registers should not be used"); - isSGPR = true; - width = 2; - } else if (AMDGPU::VReg_64RegClass.contains(reg)) { - isSGPR = false; - width = 2; - } else if (AMDGPU::VReg_96RegClass.contains(reg)) { - isSGPR = false; - width = 3; - } else if (AMDGPU::SReg_128RegClass.contains(reg)) { - isSGPR = true; - width = 4; - } else if (AMDGPU::VReg_128RegClass.contains(reg)) { - isSGPR = false; - width = 4; - } else if (AMDGPU::SReg_256RegClass.contains(reg)) { - isSGPR = true; - width = 8; - } else if (AMDGPU::VReg_256RegClass.contains(reg)) { - isSGPR = false; - width = 8; - } else if (AMDGPU::SReg_512RegClass.contains(reg)) { - isSGPR = true; - width = 16; - } else if (AMDGPU::VReg_512RegClass.contains(reg)) { - isSGPR = false; - width = 16; - } else { - llvm_unreachable("Unknown register class"); - } - unsigned hwReg = RI->getEncodingValue(reg) & 0xff; - unsigned maxUsed = hwReg + width - 1; - if (isSGPR) { - MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR; - } else { - MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR; - } - } + MCPhysReg NumVGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + NumVGPRReg = Reg; + break; + } + } + + MCPhysReg NumSGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + NumSGPRReg = Reg; + break; } } + // We found the maximum register index. They start at 0, so add one to get the + // number of registers. + ProgInfo.NumVGPR = NumVGPRReg == AMDGPU::NoRegister ? 0 : + RI->getHWRegIndex(NumVGPRReg) + 1; + ProgInfo.NumSGPR = NumSGPRReg == AMDGPU::NoRegister ? 0 : + RI->getHWRegIndex(NumSGPRReg) + 1; unsigned ExtraSGPRs = 0; - if (VCCUsed) + ProgInfo.VCCUsed = MRI.isPhysRegUsed(AMDGPU::VCC_LO) || + MRI.isPhysRegUsed(AMDGPU::VCC_HI); + if (ProgInfo.VCCUsed) ExtraSGPRs = 2; + ProgInfo.FlatUsed = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) || + MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI); + + // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat + // instructions aren't used to access the scratch buffer. Inline assembly + // may need it though. + // + // If we only have implicit uses of flat_scr on flat instructions, it is not + // really needed. + if (ProgInfo.FlatUsed && !MFI->hasFlatScratchInit() && + (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) && + !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) && + !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) { + ProgInfo.FlatUsed = false; + } + if (STM.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) { - if (FlatUsed) + if (ProgInfo.FlatUsed) ExtraSGPRs = 4; } else { if (STM.isXNACKEnabled()) ExtraSGPRs = 4; - if (FlatUsed) + if (ProgInfo.FlatUsed) ExtraSGPRs = 6; } @@ -505,34 +478,29 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && !STM.hasSGPRInitBug()) { unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs(); - if (MaxSGPR + 1 > MaxAddressableNumSGPRs) { + if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) { // This can happen due to a compiler bug or when using inline asm. LLVMContext &Ctx = MF.getFunction()->getContext(); DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "addressable scalar registers", - MaxSGPR + 1, DS_Error, + ProgInfo.NumSGPR, DS_Error, DK_ResourceLimit, MaxAddressableNumSGPRs); Ctx.diagnose(Diag); - MaxSGPR = MaxAddressableNumSGPRs - 1; + ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1; } } // Account for extra SGPRs and VGPRs reserved for debugger use. - MaxSGPR += ExtraSGPRs; - MaxVGPR += ExtraVGPRs; - - // We found the maximum register index. They start at 0, so add one to get the - // number of registers. - ProgInfo.NumSGPR = MaxSGPR + 1; - ProgInfo.NumVGPR = MaxVGPR + 1; + ProgInfo.NumSGPR += ExtraSGPRs; + ProgInfo.NumVGPR += ExtraVGPRs; // Adjust number of registers used to meet default/requested minimum/maximum // number of waves per execution unit request. ProgInfo.NumSGPRsForWavesPerEU = std::max( - ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU())); + std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU())); ProgInfo.NumVGPRsForWavesPerEU = std::max( - ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU())); + std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU())); if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS || STM.hasSGPRInitBug()) { @@ -559,10 +527,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; } - if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) { + if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) { LLVMContext &Ctx = MF.getFunction()->getContext(); DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "user SGPRs", - MFI->NumUserSGPRs, DS_Error); + MFI->getNumUserSGPRs(), DS_Error); Ctx.diagnose(Diag); } @@ -584,7 +552,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1; // Record first reserved VGPR and number of reserved VGPRs. - ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0; + ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? ProgInfo.NumVGPR : 0; ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF); // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and @@ -609,10 +577,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); ProgInfo.ScratchSize = FrameInfo.getStackSize(); - ProgInfo.FlatUsed = FlatUsed; - ProgInfo.VCCUsed = VCCUsed; - ProgInfo.CodeLen = CodeSize; - unsigned LDSAlignShift; if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) { // LDS is allocated in 64 dword blocks. @@ -623,7 +587,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } unsigned LDSSpillSize = - MFI->LDSWaveSpillSize * MFI->getMaxFlatWorkGroupSize(); + MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize(); ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize; ProgInfo.LDSBlocks = diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 13425c8b2a0f..8c86dea4b885 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -55,7 +55,7 @@ private: uint32_t NumVGPR = 0; uint32_t NumSGPR = 0; - uint32_t LDSSize; + uint32_t LDSSize = 0; bool FlatUsed = false; // Number of SGPRs that meets number of waves per execution unit request. @@ -85,11 +85,11 @@ private: // Bonus information for debugging. bool VCCUsed = false; - uint64_t CodeLen = 0; SIProgramInfo() = default; }; + uint64_t getFunctionCodeSize(const MachineFunction &MF) const; void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const; void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo, const MachineFunction &MF) const; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 36bc2498781f..a5cda817ac11 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -415,9 +415,11 @@ public: return 0; } + // Scratch is allocated in 256 dword per wave blocks for the entire + // wavefront. When viewed from the perspecive of an arbitrary workitem, this + // is 4-byte aligned. unsigned getStackAlignment() const { - // Scratch is allocated in 256 dword per wave blocks. - return 4 * 256 / getWavefrontSize(); + return 4; } bool enableMachineScheduler() const override { diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 01ac9968181a..6edd3e923ba1 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -426,16 +426,23 @@ static bool isArgPassedInSGPR(const Argument *A) { const Function *F = A->getParent(); // Arguments to compute shaders are never a source of divergence. - if (!AMDGPU::isShader(F->getCallingConv())) + CallingConv::ID CC = F->getCallingConv(); + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: return true; - - // For non-compute shaders, SGPR inputs are marked with either inreg or byval. - if (F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) || - F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal)) - return true; - - // Everything else is in VGPRs. - return false; + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + // For non-compute shaders, SGPR inputs are marked with either inreg or byval. + // Everything else is in VGPRs. + return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) || + F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal); + default: + // TODO: Should calls support inreg for SGPR inputs? + return false; + } } /// diff --git a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td index a9f64589fa5e..357e18108e7e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -255,8 +255,6 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag> [(set i32:$vdst, (node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > { - let LGKM_CNT = 0; - let mayLoad = 0; let mayStore = 0; let isConvergent = 1; diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 1655591abf39..6c61fb1f2d6b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -14,6 +14,7 @@ using namespace llvm; AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { + CodePointerSize = (TT.getArch() == Triple::amdgcn) ? 8 : 4; HasSingleParameterDotFile = false; //===------------------------------------------------------------------===// MinInstAlignment = 4; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 7268131396dc..dd867b15b4c7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -461,6 +461,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand); setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand); + } else { + setOperationAction(ISD::SELECT, MVT::v2i16, Custom); + setOperationAction(ISD::SELECT, MVT::v2f16, Custom); + } + + for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8 }) { + setOperationAction(ISD::SELECT, VT, Custom); } setTargetDAGCombine(ISD::FADD); @@ -2191,6 +2198,28 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, break; } } + case ISD::SELECT: { + SDLoc SL(N); + EVT VT = N->getValueType(0); + EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT); + SDValue LHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(1)); + SDValue RHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(2)); + + EVT SelectVT = NewVT; + if (NewVT.bitsLT(MVT::i32)) { + LHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, LHS); + RHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, RHS); + SelectVT = MVT::i32; + } + + SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, SelectVT, + N->getOperand(0), LHS, RHS); + + if (NewVT != SelectVT) + NewSelect = DAG.getNode(ISD::TRUNCATE, SL, NewVT, NewSelect); + Results.push_back(DAG.getNode(ISD::BITCAST, SL, VT, NewSelect)); + return; + } default: break; } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index a84f3e274f82..810fb05984c4 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -133,14 +133,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { AMDGPUBufferPseudoSourceValue BufferPSV; AMDGPUImagePseudoSourceValue ImagePSV; -public: - // FIXME: Make private +private: unsigned LDSWaveSpillSize; unsigned ScratchOffsetReg; unsigned NumUserSGPRs; unsigned NumSystemSGPRs; -private: bool HasSpilledSGPRs; bool HasSpilledVGPRs; bool HasNonSpillStackObjects; @@ -535,6 +533,10 @@ public: llvm_unreachable("unexpected dimension"); } + unsigned getLDSWaveSpillSize() const { + return LDSWaveSpillSize; + } + const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { return &BufferPSV; } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 36d4df52ff0e..098c67252dd8 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -124,7 +124,7 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( unsigned RegCount = ST.getMaxNumSGPRs(MF); unsigned Reg; - // Try to place it in a hole after PrivateSegmentbufferReg. + // Try to place it in a hole after PrivateSegmentBufferReg. if (RegCount & 3) { // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to // alignment constraints, so we have a hole where can put the wave offset. diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index 57f9d1c6b610..005b74a68af3 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -67,8 +67,9 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", [FeatureFPARMv8]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", "Restrict FP to 16 double registers">; -def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", - "Enable divide instructions">; +def FeatureHWDivThumb : SubtargetFeature<"hwdiv", "HasHardwareDivideInThumb", + "true", + "Enable divide instructions in Thumb">; def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", "HasHardwareDivideInARM", "true", "Enable divide instructions in ARM mode">; @@ -225,7 +226,7 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", def FeatureVirtualization : SubtargetFeature<"virtualization", "HasVirtualization", "true", "Supports Virtualization extension", - [FeatureHWDiv, FeatureHWDivARM]>; + [FeatureHWDivThumb, FeatureHWDivARM]>; // M-series ISA def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", @@ -433,21 +434,21 @@ def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops, def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, FeatureDB, FeatureDSP, - FeatureHWDiv, + FeatureHWDivThumb, FeatureRClass]>; def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureMClass]>; def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureMClass, FeatureDSP]>; @@ -502,7 +503,7 @@ def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", [HasV8MBaselineOps, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureV7Clrex, Feature8MSecExt, FeatureAcquireRelease, @@ -512,7 +513,7 @@ def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", [HasV8MMainlineOps, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, Feature8MSecExt, FeatureAcquireRelease, FeatureMClass]>; @@ -678,7 +679,7 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, FeatureFP16, FeatureAvoidPartialCPSR, FeatureVFP4, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM]>; def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, @@ -686,7 +687,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, FeatureNEONForFP, FeatureVFP4, FeatureMP, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, @@ -768,39 +769,39 @@ def : ProcNoItin<"cortex-m33", [ARMv8mMainline, FeatureVFPOnlySP]>; def : ProcNoItin<"cortex-a32", [ARMv8a, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC, FeatureFPAO]>; def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC, FeatureFPAO]>; def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; @@ -811,7 +812,7 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureNEONForFP, FeatureVFP4, FeatureMP, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, @@ -820,25 +821,25 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureZCZeroing]>; def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"kryo", [ARMv8a, ProcKryo, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index eb0d410b596b..14e197f477f1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -589,12 +589,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { ATS.finishAttributeSection(); } -static bool isV8M(const ARMSubtarget *Subtarget) { - // Note that v8M Baseline is a subset of v6T2! - return (Subtarget->hasV8MBaselineOps() && !Subtarget->hasV6T2Ops()) || - Subtarget->hasV8MMainlineOps(); -} - //===----------------------------------------------------------------------===// // Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile() // FIXME: @@ -602,39 +596,6 @@ static bool isV8M(const ARMSubtarget *Subtarget) { // to appear in the .ARM.attributes section in ELF. // Instead of subclassing the MCELFStreamer, we do the work here. -static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU, - const ARMSubtarget *Subtarget) { - if (CPU == "xscale") - return ARMBuildAttrs::v5TEJ; - - if (Subtarget->hasV8Ops()) { - if (Subtarget->isRClass()) - return ARMBuildAttrs::v8_R; - return ARMBuildAttrs::v8_A; - } else if (Subtarget->hasV8MMainlineOps()) - return ARMBuildAttrs::v8_M_Main; - else if (Subtarget->hasV7Ops()) { - if (Subtarget->isMClass() && Subtarget->hasDSP()) - return ARMBuildAttrs::v7E_M; - return ARMBuildAttrs::v7; - } else if (Subtarget->hasV6T2Ops()) - return ARMBuildAttrs::v6T2; - else if (Subtarget->hasV8MBaselineOps()) - return ARMBuildAttrs::v8_M_Base; - else if (Subtarget->hasV6MOps()) - return ARMBuildAttrs::v6S_M; - else if (Subtarget->hasV6Ops()) - return ARMBuildAttrs::v6; - else if (Subtarget->hasV5TEOps()) - return ARMBuildAttrs::v5TE; - else if (Subtarget->hasV5TOps()) - return ARMBuildAttrs::v5T; - else if (Subtarget->hasV4TOps()) - return ARMBuildAttrs::v4T; - else - return ARMBuildAttrs::v4; -} - // Returns true if all functions have the same function attribute value. // It also returns true when the module has no functions. static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr, @@ -671,89 +632,8 @@ void ARMAsmPrinter::emitAttributes() { static_cast<const ARMBaseTargetMachine &>(TM); const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian()); - const std::string &CPUString = STI.getCPUString(); - - if (!StringRef(CPUString).startswith("generic")) { - // FIXME: remove krait check when GNU tools support krait cpu - if (STI.isKrait()) { - ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); - // We consider krait as a "cortex-a9" + hwdiv CPU - // Enable hwdiv through ".arch_extension idiv" - if (STI.hasDivide() || STI.hasDivideInARMMode()) - ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM); - } else - ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); - } - - ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI)); - - // Tag_CPU_arch_profile must have the default value of 0 when "Architecture - // profile is not applicable (e.g. pre v7, or cross-profile code)". - if (STI.hasV7Ops() || isV8M(&STI)) { - if (STI.isAClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::ApplicationProfile); - } else if (STI.isRClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::RealTimeProfile); - } else if (STI.isMClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::MicroControllerProfile); - } - } - - ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, - STI.hasARMOps() ? ARMBuildAttrs::Allowed - : ARMBuildAttrs::Not_Allowed); - if (isV8M(&STI)) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumbDerived); - } else if (STI.isThumb1Only()) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); - } else if (STI.hasThumb2()) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumb32); - } - - if (STI.hasNEON()) { - /* NEON is not exactly a VFP architecture, but GAS emit one of - * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (STI.hasFPARMv8()) { - if (STI.hasCrypto()) - ATS.emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8); - else - ATS.emitFPU(ARM::FK_NEON_FP_ARMV8); - } else if (STI.hasVFP4()) - ATS.emitFPU(ARM::FK_NEON_VFPV4); - else - ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON); - // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture - if (STI.hasV8Ops()) - ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a: - ARMBuildAttrs::AllowNeonARMv8); - } else { - if (STI.hasFPARMv8()) - // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one - // FPU, but there are two different names for it depending on the CPU. - ATS.emitFPU(STI.hasD16() - ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16) - : ARM::FK_FP_ARMV8); - else if (STI.hasVFP4()) - ATS.emitFPU(STI.hasD16() - ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16) - : ARM::FK_VFPV4); - else if (STI.hasVFP3()) - ATS.emitFPU(STI.hasD16() - // +d16 - ? (STI.isFPOnlySP() - ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD) - : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16)) - // -d16 - : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3)); - else if (STI.hasVFP2()) - ATS.emitFPU(ARM::FK_VFPV2); - } + // Emit build attributes for the available hardware. + ATS.emitTargetAttributes(STI); // RW data addressing. if (isPositionIndependent()) { @@ -846,32 +726,15 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, ARMBuildAttrs::AllowIEEE754); - if (STI.allowsUnalignedMem()) - ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, - ARMBuildAttrs::Allowed); - else - ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, - ARMBuildAttrs::Not_Allowed); - // FIXME: add more flags to ARMBuildAttributes.h // 8-bytes alignment stuff. ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1); ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1); - // ABI_HardFP_use attribute to indicate single precision FP. - if (STI.isFPOnlySP()) - ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use, - ARMBuildAttrs::HardFPSinglePrecision); - // Hard float. Use both S and D registers and conform to AAPCS-VFP. if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS); - // FIXME: Should we signal R9 usage? - - if (STI.hasFP16()) - ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); - // FIXME: To support emitting this build attribute as GCC does, the // -mfp16-format option and associated plumbing must be // supported. For now the __fp16 type is exposed by default, so this @@ -879,21 +742,6 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format, ARMBuildAttrs::FP16FormatIEEE); - if (STI.hasMPExtension()) - ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); - - // Hardware divide in ARM mode is part of base arch, starting from ARMv8. - // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M). - // It is not possible to produce DisallowDIV: if hwdiv is present in the base - // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits. - // AllowDIVExt is only emitted if hwdiv isn't available in the base arch; - // otherwise, the default value (AllowDIVIfExists) applies. - if (STI.hasDivideInARMMode() && !STI.hasV8Ops()) - ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); - - if (STI.hasDSP() && isV8M(&STI)) - ATS.emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed); - if (MMI) { if (const Module *SourceModule = MMI->getModule()) { // ABI_PCS_wchar_t to indicate wchar_t width @@ -930,16 +778,6 @@ void ARMAsmPrinter::emitAttributes() { else ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsGPR); - - if (STI.hasTrustZone() && STI.hasVirtualization()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowTZVirtualization); - else if (STI.hasTrustZone()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowTZ); - else if (STI.hasVirtualization()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowVirtualization); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 23777b821f9f..faf1c631a3a7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -404,6 +404,29 @@ public: /// Returns true if the instruction has a shift by immediate that can be /// executed in one cycle less. bool isSwiftFastImmShift(const MachineInstr *MI) const; + + /// Returns predicate register associated with the given frame instruction. + unsigned getFramePred(const MachineInstr &MI) const { + assert(isFrameInstr(MI)); + if (isFrameSetup(MI)) + // Operands of ADJCALLSTACKDOWN: + // - argument declared in ADJCALLSTACKDOWN pattern: + // 0 - frame size + // 1 - predicate code (like ARMCC::AL) + // - added by predOps: + // 2 - predicate reg + return MI.getOperand(2).getReg(); + assert(MI.getOpcode() == ARM::ADJCALLSTACKUP || + MI.getOpcode() == ARM::tADJCALLSTACKUP); + // Operands of ADJCALLSTACKUP: + // - argument declared in ADJCALLSTACKUP pattern: + // 0 - frame size + // 1 - arg of CALLSEQ_END + // 2 - predicate code + // - added by predOps: + // 3 - predicate reg + return MI.getOperand(3).getReg(); + } }; /// Get the operands corresponding to the given \p Pred value. By default, the diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index 7a7b7fede7c8..bc7afdb7f1c9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -273,9 +273,9 @@ def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; -def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP, - (sequence "R%u", 12, 1), - (sequence "D%u", 31, 0))>; +def CSR_iOS_TLSCall + : CalleeSavedRegs<(add LR, SP, (sub(sequence "R%u", 12, 1), R9, R12), + (sequence "D%u", 31, 0))>; // C++ TLS access function saves all registers except SP. Try to match // the order of CSRs in CSR_iOS. diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 23722f1b7f3f..6434df317aa8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1741,10 +1741,9 @@ bool ARMConstantIslands::undoLRSpillRestore() { .add(MI->getOperand(1)); MI->eraseFromParent(); MadeChange = true; - } - if (MI->getOpcode() == ARM::tPUSH && - MI->getOperand(2).getReg() == ARM::LR && - MI->getNumExplicitOperands() == 3) { + } else if (MI->getOpcode() == ARM::tPUSH && + MI->getOperand(2).getReg() == ARM::LR && + MI->getNumExplicitOperands() == 3) { // Just remove the push. MI->eraseFromParent(); MadeChange = true; @@ -2158,6 +2157,15 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { // If we're in PIC mode, there should be another ADD following. auto *TRI = STI->getRegisterInfo(); + + // %base cannot be redefined after the load as it will appear before + // TBB/TBH like: + // %base = + // %base = + // tBB %base, %idx + if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI)) + continue; + if (isPositionIndependentOrROPI) { MachineInstr *Add = Load->getNextNode(); if (Add->getOpcode() != ARM::tADDrr || diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 01e062bd185c..e9bc7db66fa4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -1702,7 +1702,8 @@ bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) { // If we have integer div support we should have selected this automagically. // In case we have a real miss go ahead and return false and we'll pick // it up later. - if (Subtarget->hasDivide()) return false; + if (Subtarget->hasDivideInThumbMode()) + return false; // Otherwise emit a libcall. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 37be22bed540..70dbe1bc5b95 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -322,6 +322,18 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, } } +/// We need the offset of the frame pointer relative to other MachineFrameInfo +/// offsets which are encoded relative to SP at function begin. +/// See also emitPrologue() for how the FP is set up. +/// Unfortunately we cannot determine this value in determineCalleeSaves() yet +/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use +/// this to produce a conservative estimate that we check in an assert() later. +static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) { + // This is a conservative estimation: Assume the frame pointer being r7 and + // pc("r15") up to r8 getting spilled before (= 8 registers). + return -AFI.getArgRegsSaveSize() - (8 * 4); +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -432,8 +444,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; int FramePtrOffsetInPush = 0; if (HasFP) { - FramePtrOffsetInPush = - MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; + int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); + assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset && + "Max FP estimation is wrong"); + FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -1700,6 +1714,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // worth the effort and added fragility? unsigned EstimatedStackSize = MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills); + + // Determine biggest (positive) SP offset in MachineFrameInfo. + int MaxFixedOffset = 0; + for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { + int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I); + MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset); + } + bool HasFP = hasFP(MF); if (HasFP) { if (AFI->hasStackFrame()) @@ -1707,15 +1729,20 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } else { // If FP is not used, SP will be used to access arguments, so count the // size of arguments into the estimation. - EstimatedStackSize += AFI->getArgumentStackSize(); + EstimatedStackSize += MaxFixedOffset; } EstimatedStackSize += 16; // For possible paddings. - bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) || - MFI.hasVarSizedObjects() || - (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)); + unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this); + int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI); + bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit || + MFI.hasVarSizedObjects() || + (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) || + // For large argument stacks fp relative addressed may overflow. + (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit); bool ExtraCSSpill = false; - if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { + if (BigFrameOffsets || + !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); if (HasFP) { @@ -1899,7 +1926,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer // adjustments also, even when the frame itself is small. - if (BigStack && !ExtraCSSpill) { + if (BigFrameOffsets && !ExtraCSSpill) { // If any non-reserved CS register isn't spilled, just spill one or two // extra. That should take care of it! unsigned NumExtras = TargetAlign / 4; @@ -1958,7 +1985,7 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( // ADJCALLSTACKUP -> add, sp, sp, amount MachineInstr &Old = *I; DebugLoc dl = Old.getDebugLoc(); - unsigned Amount = Old.getOperand(0).getImm(); + unsigned Amount = TII.getFrameSize(Old); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next @@ -1976,14 +2003,11 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( ARMCC::CondCodes Pred = (PIdx == -1) ? ARMCC::AL : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm(); + unsigned PredReg = TII.getFramePred(Old); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = Old.getOperand(2).getReg(); emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags, Pred, PredReg); } else { - // Note: PredReg is operand 3 for ADJCALLSTACKUP. - unsigned PredReg = Old.getOperand(3).getReg(); assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags, Pred, PredReg); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index b07b4e1f5cfb..e9df9449103c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -228,11 +228,6 @@ private: const uint16_t *DOpcodes, const uint16_t *QOpcodes = nullptr); - /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, - /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be - /// generated to force the table registers to be consecutive. - void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc); - /// Try to select SBFX/UBFX instructions for ARM. bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); @@ -544,11 +539,11 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, SDValue NewMulConst; if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { HandleSDNode Handle(N); + SDLoc Loc(N); replaceDAGValue(N.getOperand(1), NewMulConst); BaseReg = Handle.getValue(); - Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl, - PowerOfTwo), - SDLoc(N), MVT::i32); + Opc = CurDAG->getTargetConstant( + ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); return true; } } @@ -1859,6 +1854,14 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { return Opc; // If not one we handle, return it unchanged. } +/// Returns true if the given increment is a Constant known to be equal to the +/// access size performed by a NEON load/store. This means the "[rN]!" form can +/// be used. +static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { + auto C = dyn_cast<ConstantSDNode>(Inc); + return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; +} + void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *DOpcodes, const uint16_t *QOpcodes0, @@ -1926,13 +1929,13 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if ((NumVecs <= 2) && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && !isVLDfixed(Opc)) || - !isa<ConstantSDNode>(Inc.getNode())) - Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); + if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate) + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -2080,11 +2083,12 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if (!isa<ConstantSDNode>(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); else if (NumVecs > 2 && !isVSTfixed(Opc)) Ops.push_back(Reg0); @@ -2214,7 +2218,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } SDValue SuperReg; @@ -2318,9 +2324,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, // fixed-stride update instructions don't have an explicit writeback // operand. It's implicit in the opcode itself. SDValue Inc = N->getOperand(2); - if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); - if (!isa<ConstantSDNode>(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); // FIXME: VLD3 and VLD4 haven't been updated to that form yet. else if (NumVecs > 2) @@ -2356,39 +2364,6 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } -void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, - unsigned Opc) { - assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); - SDLoc dl(N); - EVT VT = N->getValueType(0); - unsigned FirstTblReg = IsExt ? 2 : 1; - - // Form a REG_SEQUENCE to force register allocation. - SDValue RegSeq; - SDValue V0 = N->getOperand(FirstTblReg + 0); - SDValue V1 = N->getOperand(FirstTblReg + 1); - if (NumVecs == 2) - RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); - else { - SDValue V2 = N->getOperand(FirstTblReg + 2); - // If it's a vtbl3, form a quad D-register and leave the last part as - // an undef. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) - : N->getOperand(FirstTblReg + 3); - RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); - } - - SmallVector<SDValue, 6> Ops; - if (IsExt) - Ops.push_back(N->getOperand(1)); - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); - Ops.push_back(getAL(CurDAG, dl)); // predicate - Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register - ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); -} - bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) return false; @@ -3730,59 +3705,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { break; } - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - switch (IntNo) { - default: - break; - - case Intrinsic::arm_neon_vtbl2: - SelectVTBL(N, false, 2, ARM::VTBL2); - return; - case Intrinsic::arm_neon_vtbl3: - SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); - return; - case Intrinsic::arm_neon_vtbl4: - SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); - return; - - case Intrinsic::arm_neon_vtbx2: - SelectVTBL(N, true, 2, ARM::VTBX2); - return; - case Intrinsic::arm_neon_vtbx3: - SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); - return; - case Intrinsic::arm_neon_vtbx4: - SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); - return; - } - break; - } - - case ARMISD::VTBL1: { - SDLoc dl(N); - EVT VT = N->getValueType(0); - SDValue Ops[] = {N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), // Predicate - CurDAG->getRegister(0, MVT::i32)}; // Predicate Register - ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops)); - return; - } - case ARMISD::VTBL2: { - SDLoc dl(N); - EVT VT = N->getValueType(0); - - // Form a REG_SEQUENCE to force register allocation. - SDValue V0 = N->getOperand(0); - SDValue V1 = N->getOperand(1); - SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); - - SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate - CurDAG->getRegister(0, MVT::i32)}; // Predicate Register - ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops)); - return; - } - case ISD::ATOMIC_CMP_SWAP: SelectCMP_SWAP(N); return; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index e697c8ca5339..165e9b7378c7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -852,7 +852,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); - bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide() + bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() : Subtarget->hasDivideInARMMode(); if (!hasDivide) { // These are expanded into libcalls if the cpu doesn't have HW divider. @@ -860,7 +860,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i32, LibCall); } - if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) { + if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) { setOperationAction(ISD::SDIV, MVT::i32, Custom); setOperationAction(ISD::UDIV, MVT::i32, Custom); @@ -2633,7 +2633,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { return true; } -bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { if (!Subtarget->supportsTailCall()) return false; @@ -3347,6 +3347,12 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::arm_neon_vtbl1: + return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::arm_neon_vtbl2: + return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } } @@ -10867,11 +10873,8 @@ static SDValue CombineBaseUpdate(SDNode *N, // If the increment is a constant, it must match the memory ref size. SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); - if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { - uint64_t IncVal = CInc->getZExtValue(); - if (IncVal != NumBytes) - continue; - } else if (NumBytes >= 3 * 16) { + ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode()); + if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) { // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two // separate instructions that make it harder to use a non-constant update. continue; @@ -11688,34 +11691,6 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero, - APInt &KnownOne) { - if (Op.getOpcode() == ARMISD::BFI) { - // Conservatively, we can recurse down the first operand - // and just mask out all affected bits. - computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); - - // The operand to BFI is already a mask suitable for removing the bits it - // sets. - ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2)); - const APInt &Mask = CI->getAPIntValue(); - KnownZero &= Mask; - KnownOne &= Mask; - return; - } - if (Op.getOpcode() == ARMISD::CMOV) { - APInt KZ2(KnownZero.getBitWidth(), 0); - APInt KO2(KnownOne.getBitWidth(), 0); - computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); - computeKnownBits(DAG, Op.getOperand(1), KZ2, KO2); - - KnownZero &= KZ2; - KnownOne &= KO2; - return; - } - return DAG.computeKnownBits(Op, KnownZero, KnownOne); -} - SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { // If we have a CMOV, OR and AND combination such as: // if (x & CN) @@ -11777,7 +11752,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D // Lastly, can we determine that the bits defined by OrCI // are zero in Y? APInt KnownZero, KnownOne; - computeKnownBits(DAG, Y, KnownZero, KnownOne); + DAG.computeKnownBits(Y, KnownZero, KnownOne); if ((OrCI & KnownZero) != OrCI) return SDValue(); @@ -12657,6 +12632,19 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, } } } + case ARMISD::BFI: { + // Conservatively, we can recurse down the first operand + // and just mask out all affected bits. + DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1); + + // The operand to BFI is already a mask suitable for removing the bits it + // sets. + ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2)); + const APInt &Mask = CI->getAPIntValue(); + KnownZero &= Mask; + KnownOne &= Mask; + return; + } } } @@ -13052,7 +13040,9 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { // rem = a - b * div // return {div, rem} // This should be lowered into UDIV/SDIV + MLS later on. - if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() && + bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() + : Subtarget->hasDivideInARMMode(); + if (hasDivide && Op->getValueType(0).isSimple() && Op->getSimpleValueType(0) == MVT::i32) { unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; const SDValue Dividend = Op->getOperand(0); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 70a0b1380ec9..8b54ce430ed2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -717,7 +717,7 @@ class InstrItineraryData; bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue CCR, SDValue Cmp, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index cc0e7d4d9c35..703e8071b177 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -259,8 +259,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float conversions">; def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, AssemblerPredicate<"FeatureFullFP16","full half-float">; -def HasDivide : Predicate<"Subtarget->hasDivide()">, - AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">; +def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">, + AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">; def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">; def HasDSP : Predicate<"Subtarget->hasDSP()">, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 681e235d78f0..9b08c612e16b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -587,6 +587,14 @@ def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; +def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, + SDTCisVT<2, v8i8>]>; +def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, + SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>; +def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>; +def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; + + def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); unsigned EltBits = 0; @@ -6443,7 +6451,8 @@ def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd), (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, "vtbl", "8", "$Vd, $Vn, $Vm", "", - [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>; + [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>; + let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), @@ -6498,6 +6507,49 @@ def VTBX4Pseudo IIC_VTBX4, "$orig = $dst", []>; } // DecoderMethod = "DecodeTBLInstruction" +def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)), + (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vm)), + (v8i8 (VTBX2 v8i8:$orig, + (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1), + v8i8:$Vm))>; + +def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vm)), + (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + (v8i8 (IMPLICIT_DEF)), dsub_3), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vm)), + (v8i8 (VTBX3Pseudo v8i8:$orig, + (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + (v8i8 (IMPLICIT_DEF)), dsub_3), + v8i8:$Vm))>; + +def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), + (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + v8i8:$Vn3, dsub_3), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), + (v8i8 (VTBX4Pseudo v8i8:$orig, + (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + v8i8:$Vn3, dsub_3), + v8i8:$Vm))>; + // VRINT : Vector Rounding multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> { let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index f5b673b78ad7..f710ee6a7e77 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2797,7 +2797,7 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">; def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]>, + Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>, Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; @@ -2809,7 +2809,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]>, + Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>, Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 8d224d6a70fa..816596b85721 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -299,6 +299,20 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); break; + case G_SUB: + I.setDesc(TII.get(ARM::SUBrr)); + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + break; + case G_MUL: + if (TII.getSubtarget().hasV6Ops()) { + I.setDesc(TII.get(ARM::MUL)); + } else { + assert(TII.getSubtarget().useMulOps() && "Unsupported target"); + I.setDesc(TII.get(ARM::MULv5)); + MIB->getOperand(0).setIsEarlyClobber(true); + } + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + break; case G_FADD: if (!selectFAdd(MIB, TII, MRI)) return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index 994bbd673dd8..fe9681439e6b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -43,8 +43,9 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, 1, p0}, Legal); } - for (auto Ty : {s1, s8, s16, s32}) - setAction({G_ADD, Ty}, Legal); + for (unsigned Op : {G_ADD, G_SUB, G_MUL}) + for (auto Ty : {s1, s8, s16, s32}) + setAction({Op, Ty}, Legal); for (unsigned Op : {G_SEXT, G_ZEXT}) { setAction({Op, s32}, Legal); diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index 08f3da738868..e47bd3a8963e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -219,6 +219,8 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { switch (Opc) { case G_ADD: + case G_SUB: + case G_MUL: case G_SEXT: case G_ZEXT: case G_GEP: diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index 40993fc0aa8a..d2630685d91b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -208,8 +208,8 @@ protected: /// FP registers for VFPv3. bool HasD16 = false; - /// HasHardwareDivide - True if subtarget supports [su]div - bool HasHardwareDivide = false; + /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode + bool HasHardwareDivideInThumb = false; /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode bool HasHardwareDivideInARM = false; @@ -507,7 +507,7 @@ public: return hasNEON() && UseNEONForSinglePrecisionFP; } - bool hasDivide() const { return HasHardwareDivide; } + bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; } bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } bool hasDataBarrier() const { return HasDataBarrier; } bool hasV7Clrex() const { return HasV7Clrex; } diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f421d3ac1693..ada816c16389 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -67,6 +67,9 @@ static cl::opt<ImplicitItModeTy> ImplicitItMode( clEnumValN(ImplicitItModeTy::ThumbOnly, "thumb", "Warn in ARM, emit implicit ITs in Thumb"))); +static cl::opt<bool> AddBuildAttributes("arm-add-build-attributes", + cl::init(false)); + class ARMOperand; enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; @@ -540,6 +543,10 @@ public: // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + // Add build attributes based on the selected target. + if (AddBuildAttributes) + getTargetStreamer().emitTargetAttributes(STI); + // Not in an ITBlock to start with. ITState.CurPosition = ~0U; @@ -10189,8 +10196,8 @@ static const struct { { ARM::AEK_CRYPTO, Feature_HasV8, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} }, - { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass, - {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} }, + { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass, + {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} }, { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} }, { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} }, diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 6fa890ba1cd5..4d6c52f3cd49 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -464,7 +464,7 @@ public: void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes); void ChangeSection(MCSection *Section, const MCExpr *Subsection) override { - LastMappingSymbols[getPreviousSection().first] = std::move(LastEMSInfo); + LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo); MCELFStreamer::ChangeSection(Section, Subsection); auto LastMappingSymbol = LastMappingSymbols.find(Section); if (LastMappingSymbol != LastMappingSymbols.end()) { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 73e563890dd9..2b0cd461df7a 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -11,9 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "ARMTargetMachine.h" #include "llvm/MC/ConstantPools.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/TargetParser.h" using namespace llvm; @@ -75,3 +79,179 @@ void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {} void ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {} void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {} + +static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) { + if (STI.getCPU() == "xscale") + return ARMBuildAttrs::v5TEJ; + + if (STI.hasFeature(ARM::HasV8Ops)) { + if (STI.hasFeature(ARM::FeatureRClass)) + return ARMBuildAttrs::v8_R; + return ARMBuildAttrs::v8_A; + } else if (STI.hasFeature(ARM::HasV8MMainlineOps)) + return ARMBuildAttrs::v8_M_Main; + else if (STI.hasFeature(ARM::HasV7Ops)) { + if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP)) + return ARMBuildAttrs::v7E_M; + return ARMBuildAttrs::v7; + } else if (STI.hasFeature(ARM::HasV6T2Ops)) + return ARMBuildAttrs::v6T2; + else if (STI.hasFeature(ARM::HasV8MBaselineOps)) + return ARMBuildAttrs::v8_M_Base; + else if (STI.hasFeature(ARM::HasV6MOps)) + return ARMBuildAttrs::v6S_M; + else if (STI.hasFeature(ARM::HasV6Ops)) + return ARMBuildAttrs::v6; + else if (STI.hasFeature(ARM::HasV5TEOps)) + return ARMBuildAttrs::v5TE; + else if (STI.hasFeature(ARM::HasV5TOps)) + return ARMBuildAttrs::v5T; + else if (STI.hasFeature(ARM::HasV4TOps)) + return ARMBuildAttrs::v4T; + else + return ARMBuildAttrs::v4; +} + +static bool isV8M(const MCSubtargetInfo &STI) { + // Note that v8M Baseline is a subset of v6T2! + return (STI.hasFeature(ARM::HasV8MBaselineOps) && + !STI.hasFeature(ARM::HasV6T2Ops)) || + STI.hasFeature(ARM::HasV8MMainlineOps); +} + +/// Emit the build attributes that only depend on the hardware that we expect +// /to be available, and not on the ABI, or any source-language choices. +void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { + switchVendor("aeabi"); + + const StringRef CPUString = STI.getCPU(); + if (!CPUString.empty() && !CPUString.startswith("generic")) { + // FIXME: remove krait check when GNU tools support krait cpu + if (STI.hasFeature(ARM::ProcKrait)) { + emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); + // We consider krait as a "cortex-a9" + hwdiv CPU + // Enable hwdiv through ".arch_extension idiv" + if (STI.hasFeature(ARM::FeatureHWDivThumb) || + STI.hasFeature(ARM::FeatureHWDivARM)) + emitArchExtension(ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM); + } else { + emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); + } + } + + emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(STI)); + + if (STI.hasFeature(ARM::FeatureAClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); + } else if (STI.hasFeature(ARM::FeatureRClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::RealTimeProfile); + } else if (STI.hasFeature(ARM::FeatureMClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::MicroControllerProfile); + } + + emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasFeature(ARM::FeatureNoARM) + ? ARMBuildAttrs::Not_Allowed + : ARMBuildAttrs::Allowed); + + if (isV8M(STI)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumbDerived); + } else if (STI.hasFeature(ARM::FeatureThumb2)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); + } else if (STI.hasFeature(ARM::HasV4TOps)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); + } + + if (STI.hasFeature(ARM::FeatureNEON)) { + /* NEON is not exactly a VFP architecture, but GAS emit one of + * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ + if (STI.hasFeature(ARM::FeatureFPARMv8)) { + if (STI.hasFeature(ARM::FeatureCrypto)) + emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8); + else + emitFPU(ARM::FK_NEON_FP_ARMV8); + } else if (STI.hasFeature(ARM::FeatureVFP4)) + emitFPU(ARM::FK_NEON_VFPV4); + else + emitFPU(STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_NEON_FP16 + : ARM::FK_NEON); + // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture + if (STI.hasFeature(ARM::HasV8Ops)) + emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + STI.hasFeature(ARM::HasV8_1aOps) + ? ARMBuildAttrs::AllowNeonARMv8_1a + : ARMBuildAttrs::AllowNeonARMv8); + } else { + if (STI.hasFeature(ARM::FeatureFPARMv8)) + // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one + // FPU, but there are two different names for it depending on the CPU. + emitFPU(STI.hasFeature(ARM::FeatureD16) + ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16 + : ARM::FK_FPV5_D16) + : ARM::FK_FP_ARMV8); + else if (STI.hasFeature(ARM::FeatureVFP4)) + emitFPU(STI.hasFeature(ARM::FeatureD16) + ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16 + : ARM::FK_VFPV4_D16) + : ARM::FK_VFPV4); + else if (STI.hasFeature(ARM::FeatureVFP3)) + emitFPU( + STI.hasFeature(ARM::FeatureD16) + // +d16 + ? (STI.hasFeature(ARM::FeatureVFPOnlySP) + ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16 + : ARM::FK_VFPV3XD) + : (STI.hasFeature(ARM::FeatureFP16) + ? ARM::FK_VFPV3_D16_FP16 + : ARM::FK_VFPV3_D16)) + // -d16 + : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16 + : ARM::FK_VFPV3)); + else if (STI.hasFeature(ARM::FeatureVFP2)) + emitFPU(ARM::FK_VFPV2); + } + + // ABI_HardFP_use attribute to indicate single precision FP. + if (STI.hasFeature(ARM::FeatureVFPOnlySP)) + emitAttribute(ARMBuildAttrs::ABI_HardFP_use, + ARMBuildAttrs::HardFPSinglePrecision); + + if (STI.hasFeature(ARM::FeatureFP16)) + emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); + + if (STI.hasFeature(ARM::FeatureMP)) + emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); + + // Hardware divide in ARM mode is part of base arch, starting from ARMv8. + // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M). + // It is not possible to produce DisallowDIV: if hwdiv is present in the base + // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits. + // AllowDIVExt is only emitted if hwdiv isn't available in the base arch; + // otherwise, the default value (AllowDIVIfExists) applies. + if (STI.hasFeature(ARM::FeatureHWDivARM) && !STI.hasFeature(ARM::HasV8Ops)) + emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); + + if (STI.hasFeature(ARM::FeatureDSP) && isV8M(STI)) + emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed); + + if (STI.hasFeature(ARM::FeatureStrictAlign)) + emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Not_Allowed); + else + emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Allowed); + + if (STI.hasFeature(ARM::FeatureTrustZone) && + STI.hasFeature(ARM::FeatureVirtualization)) + emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowTZVirtualization); + else if (STI.hasFeature(ARM::FeatureTrustZone)) + emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZ); + else if (STI.hasFeature(ARM::FeatureVirtualization)) + emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowVirtualization); +} diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index fc083b98395b..d0fd366ab9ed 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -83,13 +83,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // ADJCALLSTACKUP -> add, sp, sp, amount MachineInstr &Old = *I; DebugLoc dl = Old.getDebugLoc(); - unsigned Amount = Old.getOperand(0).getImm(); + unsigned Amount = TII.getFrameSize(Old); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned Align = getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; + Amount = alignTo(Amount, getStackAlignment()); // Replace the pseudo instruction with a new instruction... unsigned Opc = Old.getOpcode(); diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp index 9f2ee8cf8035..535bb012eb07 100644 --- a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp @@ -18,7 +18,7 @@ namespace llvm { AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT) { - PointerSize = 2; + CodePointerSize = 2; CalleeSaveStackSlotSize = 2; CommentString = ";"; PrivateGlobalPrefix = ".L"; diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h index 559ac291a79e..fd7c97bf1f0a 100644 --- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h +++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h @@ -42,7 +42,7 @@ public: // messed up in random places by 4 bytes. .debug_line // section will be parsable, but with odd offsets and // line numbers, etc. - PointerSize = 8; + CodePointerSize = 8; } }; } diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp index 61d3630ac095..cb3049bf1500 100644 --- a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -1011,12 +1011,7 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { bool BT::reached(const MachineBasicBlock *B) const { int BN = B->getNumber(); assert(BN >= 0); - for (EdgeSetType::iterator I = EdgeExec.begin(), E = EdgeExec.end(); - I != E; ++I) { - if (I->second == BN) - return true; - } - return false; + return ReachedBB.count(BN); } // Visit an individual instruction. This could be a newly added instruction, @@ -1036,6 +1031,8 @@ void BT::reset() { EdgeExec.clear(); InstrExec.clear(); Map.clear(); + ReachedBB.clear(); + ReachedBB.reserve(MF.size()); } void BT::run() { @@ -1068,6 +1065,7 @@ void BT::run() { if (EdgeExec.count(Edge)) continue; EdgeExec.insert(Edge); + ReachedBB.insert(Edge.second); const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second); MachineBasicBlock::const_iterator It = B.begin(), End = B.end(); diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h index a547b34e852f..7f49f430382d 100644 --- a/contrib/llvm/lib/Target/Hexagon/BitTracker.h +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H #define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" @@ -68,10 +69,11 @@ private: typedef std::set<const MachineInstr *> InstrSetType; typedef std::queue<CFGEdge> EdgeQueueType; - EdgeSetType EdgeExec; // Executable flow graph edges. - InstrSetType InstrExec; // Executable instructions. - EdgeQueueType FlowQ; // Work queue of CFG edges. - bool Trace; // Enable tracing for debugging. + EdgeSetType EdgeExec; // Executable flow graph edges. + InstrSetType InstrExec; // Executable instructions. + EdgeQueueType FlowQ; // Work queue of CFG edges. + DenseSet<unsigned> ReachedBB; // Cache of reached blocks. + bool Trace; // Enable tracing for debugging. const MachineEvaluator &ME; MachineFunction &MF; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 418dd71aeb4b..e5eb059b566f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -635,7 +635,7 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps); } -bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { // If either no tail call or told not to tail call at all, don't. auto Attr = CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h index fb8f0ba6b057..1415156487c0 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -195,7 +195,7 @@ namespace HexagonISD { const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp index b243de317dc5..27b40f134b1f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -35,7 +35,6 @@ #include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdint> -#include <map> static cl::opt<int> CodeGrowthLimit("hexagon-amode-growth-limit", cl::Hidden, cl::init(0), cl::desc("Code growth limit for address mode " @@ -45,10 +44,8 @@ using namespace llvm; using namespace rdf; namespace llvm { - FunctionPass *createHexagonOptAddrMode(); - void initializeHexagonOptAddrModePass(PassRegistry &); - + void initializeHexagonOptAddrModePass(PassRegistry&); } // end namespace llvm namespace { @@ -59,10 +56,7 @@ public: HexagonOptAddrMode() : MachineFunctionPass(ID), HII(nullptr), MDT(nullptr), DFG(nullptr), - LV(nullptr) { - PassRegistry &R = *PassRegistry::getPassRegistry(); - initializeHexagonOptAddrModePass(R); - } + LV(nullptr) {} StringRef getPassName() const override { return "Optimize addressing mode of load/store"; @@ -84,7 +78,6 @@ private: MachineDominatorTree *MDT; DataFlowGraph *DFG; DataFlowGraph::DefStackMap DefM; - std::map<RegisterRef, std::map<NodeId, NodeId>> RDefMap; Liveness *LV; MISetType Deleted; @@ -99,8 +92,6 @@ private: void getAllRealUses(NodeAddr<StmtNode *> SN, NodeList &UNodeList); bool allValidCandidates(NodeAddr<StmtNode *> SA, NodeList &UNodeList); short getBaseWithLongOffset(const MachineInstr &MI) const; - void updateMap(NodeAddr<InstrNode *> IA); - bool constructDefMap(MachineBasicBlock *B); bool changeStore(MachineInstr *OldMI, MachineOperand ImmOp, unsigned ImmOpNum); bool changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, unsigned ImmOpNum); @@ -112,11 +103,11 @@ private: char HexagonOptAddrMode::ID = 0; -INITIALIZE_PASS_BEGIN(HexagonOptAddrMode, "opt-amode", +INITIALIZE_PASS_BEGIN(HexagonOptAddrMode, "amode-opt", "Optimize addressing mode", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) -INITIALIZE_PASS_END(HexagonOptAddrMode, "opt-amode", "Optimize addressing mode", +INITIALIZE_PASS_END(HexagonOptAddrMode, "amode-opt", "Optimize addressing mode", false, false) bool HexagonOptAddrMode::hasRepForm(MachineInstr &MI, unsigned TfrDefR) { @@ -173,8 +164,11 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { NodeAddr<UseNode *> UA = *I; NodeAddr<InstrNode *> IA = UA.Addr->getOwner(*DFG); - if ((UA.Addr->getFlags() & NodeAttrs::PhiRef) || - RDefMap[OffsetRR][IA.Id] != OffsetRegRD) + if (UA.Addr->getFlags() & NodeAttrs::PhiRef) + return false; + NodeAddr<RefNode*> AA = LV->getNearestAliasedRef(OffsetRR, IA); + if ((DFG->IsDef(AA) && AA.Id != OffsetRegRD) || + AA.Addr->getReachingDef() != OffsetRegRD) return false; MachineInstr &UseMI = *NodeAddr<StmtNode *>(IA).Addr->getCode(); @@ -486,14 +480,14 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN, MIB.add(AddAslMI->getOperand(2)); MIB.add(AddAslMI->getOperand(3)); const GlobalValue *GV = ImmOp.getGlobal(); - MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm(), + MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm()+ImmOp.getOffset(), ImmOp.getTargetFlags()); OpStart = 3; } else if (UseMID.mayStore()) { MIB.add(AddAslMI->getOperand(2)); MIB.add(AddAslMI->getOperand(3)); const GlobalValue *GV = ImmOp.getGlobal(); - MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm(), + MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm()+ImmOp.getOffset(), ImmOp.getTargetFlags()); MIB.add(UseMI->getOperand(2)); OpStart = 3; @@ -597,46 +591,6 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) { return Changed; } -void HexagonOptAddrMode::updateMap(NodeAddr<InstrNode *> IA) { - RegisterSet RRs; - for (NodeAddr<RefNode *> RA : IA.Addr->members(*DFG)) - RRs.insert(RA.Addr->getRegRef(*DFG)); - bool Common = false; - for (auto &R : RDefMap) { - if (!RRs.count(R.first)) - continue; - Common = true; - break; - } - if (!Common) - return; - - for (auto &R : RDefMap) { - auto F = DefM.find(R.first.Reg); - if (F == DefM.end() || F->second.empty()) - continue; - R.second[IA.Id] = F->second.top()->Id; - } -} - -bool HexagonOptAddrMode::constructDefMap(MachineBasicBlock *B) { - bool Changed = false; - auto BA = DFG->getFunc().Addr->findBlock(B, *DFG); - DFG->markBlock(BA.Id, DefM); - - for (NodeAddr<InstrNode *> IA : BA.Addr->members(*DFG)) { - updateMap(IA); - DFG->pushAllDefs(IA, DefM); - } - - MachineDomTreeNode *N = MDT->getNode(B); - for (auto I : *N) - Changed |= constructDefMap(I->getBlock()); - - DFG->releaseBlock(BA.Id, DefM); - return Changed; -} - bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; @@ -658,8 +612,6 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { L.computePhiInfo(); LV = &L; - constructDefMap(&DFG->getMF().front()); - Deleted.clear(); NodeAddr<FuncNode *> FA = DFG->getFunc(); DEBUG(dbgs() << "==== [RefMap#]=====:\n " diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 06fc9195fa67..6913d50bbcaa 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -111,6 +111,7 @@ namespace llvm { extern char &HexagonExpandCondsetsID; void initializeHexagonExpandCondsetsPass(PassRegistry&); void initializeHexagonLoopIdiomRecognizePass(PassRegistry&); + void initializeHexagonOptAddrModePass(PassRegistry&); Pass *createHexagonLoopIdiomPass(); FunctionPass *createHexagonBitSimplify(); @@ -152,6 +153,7 @@ extern "C" void LLVMInitializeHexagonTarget() { // Register the target. RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget()); initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); + initializeHexagonOptAddrModePass(*PassRegistry::getPassRegistry()); } HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp index 57ce9fabc5e3..ea86ffba58f6 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp +++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -59,7 +59,7 @@ void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) { bool CopyPropagation::scanBlock(MachineBasicBlock *B) { bool Changed = false; - auto BA = DFG.getFunc().Addr->findBlock(B, DFG); + NodeAddr<BlockNode*> BA = DFG.findBlock(B); for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) { if (DFG.IsCode<NodeAttrs::Stmt>(IA)) { diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h index d5faca4cd6f4..52f390356b26 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h +++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h @@ -508,7 +508,8 @@ namespace rdf { static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize, "NodeBase must be at most NodeAllocator::NodeMemSize bytes"); - typedef std::vector<NodeAddr<NodeBase*>> NodeList; +// typedef std::vector<NodeAddr<NodeBase*>> NodeList; + typedef SmallVector<NodeAddr<NodeBase*>,4> NodeList; typedef std::set<NodeId> NodeSet; struct RefNode : public NodeBase { diff --git a/contrib/llvm/lib/Target/Hexagon/RDFRegisters.cpp b/contrib/llvm/lib/Target/Hexagon/RDFRegisters.cpp index 5c5496a548af..4224ded3418b 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFRegisters.cpp +++ b/contrib/llvm/lib/Target/Hexagon/RDFRegisters.cpp @@ -69,6 +69,19 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri, for (const MachineOperand &Op : In.operands()) if (Op.isRegMask()) RegMasks.insert(Op.getRegMask()); + + MaskInfos.resize(RegMasks.size()+1); + for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) { + BitVector PU(TRI.getNumRegUnits()); + const uint32_t *MB = RegMasks.get(M); + for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) { + if (!(MB[i/32] & (1u << (i%32)))) + continue; + for (MCRegUnitIterator U(i, &TRI); U.isValid(); ++U) + PU.set(*U); + } + MaskInfos[M].Units = PU.flip(); + } } RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const { @@ -201,17 +214,8 @@ bool PhysicalRegisterInfo::aliasMM(RegisterRef RM, RegisterRef RN) const { bool RegisterAggr::hasAliasOf(RegisterRef RR) const { - if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) { - // XXX SLOW - const uint32_t *MB = PRI.getRegMaskBits(RR.Reg); - for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) { - if (MB[i/32] & (1u << (i%32))) - continue; - if (hasAliasOf(RegisterRef(i, LaneBitmask::getAll()))) - return true; - } - return false; - } + if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) + return Units.anyCommon(PRI.getMaskUnits(RR.Reg)); for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { std::pair<uint32_t,LaneBitmask> P = *U; @@ -224,15 +228,8 @@ bool RegisterAggr::hasAliasOf(RegisterRef RR) const { bool RegisterAggr::hasCoverOf(RegisterRef RR) const { if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) { - // XXX SLOW - const uint32_t *MB = PRI.getRegMaskBits(RR.Reg); - for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) { - if (MB[i/32] & (1u << (i%32))) - continue; - if (!hasCoverOf(RegisterRef(i, LaneBitmask::getAll()))) - return false; - } - return true; + BitVector T(PRI.getMaskUnits(RR.Reg)); + return T.reset(Units).none(); } for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { @@ -246,15 +243,7 @@ bool RegisterAggr::hasCoverOf(RegisterRef RR) const { RegisterAggr &RegisterAggr::insert(RegisterRef RR) { if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) { - BitVector PU(PRI.getTRI().getNumRegUnits()); // Preserved units. - const uint32_t *MB = PRI.getRegMaskBits(RR.Reg); - for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) { - if (!(MB[i/32] & (1u << (i%32)))) - continue; - for (MCRegUnitIterator U(i, &PRI.getTRI()); U.isValid(); ++U) - PU.set(*U); - } - Units |= PU.flip(); + Units |= PRI.getMaskUnits(RR.Reg); return *this; } diff --git a/contrib/llvm/lib/Target/Hexagon/RDFRegisters.h b/contrib/llvm/lib/Target/Hexagon/RDFRegisters.h index 4b35c85a6b62..314d8b5666d7 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFRegisters.h +++ b/contrib/llvm/lib/Target/Hexagon/RDFRegisters.h @@ -51,6 +51,8 @@ namespace rdf { return F - Map.begin() + 1; } + uint32_t size() const { return Map.size(); } + typedef typename std::vector<T>::const_iterator const_iterator; const_iterator begin() const { return Map.begin(); } const_iterator end() const { return Map.end(); } @@ -107,6 +109,9 @@ namespace rdf { RegisterRef getRefForUnit(uint32_t U) const { return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask); } + const BitVector &getMaskUnits(RegisterId MaskId) const { + return MaskInfos[TargetRegisterInfo::stackSlot2Index(MaskId)].Units; + } const TargetRegisterInfo &getTRI() const { return TRI; } @@ -118,11 +123,15 @@ namespace rdf { RegisterId Reg = 0; LaneBitmask Mask; }; + struct MaskInfo { + BitVector Units; + }; const TargetRegisterInfo &TRI; + IndexedSet<const uint32_t*> RegMasks; std::vector<RegInfo> RegInfos; std::vector<UnitInfo> UnitInfos; - IndexedSet<const uint32_t*> RegMasks; + std::vector<MaskInfo> MaskInfos; bool aliasRR(RegisterRef RA, RegisterRef RB) const; bool aliasRM(RegisterRef RR, RegisterRef RM) const; @@ -135,7 +144,7 @@ namespace rdf { : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {} RegisterAggr(const RegisterAggr &RG) = default; - bool empty() const { return Units.empty(); } + bool empty() const { return Units.none(); } bool hasAliasOf(RegisterRef RR) const; bool hasCoverOf(RegisterRef RR) const; static bool isCoverOf(RegisterRef RA, RegisterRef RB, diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index c26b3081dbc3..82e6731ecd78 100644 --- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -17,7 +17,7 @@ using namespace llvm; void MSP430MCAsmInfo::anchor() { } MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) { - PointerSize = CalleeSaveStackSlotSize = 2; + CodePointerSize = CalleeSaveStackSlotSize = 2; CommentString = ";"; diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp index ebe3c5784888..11411d997bb3 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp @@ -23,7 +23,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) { if ((TheTriple.getArch() == Triple::mips64el) || (TheTriple.getArch() == Triple::mips64)) { - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; } // FIXME: This condition isn't quite right but it's the best we can do until diff --git a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td index 8b04fcb76920..bf79f0f2ff82 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td @@ -3781,6 +3781,80 @@ let Predicates = [HasMSA] in { ISA_MIPS1_NOT_32R6_64R6; } +def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ + APInt Imm; + SDNode *BV = N->getOperand(0).getNode(); + EVT EltTy = N->getValueType(0).getVectorElementType(); + + return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; +}]>; + +def immi32Cst7 : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 7;}]>; +def immi32Cst15 : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 15;}]>; +def immi32Cst31 : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 31;}]>; + +def vsplati8imm7 : PatFrag<(ops node:$wt), + (and node:$wt, (vsplati8 immi32Cst7))>; +def vsplati16imm15 : PatFrag<(ops node:$wt), + (and node:$wt, (vsplati16 immi32Cst15))>; +def vsplati32imm31 : PatFrag<(ops node:$wt), + (and node:$wt, (vsplati32 immi32Cst31))>; +def vsplati64imm63 : PatFrag<(ops node:$wt), + (and node:$wt, vsplati64_imm_eq_63)>; + +class MSAShiftPat<SDNode Node, ValueType VT, MSAInst Insn, dag Vec> : + MSAPat<(VT (Node VT:$ws, (VT (and VT:$wt, Vec)))), + (VT (Insn VT:$ws, VT:$wt))>; + +class MSABitPat<SDNode Node, ValueType VT, MSAInst Insn, PatFrag Frag> : + MSAPat<(VT (Node VT:$ws, (shl vsplat_imm_eq_1, (Frag VT:$wt)))), + (VT (Insn VT:$ws, VT:$wt))>; + +multiclass MSAShiftPats<SDNode Node, string Insn> { + def : MSAShiftPat<Node, v16i8, !cast<MSAInst>(Insn#_B), + (vsplati8 immi32Cst7)>; + def : MSAShiftPat<Node, v8i16, !cast<MSAInst>(Insn#_H), + (vsplati16 immi32Cst15)>; + def : MSAShiftPat<Node, v4i32, !cast<MSAInst>(Insn#_W), + (vsplati32 immi32Cst31)>; + def : MSAPat<(v2i64 (Node v2i64:$ws, (v2i64 (and v2i64:$wt, + vsplati64_imm_eq_63)))), + (v2i64 (!cast<MSAInst>(Insn#_D) v2i64:$ws, v2i64:$wt))>; +} + +multiclass MSABitPats<SDNode Node, string Insn> { + def : MSABitPat<Node, v16i8, !cast<MSAInst>(Insn#_B), vsplati8imm7>; + def : MSABitPat<Node, v8i16, !cast<MSAInst>(Insn#_H), vsplati16imm15>; + def : MSABitPat<Node, v4i32, !cast<MSAInst>(Insn#_W), vsplati32imm31>; + def : MSAPat<(Node v2i64:$ws, (shl (v2i64 vsplati64_imm_eq_1), + (vsplati64imm63 v2i64:$wt))), + (v2i64 (!cast<MSAInst>(Insn#_D) v2i64:$ws, v2i64:$wt))>; +} + +defm : MSAShiftPats<shl, "SLL">; +defm : MSAShiftPats<srl, "SRL">; +defm : MSAShiftPats<sra, "SRA">; +defm : MSABitPats<xor, "BNEG">; +defm : MSABitPats<or, "BSET">; + +def : MSAPat<(and v16i8:$ws, (xor (shl vsplat_imm_eq_1, + (vsplati8imm7 v16i8:$wt)), + immAllOnesV)), + (v16i8 (BCLR_B v16i8:$ws, v16i8:$wt))>; +def : MSAPat<(and v8i16:$ws, (xor (shl vsplat_imm_eq_1, + (vsplati16imm15 v8i16:$wt)), + immAllOnesV)), + (v8i16 (BCLR_H v8i16:$ws, v8i16:$wt))>; +def : MSAPat<(and v4i32:$ws, (xor (shl vsplat_imm_eq_1, + (vsplati32imm31 v4i32:$wt)), + immAllOnesV)), + (v4i32 (BCLR_W v4i32:$ws, v4i32:$wt))>; +def : MSAPat<(and v2i64:$ws, (xor (shl (v2i64 vsplati64_imm_eq_1), + (vsplati64imm63 v2i64:$wt)), + (bitconvert (v4i32 immAllOnesV)))), + (v2i64 (BCLR_D v2i64:$ws, v2i64:$wt))>; + // Vector extraction with fixed index. // // Extracting 32-bit values on MSA32 should always use COPY_S_W rather than diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index e2da8477295b..bf7f079e3105 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1547,11 +1547,24 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); } +static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + SDValue Vec = Op->getOperand(2); + bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); + MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32; + SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, + DL, ResEltTy); + SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG); + + return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); +} + static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { EVT ResTy = Op->getValueType(0); SDLoc DL(Op); SDValue One = DAG.getConstant(1, DL, ResTy); - SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2)); + SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), DAG.getNOT(DL, Bit, ResTy)); @@ -1687,7 +1700,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), DAG.getNode(ISD::SHL, DL, VecTy, One, - Op->getOperand(2))); + truncateVecElts(Op, DAG))); } case Intrinsic::mips_bnegi_b: case Intrinsic::mips_bnegi_h: @@ -1723,7 +1736,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), DAG.getNode(ISD::SHL, DL, VecTy, One, - Op->getOperand(2))); + truncateVecElts(Op, DAG))); } case Intrinsic::mips_bseti_b: case Intrinsic::mips_bseti_h: @@ -2210,7 +2223,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_sll_w: case Intrinsic::mips_sll_d: return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + truncateVecElts(Op, DAG)); case Intrinsic::mips_slli_b: case Intrinsic::mips_slli_h: case Intrinsic::mips_slli_w: @@ -2240,7 +2253,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_sra_w: case Intrinsic::mips_sra_d: return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + truncateVecElts(Op, DAG)); case Intrinsic::mips_srai_b: case Intrinsic::mips_srai_h: case Intrinsic::mips_srai_w: @@ -2270,7 +2283,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_srl_w: case Intrinsic::mips_srl_d: return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + truncateVecElts(Op, DAG)); case Intrinsic::mips_srli_b: case Intrinsic::mips_srli_h: case Intrinsic::mips_srli_w: diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 78bdf4e698d8..bdd0f156c8af 100644 --- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -27,7 +27,7 @@ void NVPTXMCAsmInfo::anchor() {} NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) { if (TheTriple.getArch() == Triple::nvptx64) { - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; } CommentString = "//"; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 21e25de80dc7..ba28cd83278b 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -2004,7 +2004,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) { uint8_t Byte = Val.getLoBits(8).getZExtValue(); aggBuffer->addBytes(&Byte, 1, 1); - Val = Val.lshr(8); + Val.lshrInPlace(8); } return; } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index d8fab5b7c01a..d30bf1a56e8a 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -20,7 +20,7 @@ void PPCMCAsmInfoDarwin::anchor() { } PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { if (is64Bit) { - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; } IsLittleEndian = false; @@ -50,7 +50,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) { NeedsLocalForSize = true; if (is64Bit) { - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; } IsLittleEndian = T.getArch() == Triple::ppc64le; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 9c72638023bb..125c00295f88 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2977,10 +2977,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; - SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, - N->getValueType(0), Ops); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); + SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, + N->getValueType(0), Ops); cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1); return; } diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp index b164df8b595a..d622911e92c4 100644 --- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp @@ -18,7 +18,7 @@ using namespace llvm; void RISCVMCAsmInfo::anchor() {} RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) { - PointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4; + CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4; CommentString = "#"; AlignmentIsInBytes = false; SupportsDebugInformation = true; diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 3ed09898fb78..21df60237d96 100644 --- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -28,7 +28,7 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Triple &TheTriple) { IsLittleEndian = (TheTriple.getArch() == Triple::sparcel); if (isV9) { - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; } Data16bitsDirective = "\t.half\t"; diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index b17977d41be1..6e00981939b6 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -14,7 +14,7 @@ using namespace llvm; SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) { - PointerSize = 8; + CodePointerSize = 8; CalleeSaveStackSlotSize = 8; IsLittleEndian = false; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 84d3c7bed50a..f2fd581f7847 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -829,7 +829,7 @@ bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, return isTruncateFree(FromType, ToType); } -bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { return CI->isTailCall(); } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 7d92a7355877..1c34dc43e8bb 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -454,7 +454,7 @@ public: MachineBasicBlock *BB) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; bool allowTruncateForTailCall(Type *, Type *) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp index 2dcec5263fa1..5f8c78ed1683 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -22,7 +22,7 @@ using namespace llvm; WebAssemblyMCAsmInfoELF::~WebAssemblyMCAsmInfoELF() {} WebAssemblyMCAsmInfoELF::WebAssemblyMCAsmInfoELF(const Triple &T) { - PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; + CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; // TODO: What should MaxInstLength be? @@ -55,7 +55,7 @@ WebAssemblyMCAsmInfoELF::WebAssemblyMCAsmInfoELF(const Triple &T) { WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {} WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { - PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; + CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; // TODO: What should MaxInstLength be? diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index a0b008947491..544cd653fd72 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -94,6 +94,8 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( MCFixupKind(WebAssembly::fixup_code_global_index), MI.getLoc())); ++MCNumFixups; encodeULEB128(uint64_t(MO.getImm()), OS); + } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) { + encodeSLEB128(int64_t(MO.getImm()), OS); } else { encodeULEB128(uint64_t(MO.getImm()), OS); } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp index f4c9a4ef6b9c..559165e4c86b 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp @@ -54,7 +54,7 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() { void OptimizeReturned::visitCallSite(CallSite CS) { for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i) - if (CS.paramHasAttr(0, Attribute::Returned)) { + if (CS.paramHasAttr(i, Attribute::Returned)) { Instruction *Inst = CS.getInstruction(); Value *Arg = CS.getArgOperand(i); // Ignore constants, globals, undef, etc. diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt index 8dd5e8a03e2e..8e8e5fd1eff1 100644 --- a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -1,5 +1,15 @@ # Tests which are known to fail from the GCC torture test suite. +# Syntax: Each line has a single test to be marked as a 'known failure' (or +# 'exclusion'. Known failures are expected to fail, and will cause an error if +# they pass. (Known failures that do not run at all will not cause an +# error). The format is +# <name> <attributes> # comment +# +# The attributes in this case represent the different arguments used to +# compiler: 'wasm-s' is for compiling to .s files, and 'wasm-o' for compiling +# to wasm object files (.o). + # Computed gotos are not supported (Cannot select BlockAddress/BRIND) 20040302-1.c 20071210-1.c @@ -66,3 +76,21 @@ pr41935.c 920728-1.c pr28865.c widechar-2.c + +# crash: Running pass 'WebAssembly Explicit Locals' on function +20020107-1.c wasm-o +20030222-1.c wasm-o +20071220-1.c wasm-o +20071220-2.c wasm-o +990130-1.c wasm-o +pr38533.c wasm-o +pr41239.c wasm-o +pr43385.c wasm-o +pr43560.c wasm-o +pr45695.c wasm-o +pr49279.c wasm-o +pr49390.c wasm-o +pr52286.c wasm-o + +# fatal error: error in backend: data symbols must have a size set with .size +921110-1.c wasm-o diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 48a1d8f1330c..9c35a251e480 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -43,7 +43,7 @@ void X86MCAsmInfoDarwin::anchor() { } X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { bool is64Bit = T.getArch() == Triple::x86_64; if (is64Bit) - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; AssemblerDialect = AsmWriterFlavor; @@ -92,7 +92,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { // For ELF, x86-64 pointer size depends on the ABI. // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64 // with the x32 ABI, pointer size remains the default 4. - PointerSize = (is64Bit && !isX32) ? 8 : 4; + CodePointerSize = (is64Bit && !isX32) ? 8 : 4; // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI. CalleeSaveStackSlotSize = is64Bit ? 8 : 4; @@ -129,7 +129,7 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { if (Triple.getArch() == Triple::x86_64) { PrivateGlobalPrefix = ".L"; PrivateLabelPrefix = ".L"; - PointerSize = 8; + CodePointerSize = 8; WinEHEncodingType = WinEH::EncodingType::Itanium; } else { // 32-bit X86 doesn't use CFI, so this isn't a real encoding type. It's just @@ -156,7 +156,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { if (Triple.getArch() == Triple::x86_64) { PrivateGlobalPrefix = ".L"; PrivateLabelPrefix = ".L"; - PointerSize = 8; + CodePointerSize = 8; WinEHEncodingType = WinEH::EncodingType::Itanium; ExceptionsType = ExceptionHandling::WinEH; } else { diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp index 78e0bca4158e..8678a13b95d0 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -1698,21 +1698,18 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } -// NOTE: this only has a subset of the full frame index logic. In -// particular, the FI < 0 and AfterFPPop logic is handled in -// X86RegisterInfo::eliminateFrameIndex, but not here. Possibly -// (probably?) it should be moved into here. int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); + bool IsFixed = MFI.isFixedObjectIndex(FI); // We can't calculate offset from frame pointer if the stack is realigned, // so enforce usage of stack/base pointer. The base pointer is used when we // have dynamic allocas in addition to dynamic realignment. if (TRI->hasBasePointer(MF)) - FrameReg = TRI->getBaseRegister(); + FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister(); else if (TRI->needsStackRealignment(MF)) - FrameReg = TRI->getStackRegister(); + FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister(); else FrameReg = TRI->getFrameRegister(MF); diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index eb5c56ff2ff9..2d788bf0cf99 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1311,8 +1311,9 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, ++Cost; // If the base is a register with multiple uses, this // transformation may save a mov. - if ((AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() && + // FIXME: Don't rely on DELETED_NODEs. + if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() && + AM.Base_Reg->getOpcode() != ISD::DELETED_NODE && !AM.Base_Reg.getNode()->hasOneUse()) || AM.BaseType == X86ISelAddressMode::FrameIndexBase) --Cost; diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 6bf3672c3c08..b5f29fb400ef 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2742,13 +2742,13 @@ static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { return GuaranteedTailCallOpt && canGuaranteeTCO(CC); } -bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { auto Attr = CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); if (!CI->isTailCall() || Attr.getValueAsString() == "true") return false; - CallSite CS(CI); + ImmutableCallSite CS(CI); CallingConv::ID CalleeCC = CS.getCallingConv(); if (!mayTailCallThisCC(CalleeCC)) return false; @@ -8327,13 +8327,13 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask, Zeroable.setBit(i); else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { APInt Val = Cst->getAPIntValue(); - Val = Val.lshr((M % Scale) * ScalarSizeInBits); + Val.lshrInPlace((M % Scale) * ScalarSizeInBits); Val = Val.getLoBits(ScalarSizeInBits); if (Val == 0) Zeroable.setBit(i); } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) { APInt Val = Cst->getValueAPF().bitcastToAPInt(); - Val = Val.lshr((M % Scale) * ScalarSizeInBits); + Val.lshrInPlace((M % Scale) * ScalarSizeInBits); Val = Val.getLoBits(ScalarSizeInBits); if (Val == 0) Zeroable.setBit(i); @@ -16069,7 +16069,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { unsigned EltBits = EltVT.getSizeInBits(); // For FABS, mask is 0x7f...; for FNEG, mask is 0x80... APInt MaskElt = - IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits); + IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits); const fltSemantics &Sem = EltVT == MVT::f64 ? APFloat::IEEEdouble() : (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle()); @@ -16132,9 +16132,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // The mask constants are automatically splatted for vector types. unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue SignMask = DAG.getConstantFP( - APFloat(Sem, APInt::getSignBit(EltSizeInBits)), dl, LogicVT); + APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT); SDValue MagMask = DAG.getConstantFP( - APFloat(Sem, ~APInt::getSignBit(EltSizeInBits)), dl, LogicVT); + APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT); // First, clear all bits but the sign bit from the second operand (sign). if (IsFakeVector) @@ -17344,10 +17344,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // bits of the inputs before performing those operations. if (FlipSigns) { MVT EltVT = VT.getVectorElementType(); - SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl, + SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl, VT); - Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); - Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB); + Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM); + Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM); } SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); @@ -22111,11 +22111,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, } // i64 vector arithmetic shift can be emulated with the transform: - // M = lshr(SIGN_BIT, Amt) + // M = lshr(SIGN_MASK, Amt) // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M) if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) && Op.getOpcode() == ISD::SRA) { - SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT); + SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT); SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt); R = DAG.getNode(ISD::SRL, dl, VT, R, Amt); R = DAG.getNode(ISD::XOR, dl, VT, R, M); @@ -22647,7 +22647,7 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { // FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b. TargetLowering::AtomicExpansionKind X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - auto PTy = cast<PointerType>(LI->getPointerOperand()->getType()); + auto PTy = cast<PointerType>(LI->getPointerOperandType()); return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; } @@ -26722,8 +26722,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, // Low bits are known zero. KnownZero.setLowBits(ShAmt); } else { - KnownZero = KnownZero.lshr(ShAmt); - KnownOne = KnownOne.lshr(ShAmt); + KnownZero.lshrInPlace(ShAmt); + KnownOne.lshrInPlace(ShAmt); // High bits are known zero. KnownZero.setHighBits(ShAmt); } @@ -30152,7 +30152,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // x s< 0 ? x^C : 0 --> subus x, C if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR && ISD::isBuildVectorAllZeros(CondRHS.getNode()) && - OpRHSConst->getAPIntValue().isSignBit()) + OpRHSConst->getAPIntValue().isSignMask()) // Note that we have to rebuild the RHS constant here to ensure we // don't rely on particular values of undef lanes. return DAG.getNode( @@ -30203,7 +30203,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); - APInt DemandedMask(APInt::getSignBit(BitWidth)); + APInt DemandedMask(APInt::getSignMask(BitWidth)); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); @@ -31269,7 +31269,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, else if (X86ISD::VSRAI == Opcode) Elt = Elt.ashr(ShiftImm); else - Elt = Elt.lshr(ShiftImm); + Elt.lshrInPlace(ShiftImm); } return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N)); } @@ -32234,8 +32234,8 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V); if (!BV || !BV->isConstant()) return false; - for (unsigned i = 0, e = V.getNumOperands(); i < e; i++) { - ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(i)); + for (SDValue Op : V->ops()) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); if (!C) return false; uint64_t Val = C->getZExtValue(); @@ -33428,8 +33428,8 @@ static SDValue isFNEG(SDNode *N) { SDValue Op0 = peekThroughBitcasts(Op.getOperand(0)); unsigned EltBits = Op1.getScalarValueSizeInBits(); - auto isSignBitValue = [&](const ConstantFP *C) { - return C->getValueAPF().bitcastToAPInt() == APInt::getSignBit(EltBits); + auto isSignMask = [&](const ConstantFP *C) { + return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits); }; // There is more than one way to represent the same constant on @@ -33440,21 +33440,21 @@ static SDValue isFNEG(SDNode *N) { // We check all variants here. if (Op1.getOpcode() == X86ISD::VBROADCAST) { if (auto *C = getTargetConstantFromNode(Op1.getOperand(0))) - if (isSignBitValue(cast<ConstantFP>(C))) + if (isSignMask(cast<ConstantFP>(C))) return Op0; } else if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1)) { if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode()) - if (isSignBitValue(CN->getConstantFPValue())) + if (isSignMask(CN->getConstantFPValue())) return Op0; } else if (auto *C = getTargetConstantFromNode(Op1)) { if (C->getType()->isVectorTy()) { if (auto *SplatV = C->getSplatValue()) - if (isSignBitValue(cast<ConstantFP>(SplatV))) + if (isSignMask(cast<ConstantFP>(SplatV))) return Op0; } else if (auto *FPConst = dyn_cast<ConstantFP>(C)) - if (isSignBitValue(FPConst)) + if (isSignMask(FPConst)) return Op0; } return SDValue(); @@ -34631,7 +34631,7 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG, return SDValue(); ShrinkMode Mode; - if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode)) + if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16) return SDValue(); EVT VT = N->getValueType(0); diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index ab4910daca02..190a88335000 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -1207,7 +1207,7 @@ namespace llvm { bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override; diff --git a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp index 6cc5e8b63597..fb9315792892 100644 --- a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -67,6 +67,8 @@ private: MachineFunction &MF) const; bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; const X86Subtarget &STI; const X86InstrInfo &TII; @@ -99,6 +101,10 @@ X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI, static const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) { if (RB.getID() == X86::GPRRegBankID) { + if (Ty.getSizeInBits() <= 8) + return &X86::GR8RegClass; + if (Ty.getSizeInBits() == 16) + return &X86::GR16RegClass; if (Ty.getSizeInBits() == 32) return &X86::GR32RegClass; if (Ty.getSizeInBits() == 64) @@ -207,6 +213,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const { return true; if (selectConstant(I, MRI, MF)) return true; + if (selectTrunc(I, MRI, MF)) + return true; return selectImpl(I); } @@ -509,6 +517,59 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +bool X86InstructionSelector::selectTrunc(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_TRUNC) + return false; + + const unsigned DstReg = I.getOperand(0).getReg(); + const unsigned SrcReg = I.getOperand(1).getReg(); + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg); + + const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); + const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); + + if (DstRB.getID() != SrcRB.getID()) { + DEBUG(dbgs() << "G_TRUNC input/output on different banks\n"); + return false; + } + + if (DstRB.getID() != X86::GPRRegBankID) + return false; + + const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB); + if (!DstRC) + return false; + + const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB); + if (!SrcRC) + return false; + + if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || + !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { + DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); + return false; + } + + if (DstRC == SrcRC) { + // Nothing to be done + } else if (DstRC == &X86::GR32RegClass) { + I.getOperand(1).setSubReg(X86::sub_32bit); + } else if (DstRC == &X86::GR16RegClass) { + I.getOperand(1).setSubReg(X86::sub_16bit); + } else if (DstRC == &X86::GR8RegClass) { + I.getOperand(1).setSubReg(X86::sub_8bit); + } else { + return false; + } + + I.setDesc(TII.get(X86::COPY)); + return true; +} + InstructionSelector * llvm::createX86InstructionSelector(X86Subtarget &Subtarget, X86RegisterBankInfo &RBI) { diff --git a/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp index d395c826e6bf..0f8a750a0235 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp @@ -68,6 +68,7 @@ X86GenRegisterBankInfo::PartialMappingIdx X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) { if ((Ty.isScalar() && !isFP) || Ty.isPointer()) { switch (Ty.getSizeInBits()) { + case 1: case 8: return PMI_GPR8; case 16: diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h index 58fa31e94fba..25958f0c3106 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h @@ -133,6 +133,11 @@ public: unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const; unsigned getStackRegister() const { return StackPtr; } unsigned getBaseRegister() const { return BasePtr; } + /// Returns physical register used as frame pointer. + /// This will always returns the frame pointer register, contrary to + /// getFrameRegister() which returns the "base pointer" in situations + /// involving a stack, frame and base pointer. + unsigned getFramePtr() const { return FramePtr; } // FIXME: Move to FrameInfok unsigned getSlotSize() const { return SlotSize; } }; diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 375b74c494d9..8e26849ea9e3 100644 --- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -167,15 +167,12 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // Drop any attributes that were on the vararg arguments. AttributeList PAL = CS.getAttributes(); - if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) { - SmallVector<AttributeList, 8> AttributesVec; - for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i) - AttributesVec.push_back(PAL.getSlotAttributes(i)); - if (PAL.hasAttributes(AttributeList::FunctionIndex)) - AttributesVec.push_back(AttributeList::get(Fn.getContext(), - AttributeList::FunctionIndex, - PAL.getFnAttributes())); - PAL = AttributeList::get(Fn.getContext(), AttributesVec); + if (!PAL.isEmpty()) { + SmallVector<AttributeSet, 8> ArgAttrs; + for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) + ArgAttrs.push_back(PAL.getParamAttributes(ArgNo)); + PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttributes(), + PAL.getRetAttributes(), ArgAttrs); } SmallVector<OperandBundleDef, 1> OpBundles; diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 4d13b3f40688..9648883b7f27 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -222,15 +222,11 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) { MadeChange = true; // Clear out any existing attributes. - AttrBuilder B; - B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); - F->removeAttributes( - AttributeList::FunctionIndex, - AttributeList::get(F->getContext(), AttributeList::FunctionIndex, B)); + F->removeFnAttr(Attribute::ReadOnly); + F->removeFnAttr(Attribute::ReadNone); // Add in the new attribute. - F->addAttribute(AttributeList::FunctionIndex, - ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone); + F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone); if (ReadsMemory) ++NumReadOnly; @@ -495,9 +491,6 @@ determinePointerReadAttrs(Argument *A, static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) { bool Changed = false; - AttrBuilder B; - B.addAttribute(Attribute::Returned); - // Check each function in turn, determining if an argument is always returned. for (Function *F : SCCNodes) { // We can infer and propagate function attributes only when we know that the @@ -535,7 +528,7 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) { if (Value *RetArg = FindRetArg()) { auto *A = cast<Argument>(RetArg); - A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::Returned); ++NumReturned; Changed = true; } @@ -593,9 +586,6 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { ArgumentGraph AG; - AttrBuilder B; - B.addAttribute(Attribute::NoCapture); - // Check each function in turn, determining which pointer arguments are not // captured. for (Function *F : SCCNodes) { @@ -614,7 +604,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; ++A) { if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { - A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } @@ -633,8 +623,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { if (!Tracker.Captured) { if (Tracker.Uses.empty()) { // If it's trivially not captured, mark it nocapture now. - A->addAttr( - AttributeList::get(F->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } else { @@ -660,9 +649,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { Self.insert(&*A); Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); if (R != Attribute::None) { - AttrBuilder B; - B.addAttribute(R); - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->addAttr(R); Changed = true; R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; } @@ -687,7 +674,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { if (ArgumentSCC[0]->Uses.size() == 1 && ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { Argument *A = ArgumentSCC[0]->Definition; - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } @@ -729,7 +716,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } @@ -760,15 +747,12 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { } if (ReadAttr != Attribute::None) { - AttrBuilder B, R; - B.addAttribute(ReadAttr); - R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; // Clear out existing readonly/readnone attributes - A->removeAttr( - AttributeList::get(A->getContext(), A->getArgNo() + 1, R)); - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->removeAttr(Attribute::ReadOnly); + A->removeAttr(Attribute::ReadNone); + A->addAttr(ReadAttr); ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; Changed = true; } diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index ade4f21ceb52..ae9d4ce11e0d 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -1979,16 +1979,11 @@ static void ChangeCalleesToFastCall(Function *F) { } } -static AttributeList StripNest(LLVMContext &C, const AttributeList &Attrs) { - for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { - unsigned Index = Attrs.getSlotIndex(i); - if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest)) - continue; - - // There can be only one. - return Attrs.removeAttribute(C, Index, Attribute::Nest); - } - +static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) { + // There can be at most one attribute set with a nest attribute. + unsigned NestIndex; + if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex)) + return Attrs.removeAttribute(C, NestIndex, Attribute::Nest); return Attrs; } diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp index 3371de6e3d14..e755e2bd8f26 100644 --- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Pass.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/SampleProfReader.h" @@ -208,6 +209,12 @@ protected: /// the same number of times. EquivalenceClassMap EquivalenceClass; + /// Map from function name to Function *. Used to find the function from + /// the function name. If the function name contains suffix, additional + /// entry is added to map from the stripped name to the function if there + /// is one-to-one mapping. + StringMap<Function *> SymbolMap; + /// \brief Dominance, post-dominance and loop information. std::unique_ptr<DominatorTree> DT; std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT; @@ -670,7 +677,7 @@ bool SampleProfileLoader::inlineHotFunctions( for (auto &I : BB.getInstList()) { const FunctionSamples *FS = nullptr; if ((isa<CallInst>(I) || isa<InvokeInst>(I)) && - (FS = findCalleeFunctionSamples(I))) { + !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) { Candidates.push_back(&I); if (callsiteIsHot(Samples, FS)) Hot = true; @@ -689,7 +696,10 @@ bool SampleProfileLoader::inlineHotFunctions( for (const auto *FS : findIndirectCallFunctionSamples(*I)) { auto CalleeFunctionName = FS->getName(); const char *Reason = "Callee function not available"; - CalledFunction = F.getParent()->getFunction(CalleeFunctionName); + auto R = SymbolMap.find(CalleeFunctionName); + if (R == SymbolMap.end()) + continue; + CalledFunction = R->getValue(); if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) { // The indirect target was promoted and inlined in the profile, as a // result, we do not have profile info for the branch probability. @@ -1181,8 +1191,11 @@ void SampleProfileLoader::propagateWeights(Function &F) { if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) continue; + DebugLoc BranchLoc = TI->getDebugLoc(); DEBUG(dbgs() << "\nGetting weights for branch at line " - << TI->getDebugLoc().getLine() << ".\n"); + << ((BranchLoc) ? Twine(BranchLoc.getLine()) + : Twine("<UNKNOWN LOCATION>")) + << ".\n"); SmallVector<uint32_t, 4> Weights; uint32_t MaxWeight = 0; DebugLoc MaxDestLoc; @@ -1219,7 +1232,6 @@ void SampleProfileLoader::propagateWeights(Function &F) { DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n"); TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); - DebugLoc BranchLoc = TI->getDebugLoc(); emitOptimizationRemark( Ctx, DEBUG_TYPE, F, MaxDestLoc, Twine("most popular destination for conditional branches at ") + @@ -1414,6 +1426,26 @@ bool SampleProfileLoader::runOnModule(Module &M) { for (const auto &I : Reader->getProfiles()) TotalCollectedSamples += I.second.getTotalSamples(); + // Populate the symbol map. + for (const auto &N_F : M.getValueSymbolTable()) { + std::string OrigName = N_F.getKey(); + Function *F = dyn_cast<Function>(N_F.getValue()); + if (F == nullptr) + continue; + SymbolMap[OrigName] = F; + auto pos = OrigName.find('.'); + if (pos != std::string::npos) { + std::string NewName = OrigName.substr(0, pos); + auto r = SymbolMap.insert(std::make_pair(NewName, F)); + // Failiing to insert means there is already an entry in SymbolMap, + // thus there are multiple functions that are mapped to the same + // stripped name. In this case of name conflicting, set the value + // to nullptr to avoid confusion. + if (!r.second) + r.first->second = nullptr; + } + } + bool retval = false; for (auto &F : M) if (!F.isDeclaration()) { diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 65deb82cd2a5..9801a0a61416 100644 --- a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -363,6 +363,7 @@ void splitAndWriteThinLTOBitcode( W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/true, &ModHash); W.writeModule(MergedM.get()); + W.writeStrtab(); OS << Buffer; // If a minimized bitcode module was requested for the thin link, @@ -375,6 +376,7 @@ void splitAndWriteThinLTOBitcode( W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/false, &ModHash); W2.writeModule(MergedM.get()); + W2.writeStrtab(); *ThinLinkOS << Buffer; } } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 174ec8036274..e30a4bafb9b0 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1044,14 +1044,14 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { const APInt *RHSC; if (match(RHS, m_APInt(RHSC))) { - if (RHSC->isSignBit()) { + if (RHSC->isSignMask()) { // If wrapping is not allowed, then the addition must set the sign bit: - // X + (signbit) --> X | signbit + // X + (signmask) --> X | signmask if (I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) return BinaryOperator::CreateOr(LHS, RHS); // If wrapping is allowed, then the addition flips the sign bit of LHS: - // X + (signbit) --> X ^ signbit + // X + (signmask) --> X ^ signmask return BinaryOperator::CreateXor(LHS, RHS); } @@ -1120,9 +1120,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } - // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, - // transform them into (X + (signbit ^ C)) - if (XorRHS->getValue().isSignBit()) + // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C, + // transform them into (X + (signmask ^ C)) + if (XorRHS->getValue().isSignMask()) return BinaryOperator::CreateAdd(XorLHS, ConstantExpr::getXor(XorRHS, CI)); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index b2a41c699202..3a98e8937bda 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2078,7 +2078,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *NOr = Builder->CreateOr(A, Op1); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, - cast<Instruction>(Op0)->getOperand(1)); + ConstantInt::get(NOr->getType(), *C)); } // Y|(X^C) -> (X|Y)^C iff Y&C == 0 @@ -2087,7 +2087,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *NOr = Builder->CreateOr(A, Op0); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, - cast<Instruction>(Op1)->getOperand(1)); + ConstantInt::get(NOr->getType(), *C)); } } @@ -2480,8 +2480,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); return BinaryOperator::CreateSub(SubOne(NegOp0CI), Op0I->getOperand(0)); - } else if (RHSC->getValue().isSignBit()) { - // (X + C) ^ signbit -> (X + C + signbit) + } else if (RHSC->getValue().isSignMask()) { + // (X + C) ^ signmask -> (X + C + signmask) Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue()); return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 69484f47223f..e7aa1a457371 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -839,7 +839,8 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0, // Length bits. if (CI0) { APInt Elt = CI0->getValue(); - Elt = Elt.lshr(Index).zextOrTrunc(Length); + Elt.lshrInPlace(Index); + Elt = Elt.zextOrTrunc(Length); return LowConstantHighUndef(Elt.getZExtValue()); } @@ -1036,7 +1037,7 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II, // The PD variants uses bit 1 to select per-lane element index, so // shift down to convert to generic shuffle mask index. if (IsPD) - Index = Index.lshr(1); + Index.lshrInPlace(1); // The _256 variants are a bit trickier since the mask bits always index // into the corresponding 128 half. In order to convert to a generic @@ -4067,21 +4068,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && - !CallerPAL.isEmpty()) + !CallerPAL.isEmpty()) { // In this case we have more arguments than the new function type, but we // won't be dropping them. Check that these extra arguments have attributes // that are compatible with being a vararg call argument. - for (unsigned i = CallerPAL.getNumSlots(); i; --i) { - unsigned Index = CallerPAL.getSlotIndex(i - 1); - if (Index <= FT->getNumParams()) - break; - - // Check if it has an attribute that's incompatible with varargs. - AttributeList PAttrs = CallerPAL.getSlotAttributes(i - 1); - if (PAttrs.hasAttribute(Index, Attribute::StructRet)) - return false; - } - + unsigned SRetIdx; + if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) && + SRetIdx > FT->getNumParams()) + return false; + } // Okay, we decided that this is a safe thing to do: go ahead and start // inserting cast instructions as necessary. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 25683132c786..9127ddca5915 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1591,7 +1591,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // GEP into CI would undo canonicalizing addrspacecast with different // pointer types, causing infinite loops. (!isa<AddrSpaceCastInst>(CI) || - GEP->getType() == GEP->getPointerOperand()->getType())) { + GEP->getType() == GEP->getPointerOperandType())) { // Changing the cast operand is usually not a good idea but it is safe // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index bbafa9e9f468..003029ae39d5 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -140,7 +140,7 @@ static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; - return RHS.isSignBit(); + return RHS.isSignMask(); default: return false; } @@ -1532,14 +1532,14 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, } if (Xor->hasOneUse()) { - // (icmp u/s (xor X SignBit), C) -> (icmp s/u X, (xor C SignBit)) - if (!Cmp.isEquality() && XorC->isSignBit()) { + // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask)) + if (!Cmp.isEquality() && XorC->isSignMask()) { Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), *C ^ *XorC)); } - // (icmp u/s (xor X ~SignBit), C) -> (icmp s/u X, (xor C ~SignBit)) + // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask)) if (!Cmp.isEquality() && XorC->isMaxSignedValue()) { Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); @@ -2402,9 +2402,9 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, const APInt &Upper = CR.getUpper(); const APInt &Lower = CR.getLower(); if (Cmp.isSigned()) { - if (Lower.isSignBit()) + if (Lower.isSignMask()) return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper)); - if (Upper.isSignBit()) + if (Upper.isSignMask()) return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower)); } else { if (Lower.isMinValue()) @@ -2604,7 +2604,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, break; // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 - if (BOC->isSignBit()) { + if (BOC->isSignMask()) { Constant *Zero = Constant::getNullValue(BOp0->getType()); auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(NewPred, BOp0, Zero); @@ -3032,9 +3032,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b return new ICmpInst(I.getPredicate(), BO0->getOperand(0), BO1->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b + // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { - if (CI->getValue().isSignBit()) { + if (CI->getValue().isSignMask()) { ICmpInst::Predicate Pred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); @@ -3797,7 +3797,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth, bool isSignCheck) { if (isSignCheck) - return APInt::getSignBit(BitWidth); + return APInt::getSignMask(BitWidth); ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); if (!CI) return APInt::getAllOnesValue(BitWidth); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 6288e054f1bc..675553017838 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -931,6 +931,18 @@ static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr, return nullptr; } +static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) { + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { + const Value *GEPI0 = GEPI->getOperand(0); + if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) + return true; + } + if (isa<UndefValue>(Op) || + (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) + return true; + return false; +} + Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); @@ -979,27 +991,13 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (!LI.isUnordered()) return nullptr; // load(gep null, ...) -> unreachable - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { - const Value *GEPI0 = GEPI->getOperand(0); - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){ - // Insert a new store to null instruction before the load to indicate - // that this code is not reachable. We do this instead of inserting - // an unreachable instruction directly because we cannot modify the - // CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return replaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - } - // load null/undef -> unreachable - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa<UndefValue>(Op) || - (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) { - // Insert a new store to null instruction before the load to indicate that - // this code is not reachable. We do this instead of inserting an - // unreachable instruction directly because we cannot modify the CFG. + // TODO: Consider a target hook for valid address spaces for this xforms. + if (canSimplifyNullLoadOrGEP(LI, Op)) { + // Insert a new store to null instruction before the load to indicate + // that this code is not reachable. We do this instead of inserting + // an unreachable instruction directly because we cannot modify the + // CFG. new StoreInst(UndefValue::get(LI.getType()), Constant::getNullValue(Op->getType()), &LI); return replaceInstUsesWith(LI, UndefValue::get(LI.getType())); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index f1ac82057e6c..ce66581a491a 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -944,22 +944,21 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } - if (ConstantInt *One = dyn_cast<ConstantInt>(Op0)) { - if (One->isOne() && !I.getType()->isIntegerTy(1)) { - bool isSigned = I.getOpcode() == Instruction::SDiv; - if (isSigned) { - // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the - // result is one, if Op1 is -1 then the result is minus one, otherwise - // it's zero. - Value *Inc = Builder->CreateAdd(Op1, One); - Value *Cmp = Builder->CreateICmpULT( - Inc, ConstantInt::get(I.getType(), 3)); - return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); - } else { - // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the - // result is one, otherwise it's zero. - return new ZExtInst(Builder->CreateICmpEQ(Op1, One), I.getType()); - } + if (match(Op0, m_One())) { + assert(!I.getType()->getScalarType()->isIntegerTy(1) && + "i1 divide not removed?"); + if (I.getOpcode() == Instruction::SDiv) { + // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the + // result is one, if Op1 is -1 then the result is minus one, otherwise + // it's zero. + Value *Inc = Builder->CreateAdd(Op1, Op0); + Value *Cmp = Builder->CreateICmpULT( + Inc, ConstantInt::get(I.getType(), 3)); + return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); + } else { + // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the + // result is one, otherwise it's zero. + return new ZExtInst(Builder->CreateICmpEQ(Op1, Op0), I.getType()); } } @@ -1238,25 +1237,23 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. - if (I.getType()->isIntegerTy()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op0, Mask, 0, &I)) { - if (MaskedValueIsZero(Op1, Mask, 0, &I)) { - // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set - auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); - BO->setIsExact(I.isExact()); - return BO; - } + APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits())); + if (MaskedValueIsZero(Op0, Mask, 0, &I)) { + if (MaskedValueIsZero(Op1, Mask, 0, &I)) { + // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set + auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + BO->setIsExact(I.isExact()); + return BO; + } - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { - // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) - // Safe because the only negative value (1 << Y) can take on is - // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have - // the sign bit set. - auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); - BO->setIsExact(I.isExact()); - return BO; - } + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { + // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) + // Safe because the only negative value (1 << Y) can take on is + // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have + // the sign bit set. + auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + BO->setIsExact(I.isExact()); + return BO; } } @@ -1546,13 +1543,11 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. - if (I.getType()->isIntegerTy()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op1, Mask, 0, &I) && - MaskedValueIsZero(Op0, Mask, 0, &I)) { - // X srem Y -> X urem Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateURem(Op0, Op1, I.getName()); - } + APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits())); + if (MaskedValueIsZero(Op1, Mask, 0, &I) && + MaskedValueIsZero(Op0, Mask, 0, &I)) { + // X srem Y -> X urem Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateURem(Op0, Op1, I.getName()); } // If it's a constant vector, flip any negative values positive. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 693b6c95c169..5d6d899da4b5 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -618,7 +618,7 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, { unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType()->getScalarType()); - APInt MinSignedValue = APInt::getSignBit(BitWidth); + APInt MinSignedValue = APInt::getSignedMinValue(BitWidth); Value *X; const APInt *Y, *C; bool TrueWhenUnset; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 9aa679c60e47..f77d713b9b07 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -370,7 +370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, MaskV <<= Op1C->getZExtValue(); else { assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); - MaskV = MaskV.lshr(Op1C->getZExtValue()); + MaskV.lshrInPlace(Op1C->getZExtValue()); } // shift1 & 0x00FF @@ -760,7 +760,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { } // See if we can turn a signed shr into an unsigned shr. - if (MaskedValueIsZero(Op0, APInt::getSignBit(BitWidth), 0, &I)) + if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I)) return BinaryOperator::CreateLShr(Op0, Op1); return nullptr; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 4e6f02058d83..2ba052b7e02d 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -38,7 +38,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, // If there are no bits set that aren't demanded, nothing to do. Demanded = Demanded.zextOrTrunc(C->getBitWidth()); - if ((~Demanded & *C) == 0) + if (C->isSubsetOf(Demanded)) return false; // This instruction is producing bits that are not demanded. Shrink the RHS. @@ -117,27 +117,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownOne.getBitWidth() == BitWidth && "Value *V, DemandedMask, KnownZero and KnownOne " "must have same BitWidth"); - const APInt *C; - if (match(V, m_APInt(C))) { - // We know all of the bits for a scalar constant or a splat vector constant! - KnownOne = *C & DemandedMask; - KnownZero = ~KnownOne & DemandedMask; - return nullptr; - } - if (isa<ConstantPointerNull>(V)) { - // We know all of the bits for a constant! - KnownOne.clearAllBits(); - KnownZero = DemandedMask; + + if (isa<Constant>(V)) { + computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); return nullptr; } KnownZero.clearAllBits(); KnownOne.clearAllBits(); - if (DemandedMask == 0) { // Not demanding any bits from V. - if (isa<UndefValue>(V)) - return nullptr; + if (DemandedMask == 0) // Not demanding any bits from V. return UndefValue::get(VTy); - } if (Depth == 6) // Limit search depth. return nullptr; @@ -187,16 +176,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. @@ -224,25 +211,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) + if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) + if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. @@ -271,20 +247,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. - if ((DemandedMask & RHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(LHSKnownZero)) return I->getOperand(1); // If all of the demanded bits are known to be zero on one side or the // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownZero)) { Instruction *Or = BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), I->getName()); @@ -295,14 +271,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { - // all known - if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { - Constant *AndC = Constant::getIntegerValue(VTy, - ~RHSKnownOne & DemandedMask); - Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); - return InsertNewInstWith(And, *I); - } + if (DemandedMask.isSubsetOf(RHSKnownZero|RHSKnownOne) && + RHSKnownOne.isSubsetOf(LHSKnownOne)) { + Constant *AndC = Constant::getIntegerValue(VTy, + ~RHSKnownOne & DemandedMask); + Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); + return InsertNewInstWith(And, *I); } // If the RHS is a constant, see if we can simplify it. @@ -529,9 +503,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownZero.setLowBits(ShiftAmt); } break; - case Instruction::LShr: - // For a logical shift right - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + case Instruction::LShr: { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Unsigned shift right. @@ -546,13 +520,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - KnownZero = KnownZero.lshr(ShiftAmt); - KnownOne = KnownOne.lshr(ShiftAmt); + KnownZero.lshrInPlace(ShiftAmt); + KnownOne.lshrInPlace(ShiftAmt); if (ShiftAmt) KnownZero.setHighBits(ShiftAmt); // high bits known zero. } break; - case Instruction::AShr: + } + case Instruction::AShr: { // If this is an arithmetic shift right and only the low-bit is set, we can // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless @@ -566,15 +541,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the sign bit is the only bit demanded by this ashr, then there is no // need to do it, the shift doesn't change the high bit. - if (DemandedMask.isSignBit()) + if (DemandedMask.isSignMask()) return I->getOperand(0); - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - // If any of the "high bits" are demanded, we should set the sign bit as + // If any of the high bits are demanded, we should set the sign bit as // demanded. if (DemandedMask.countLeadingZeros() <= ShiftAmt) DemandedMaskIn.setSignBit(); @@ -587,31 +563,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, Depth + 1)) return I; + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - KnownZero = KnownZero.lshr(ShiftAmt); - KnownOne = KnownOne.lshr(ShiftAmt); + KnownZero.lshrInPlace(ShiftAmt); + KnownOne.lshrInPlace(ShiftAmt); // Handle the sign bits. - APInt SignBit(APInt::getSignBit(BitWidth)); + APInt SignMask(APInt::getSignMask(BitWidth)); // Adjust to where it is now in the mask. - SignBit = SignBit.lshr(ShiftAmt); + SignMask.lshrInPlace(ShiftAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || (HighBits & ~DemandedMask) == HighBits) { - // Perform the logical shift right. - BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0), - SA, I->getName()); - NewVal->setIsExact(cast<BinaryOperator>(I)->isExact()); - return InsertNewInstWith(NewVal, *I); - } else if ((KnownOne & SignBit) != 0) { // New bits are known one. + BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0), + I->getOperand(1)); + LShr->setIsExact(cast<BinaryOperator>(I)->isExact()); + return InsertNewInstWith(LShr, *I); + } else if ((KnownOne & SignMask) != 0) { // New bits are known one. KnownOne |= HighBits; } } break; + } case Instruction::SRem: if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { // X % -1 demands all the bits because we don't want to introduce @@ -624,7 +601,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I->getOperand(0); APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne, Depth + 1)) return I; @@ -635,26 +612,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If LHS is non-negative or has all low bits zero, then the upper bits // are all zero. - if (LHSKnownZero.isNegative() || ((LHSKnownZero & LowBits) == LowBits)) + if (LHSKnownZero.isSignBitSet() || ((LHSKnownZero & LowBits) == LowBits)) KnownZero |= ~LowBits; // If LHS is negative and not all low bits are zero, then the upper bits // are all one. - if (LHSKnownOne.isNegative() && ((LHSKnownOne & LowBits) != 0)) + if (LHSKnownOne.isSignBitSet() && ((LHSKnownOne & LowBits) != 0)) KnownOne |= ~LowBits; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + break; } } // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. - if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + if (DemandedMask.isSignBitSet()) { computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, CxtI); // If it's known zero, our sign bit is also zero. - if (LHSKnownZero.isNegative()) + if (LHSKnownZero.isSignBitSet()) KnownZero.setSignBit(); } break; @@ -744,7 +721,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) return Constant::getIntegerValue(VTy, KnownOne); return nullptr; } @@ -783,17 +760,15 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and' in this // context. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) return I->getOperand(1); KnownZero = std::move(IKnownZero); @@ -817,26 +792,15 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this // context. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) + if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) + if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero)) return I->getOperand(1); KnownZero = std::move(IKnownZero); @@ -861,14 +825,14 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known zero on one side, return the // other. - if ((DemandedMask & RHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(LHSKnownZero)) return I->getOperand(1); // Output known-0 bits are known if clear or set in both the LHS & RHS. @@ -883,7 +847,7 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If this user is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) return Constant::getIntegerValue(ITy, KnownOne); break; @@ -1641,7 +1605,52 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts.setHighBits(VWidth / 2); break; case Intrinsic::amdgcn_buffer_load: - case Intrinsic::amdgcn_buffer_load_format: { + case Intrinsic::amdgcn_buffer_load_format: + case Intrinsic::amdgcn_image_sample: + case Intrinsic::amdgcn_image_sample_cl: + case Intrinsic::amdgcn_image_sample_d: + case Intrinsic::amdgcn_image_sample_d_cl: + case Intrinsic::amdgcn_image_sample_l: + case Intrinsic::amdgcn_image_sample_b: + case Intrinsic::amdgcn_image_sample_b_cl: + case Intrinsic::amdgcn_image_sample_lz: + case Intrinsic::amdgcn_image_sample_cd: + case Intrinsic::amdgcn_image_sample_cd_cl: + + case Intrinsic::amdgcn_image_sample_c: + case Intrinsic::amdgcn_image_sample_c_cl: + case Intrinsic::amdgcn_image_sample_c_d: + case Intrinsic::amdgcn_image_sample_c_d_cl: + case Intrinsic::amdgcn_image_sample_c_l: + case Intrinsic::amdgcn_image_sample_c_b: + case Intrinsic::amdgcn_image_sample_c_b_cl: + case Intrinsic::amdgcn_image_sample_c_lz: + case Intrinsic::amdgcn_image_sample_c_cd: + case Intrinsic::amdgcn_image_sample_c_cd_cl: + + case Intrinsic::amdgcn_image_sample_o: + case Intrinsic::amdgcn_image_sample_cl_o: + case Intrinsic::amdgcn_image_sample_d_o: + case Intrinsic::amdgcn_image_sample_d_cl_o: + case Intrinsic::amdgcn_image_sample_l_o: + case Intrinsic::amdgcn_image_sample_b_o: + case Intrinsic::amdgcn_image_sample_b_cl_o: + case Intrinsic::amdgcn_image_sample_lz_o: + case Intrinsic::amdgcn_image_sample_cd_o: + case Intrinsic::amdgcn_image_sample_cd_cl_o: + + case Intrinsic::amdgcn_image_sample_c_o: + case Intrinsic::amdgcn_image_sample_c_cl_o: + case Intrinsic::amdgcn_image_sample_c_d_o: + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + case Intrinsic::amdgcn_image_sample_c_l_o: + case Intrinsic::amdgcn_image_sample_c_b_o: + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + case Intrinsic::amdgcn_image_sample_c_lz_o: + case Intrinsic::amdgcn_image_sample_c_cd_o: + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + + case Intrinsic::amdgcn_image_getlod: { if (VWidth == 1 || !DemandedElts.isMask()) return nullptr; @@ -1656,8 +1665,17 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts); - Function *NewIntrin = Intrinsic::getDeclaration(M, II->getIntrinsicID(), - NewTy); + auto IID = II->getIntrinsicID(); + + bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load || + IID == Intrinsic::amdgcn_buffer_load_format; + + Function *NewIntrin = IsBuffer ? + Intrinsic::getDeclaration(M, IID, NewTy) : + // Samplers have 3 mangled types. + Intrinsic::getDeclaration(M, IID, + { NewTy, II->getArgOperand(0)->getType(), + II->getArgOperand(1)->getType()}); SmallVector<Value *, 5> Args; for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I) @@ -1669,6 +1687,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, CallInst *NewCall = Builder->CreateCall(NewIntrin, Args); NewCall->takeName(II); NewCall->copyMetadata(*II); + + if (!IsBuffer) { + ConstantInt *DMask = dyn_cast<ConstantInt>(NewCall->getArgOperand(3)); + if (DMask) { + unsigned DMaskVal = DMask->getZExtValue() & 0xf; + + unsigned PopCnt = 0; + unsigned NewDMask = 0; + for (unsigned I = 0; I < 4; ++I) { + const unsigned Bit = 1 << I; + if (!!(DMaskVal & Bit)) { + if (++PopCnt > NewNumElts) + break; + + NewDMask |= Bit; + } + } + + NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask)); + } + } + + if (NewNumElts == 1) { return Builder->CreateInsertElement(UndefValue::get(V->getType()), NewCall, static_cast<uint64_t>(0)); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 88ef17bbc8fa..81f2d9fa179f 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -148,9 +148,9 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { bool Overflow = false; if (Opcode == Instruction::Add) - BVal->sadd_ov(*CVal, Overflow); + (void)BVal->sadd_ov(*CVal, Overflow); else - BVal->ssub_ov(*CVal, Overflow); + (void)BVal->ssub_ov(*CVal, Overflow); return !Overflow; } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 94cfc69ed555..036dd8d39a08 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2586,7 +2586,7 @@ void FunctionStackPoisoner::processStaticAllocas() { Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); - replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/true); + replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/false); AI->replaceAllUsesWith(NewAllocaPtr); } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index fa0c7cc5a4c5..8bdd917a0596 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -59,13 +59,8 @@ using namespace llvm; static const char *const SanCovModuleInitName = "__sanitizer_cov_module_init"; static const char *const SanCovName = "__sanitizer_cov"; static const char *const SanCovWithCheckName = "__sanitizer_cov_with_check"; -static const char *const SanCovIndirCallName = "__sanitizer_cov_indir_call16"; static const char *const SanCovTracePCIndirName = "__sanitizer_cov_trace_pc_indir"; -static const char *const SanCovTraceEnterName = - "__sanitizer_cov_trace_func_enter"; -static const char *const SanCovTraceBBName = - "__sanitizer_cov_trace_basic_block"; static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc"; static const char *const SanCovTraceCmp1 = "__sanitizer_cov_trace_cmp1"; static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2"; @@ -86,8 +81,7 @@ static const char *const SanCovTracePCGuardInitName = static cl::opt<int> ClCoverageLevel( "sanitizer-coverage-level", cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, " - "3: all blocks and critical edges, " - "4: above plus indirect calls"), + "3: all blocks and critical edges"), cl::Hidden, cl::init(0)); static cl::opt<unsigned> ClCoverageBlockThreshold( @@ -96,12 +90,6 @@ static cl::opt<unsigned> ClCoverageBlockThreshold( " more than this number of blocks."), cl::Hidden, cl::init(0)); -static cl::opt<bool> - ClExperimentalTracing("sanitizer-coverage-experimental-tracing", - cl::desc("Experimental basic-block tracing: insert " - "callbacks at every basic block"), - cl::Hidden, cl::init(false)); - static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc", cl::desc("Experimental pc tracing"), cl::Hidden, cl::init(false)); @@ -128,16 +116,6 @@ static cl::opt<bool> cl::desc("Reduce the number of instrumented blocks"), cl::Hidden, cl::init(true)); -// Experimental 8-bit counters used as an additional search heuristic during -// coverage-guided fuzzing. -// The counters are not thread-friendly: -// - contention on these counters may cause significant slowdown; -// - the counter updates are racy and the results may be inaccurate. -// They are also inaccurate due to 8-bit integer overflow. -static cl::opt<bool> ClUse8bitCounters("sanitizer-coverage-8bit-counters", - cl::desc("Experimental 8-bit counters"), - cl::Hidden, cl::init(false)); - namespace { SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) { @@ -168,11 +146,9 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel); Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType); Options.IndirectCalls |= CLOpts.IndirectCalls; - Options.TraceBB |= ClExperimentalTracing; Options.TraceCmp |= ClCMPTracing; Options.TraceDiv |= ClDIVTracing; Options.TraceGep |= ClGEPTracing; - Options.Use8bitCounters |= ClUse8bitCounters; Options.TracePC |= ClExperimentalTracePC; Options.TracePCGuard |= ClTracePCGuard; return Options; @@ -212,16 +188,15 @@ private: bool UseCalls); unsigned NumberOfInstrumentedBlocks() { return SanCovFunction->getNumUses() + - SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() + - SanCovTraceEnter->getNumUses(); + SanCovWithCheckFunction->getNumUses(); } StringRef getSanCovTracePCGuardSection() const; StringRef getSanCovTracePCGuardSectionStart() const; StringRef getSanCovTracePCGuardSectionEnd() const; Function *SanCovFunction; Function *SanCovWithCheckFunction; - Function *SanCovIndirCallFunction, *SanCovTracePCIndir; - Function *SanCovTraceEnter, *SanCovTraceBB, *SanCovTracePC, *SanCovTracePCGuard; + Function *SanCovTracePCIndir; + Function *SanCovTracePC, *SanCovTracePCGuard; Function *SanCovTraceCmpFunction[4]; Function *SanCovTraceDivFunction[2]; Function *SanCovTraceGepFunction; @@ -235,7 +210,6 @@ private: GlobalVariable *GuardArray; GlobalVariable *FunctionGuardArray; // for trace-pc-guard. - GlobalVariable *EightBitCounterArray; bool HasSancovGuardsSection; SanitizerCoverageOptions Options; @@ -267,9 +241,6 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy)); SanCovTracePCIndir = checkSanitizerInterfaceFunction( M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy)); - SanCovIndirCallFunction = - checkSanitizerInterfaceFunction(M.getOrInsertFunction( - SanCovIndirCallName, VoidTy, IntptrTy, IntptrTy)); SanCovTraceCmpFunction[0] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty())); @@ -305,24 +276,15 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M.getOrInsertFunction(SanCovTracePCName, VoidTy)); SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction( SanCovTracePCGuardName, VoidTy, Int32PtrTy)); - SanCovTraceEnter = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovTraceEnterName, VoidTy, Int32PtrTy)); - SanCovTraceBB = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovTraceBBName, VoidTy, Int32PtrTy)); // At this point we create a dummy array of guards because we don't // know how many elements we will need. Type *Int32Ty = IRB.getInt32Ty(); - Type *Int8Ty = IRB.getInt8Ty(); if (!Options.TracePCGuard) GuardArray = new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, nullptr, "__sancov_gen_cov_tmp"); - if (Options.Use8bitCounters) - EightBitCounterArray = - new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage, - nullptr, "__sancov_gen_cov_tmp"); for (auto &F : M) runOnFunction(F); @@ -344,20 +306,6 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { GuardArray->eraseFromParent(); } - GlobalVariable *RealEightBitCounterArray; - if (Options.Use8bitCounters) { - // Make sure the array is 16-aligned. - static const int CounterAlignment = 16; - Type *Int8ArrayNTy = ArrayType::get(Int8Ty, alignTo(N, CounterAlignment)); - RealEightBitCounterArray = new GlobalVariable( - M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter"); - RealEightBitCounterArray->setAlignment(CounterAlignment); - EightBitCounterArray->replaceAllUsesWith( - IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)); - EightBitCounterArray->eraseFromParent(); - } - // Create variable for module (compilation unit) name Constant *ModNameStrConst = ConstantDataArray::getString(M.getContext(), M.getName(), true); @@ -396,10 +344,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M, SanCovModuleCtorName, SanCovModuleInitName, {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy}, {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), - ConstantInt::get(IntptrTy, N), - Options.Use8bitCounters - ? IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy) - : Constant::getNullValue(Int8PtrTy), + ConstantInt::get(IntptrTy, N), Constant::getNullValue(Int8PtrTy), IRB.CreatePointerCast(ModuleName, Int8PtrTy)}); appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); @@ -566,26 +511,15 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( Function &F, ArrayRef<Instruction *> IndirCalls) { if (IndirCalls.empty()) return; - const int CacheSize = 16; - const int CacheAlignment = 64; // Align for better performance. - Type *Ty = ArrayType::get(IntptrTy, CacheSize); + if (!Options.TracePC && !Options.TracePCGuard) + return; for (auto I : IndirCalls) { IRBuilder<> IRB(I); CallSite CS(I); Value *Callee = CS.getCalledValue(); if (isa<InlineAsm>(Callee)) continue; - GlobalVariable *CalleeCache = new GlobalVariable( - *F.getParent(), Ty, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Ty), "__sancov_gen_callee_cache"); - CalleeCache->setAlignment(CacheAlignment); - if (Options.TracePC || Options.TracePCGuard) - IRB.CreateCall(SanCovTracePCIndir, - IRB.CreatePointerCast(Callee, IntptrTy)); - else - IRB.CreateCall(SanCovIndirCallFunction, - {IRB.CreatePointerCast(Callee, IntptrTy), - IRB.CreatePointerCast(CalleeCache, IntptrTy)}); + IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy)); } } @@ -735,9 +669,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, IRB.CreatePointerCast(GuardArray, IntptrTy), ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); - if (Options.TraceBB) { - IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); - } else if (UseCalls) { + if (UseCalls) { IRB.CreateCall(SanCovWithCheckFunction, GuardP); } else { LoadInst *Load = IRB.CreateLoad(GuardP); @@ -755,19 +687,6 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. } } - - if (Options.Use8bitCounters) { - IRB.SetInsertPoint(&*IP); - Value *P = IRB.CreateAdd( - IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), - ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); - P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy()); - LoadInst *LI = IRB.CreateLoad(P); - Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1)); - StoreInst *SI = IRB.CreateStore(Inc, P); - SetNoSanitizeMetadata(LI); - SetNoSanitizeMetadata(SI); - } } StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const { diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp index 6adfe130d148..b7514a6d5793 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -45,6 +45,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ValueTracking.h" @@ -1010,6 +1011,7 @@ public: AU.addRequired<MemorySSAWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<MemorySSAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); } }; } // namespace @@ -1026,6 +1028,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses PA; PA.preserve<DominatorTreeAnalysis>(); PA.preserve<MemorySSAAnalysis>(); + PA.preserve<GlobalsAA>(); return PA; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index cf63cb660db8..20b37c4b70e6 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -197,8 +197,7 @@ public: continue; // Only progagate the value if they are of the same type. - if (Store->getPointerOperand()->getType() != - Load->getPointerOperand()->getType()) + if (Store->getPointerOperandType() != Load->getPointerOperandType()) continue; Candidates.emplace_front(Load, Store); diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp index 86058fe0b1aa..fd15a9014def 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -557,7 +557,7 @@ bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) { Instruction *UUser = dyn_cast<Instruction>(UU); // Skip SExt if we are extending an nsw value // TODO: Allow ZExt too - if (BO->hasNoSignedWrap() && UUser && UUser->getNumUses() == 1 && + if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() && isa<SExtInst>(UUser)) UUser = dyn_cast<Instruction>(*(UUser->user_begin())); if (!isCompareUsedByBranch(UUser)) @@ -852,7 +852,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) { for (auto &KV : Roots) { if (KV.first == 0) continue; - if (KV.second->getNumUses() != NumBaseUses) { + if (!KV.second->hasNUses(NumBaseUses)) { DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: " << "#Base=" << NumBaseUses << ", #Root=" << KV.second->getNumUses() << "\n"); @@ -867,7 +867,7 @@ void LoopReroll::DAGRootTracker:: findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) { // Does the user look like it could be part of a root set? // All its users must be simple arithmetic ops. - if (I->getNumUses() > IL_MaxRerollIterations) + if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1)) return; if (I != IV && findRootsBase(I, SubsumedInsts)) diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp index 3d8ce888867e..a014ddd9ba0a 100644 --- a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -138,7 +138,8 @@ PHIExpression::~PHIExpression() = default; // It also wants to hand us SCC's that are unrelated to the phi node we ask // about, and have us process them there or risk redoing work. // Graph traits over a filter iterator also doesn't work that well here. -// This SCC finder is specialized to walk use-def chains, and only follows instructions, +// This SCC finder is specialized to walk use-def chains, and only follows +// instructions, // not generic values (arguments, etc). struct TarjanSCC { @@ -170,8 +171,10 @@ private: Root[I] = std::min(Root.lookup(I), Root.lookup(Op)); } } - // See if we really were the root of a component, by seeing if we still have our DFSNumber. - // If we do, we are the root of the component, and we have completed a component. If we do not, + // See if we really were the root of a component, by seeing if we still have + // our DFSNumber. + // If we do, we are the root of the component, and we have completed a + // component. If we do not, // we are not the root of a component, and belong on the component stack. if (Root.lookup(I) == OurDFS) { unsigned ComponentID = Components.size(); @@ -2254,12 +2257,13 @@ void NewGVN::initializeCongruenceClasses(Function &F) { MemoryAccessToClass[MSSA->getLiveOnEntryDef()] = createMemoryClass(MSSA->getLiveOnEntryDef()); - for (auto &B : F) { + for (auto DTN : nodes(DT)) { + BasicBlock *BB = DTN->getBlock(); // All MemoryAccesses are equivalent to live on entry to start. They must // be initialized to something so that initial changes are noticed. For // the maximal answer, we initialize them all to be the same as // liveOnEntry. - auto *MemoryBlockDefs = MSSA->getBlockDefs(&B); + auto *MemoryBlockDefs = MSSA->getBlockDefs(BB); if (MemoryBlockDefs) for (const auto &Def : *MemoryBlockDefs) { MemoryAccessToClass[&Def] = TOPClass; @@ -2274,7 +2278,7 @@ void NewGVN::initializeCongruenceClasses(Function &F) { if (MD && isa<StoreInst>(MD->getMemoryInst())) TOPClass->incStoreCount(); } - for (auto &I : B) { + for (auto &I : *BB) { // Don't insert void terminators into the class. We don't value number // them, and they just end up sitting in TOP. if (isa<TerminatorInst>(I) && I.getType()->isVoidTy()) @@ -2518,14 +2522,11 @@ void NewGVN::verifyMemoryCongruency() const { auto ReachableAccessPred = [&](const std::pair<const MemoryAccess *, CongruenceClass *> Pair) { bool Result = ReachableBlocks.count(Pair.first->getBlock()); - if (!Result) + if (!Result || MSSA->isLiveOnEntryDef(Pair.first) || + MemoryToDFSNum(Pair.first) == 0) return false; - if (MSSA->isLiveOnEntryDef(Pair.first)) - return true; if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first)) return !isInstructionTriviallyDead(MemDef->getMemoryInst()); - if (MemoryToDFSNum(Pair.first) == 0) - return false; return true; }; @@ -2719,25 +2720,13 @@ bool NewGVN::runGVN() { } // Now a standard depth first ordering of the domtree is equivalent to RPO. - auto DFI = df_begin(DT->getRootNode()); - for (auto DFE = df_end(DT->getRootNode()); DFI != DFE; ++DFI) { - BasicBlock *B = DFI->getBlock(); + for (auto DTN : depth_first(DT->getRootNode())) { + BasicBlock *B = DTN->getBlock(); const auto &BlockRange = assignDFSNumbers(B, ICount); BlockInstRange.insert({B, BlockRange}); ICount += BlockRange.second - BlockRange.first; } - // Handle forward unreachable blocks and figure out which blocks - // have single preds. - for (auto &B : F) { - // Assign numbers to unreachable blocks. - if (!DFI.nodeVisited(DT->getNode(&B))) { - const auto &BlockRange = assignDFSNumbers(&B, ICount); - BlockInstRange.insert({&B, BlockRange}); - ICount += BlockRange.second - BlockRange.first; - } - } - TouchedInstructions.resize(ICount); // Ensure we don't end up resizing the expressionToClass map, as // that can be quite expensive. At most, we have one expression per diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 49ce0262c97b..659353e912fe 100644 --- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -352,10 +352,20 @@ Value *StructurizeCFG::invert(Value *Condition) { if (Instruction *Inst = dyn_cast<Instruction>(Condition)) { // Third: Check all the users for an invert BasicBlock *Parent = Inst->getParent(); - for (User *U : Condition->users()) - if (Instruction *I = dyn_cast<Instruction>(U)) + for (User *U : Condition->users()) { + if (Instruction *I = dyn_cast<Instruction>(U)) { if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition)))) return I; + } + } + + // Avoid creating a new instruction in the common case of a compare. + if (CmpInst *Cmp = dyn_cast<CmpInst>(Inst)) { + if (Cmp->hasOneUse()) { + Cmp->setPredicate(Cmp->getInversePredicate()); + return Cmp; + } + } // Last option: Create a new instruction return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator()); diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp index 60ae3745c835..9f4d9c7e3981 100644 --- a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -73,17 +73,17 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred, default: return false; case ICmpInst::ICMP_SLT: - // X < 0 is equivalent to (X & SignBit) != 0. + // X < 0 is equivalent to (X & SignMask) != 0. if (!C->isZero()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SGT: - // X > -1 is equivalent to (X & SignBit) == 0. + // X > -1 is equivalent to (X & SignMask) == 0. if (!C->isAllOnesValue()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULT: diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 644d93b727b3..82552684b832 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -112,24 +112,6 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) { return buildExtractionBlockSet(BBs.begin(), BBs.end()); } -/// \brief Helper to call buildExtractionBlockSet with a RegionNode. -static SetVector<BasicBlock *> -buildExtractionBlockSet(const RegionNode &RN) { - if (!RN.isSubRegion()) - // Just a single BasicBlock. - return buildExtractionBlockSet(RN.getNodeAs<BasicBlock>()); - - const Region &R = *RN.getNodeAs<Region>(); - - return buildExtractionBlockSet(R.block_begin(), R.block_end()); -} - -CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs, - BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(nullptr), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} - CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) @@ -143,12 +125,6 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())), NumExitBlocks(~0U) {} -CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN, - bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {} - /// definedInRegion - Return true if the specified value is defined in the /// extracted region. static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) { diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index 49b4bd92faf4..089f2b5f3b18 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -85,6 +85,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, UsesToRewrite.clear(); Instruction *I = Worklist.pop_back_val(); + assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist"); BasicBlock *InstBB = I->getParent(); Loop *L = LI.getLoopFor(InstBB); assert(L && "Instruction belongs to a BB that's not part of a loop"); @@ -96,13 +97,6 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, if (ExitBlocks.empty()) continue; - // Tokens cannot be used in PHI nodes, so we skip over them. - // We can run into tokens which are live out of a loop with catchswitch - // instructions in Windows EH if the catchswitch has one catchpad which - // is inside the loop and another which is not. - if (I->getType()->isTokenTy()) - continue; - for (Use &U : I->uses()) { Instruction *User = cast<Instruction>(U.getUser()); BasicBlock *UserBB = User->getParent(); @@ -214,13 +208,9 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // Post process PHI instructions that were inserted into another disjoint // loop and update their exits properly. - for (auto *PostProcessPN : PostProcessPHIs) { - if (PostProcessPN->use_empty()) - continue; - - // Reprocess each PHI instruction. - Worklist.push_back(PostProcessPN); - } + for (auto *PostProcessPN : PostProcessPHIs) + if (!PostProcessPN->use_empty()) + Worklist.push_back(PostProcessPN); // Keep track of PHI nodes that we want to remove because they did not have // any uses rewritten. @@ -241,7 +231,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // Compute the set of BasicBlocks in the loop `L` dominating at least one exit. static void computeBlocksDominatingExits( Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks, - SmallPtrSet<BasicBlock *, 8> &BlocksDominatingExits) { + SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) { SmallVector<BasicBlock *, 8> BBWorklist; // We start from the exit blocks, as every block trivially dominates itself @@ -279,7 +269,7 @@ static void computeBlocksDominatingExits( if (!L.contains(IDomBB)) continue; - if (BlocksDominatingExits.insert(IDomBB).second) + if (BlocksDominatingExits.insert(IDomBB)) BBWorklist.push_back(IDomBB); } } @@ -293,7 +283,7 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, if (ExitBlocks.empty()) return false; - SmallPtrSet<BasicBlock *, 8> BlocksDominatingExits; + SmallSetVector<BasicBlock *, 8> BlocksDominatingExits; // We want to avoid use-scanning leveraging dominance informations. // If a block doesn't dominate any of the loop exits, the none of the values @@ -315,6 +305,13 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, !isa<PHINode>(I.user_back()))) continue; + // Tokens cannot be used in PHI nodes, so we skip over them. + // We can run into tokens which are live out of a loop with catchswitch + // instructions in Windows EH if the catchswitch has one catchpad which + // is inside the loop and another which is not. + if (I.getType()->isTokenTy()) + continue; + Worklist.push_back(&I); } } diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 18b29226c2ef..8c5442762643 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -1227,13 +1227,9 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. - SmallVector<uint64_t, 1> NewDIExpr; - auto *DIExpr = DDI->getExpression(); - NewDIExpr.push_back(dwarf::DW_OP_deref); - NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(), - DIB.createExpression(NewDIExpr), - DDI->getDebugLoc(), CI); + DDI->getExpression(), DDI->getDebugLoc(), + CI); } } DDI->eraseFromParent(); diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp index 73c14f5606b7..5c21490793e7 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -46,6 +46,11 @@ static cl::opt<unsigned> UnrollForcePeelCount( "unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information.")); +// Designates that a Phi is estimated to become invariant after an "infinite" +// number of loop iterations (i.e. only may become an invariant if the loop is +// fully unrolled). +static const unsigned InfiniteIterationsToInvariance = UINT_MAX; + // Check whether we are capable of peeling this loop. static bool canPeel(Loop *L) { // Make sure the loop is in simplified form @@ -66,10 +71,62 @@ static bool canPeel(Loop *L) { return true; } +// This function calculates the number of iterations after which the given Phi +// becomes an invariant. The pre-calculated values are memorized in the map. The +// function (shortcut is I) is calculated according to the following definition: +// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge]. +// If %y is a loop invariant, then I(%x) = 1. +// If %y is a Phi from the loop header, I(%x) = I(%y) + 1. +// Otherwise, I(%x) is infinite. +// TODO: Actually if %y is an expression that depends only on Phi %z and some +// loop invariants, we can estimate I(%x) = I(%z) + 1. The example +// looks like: +// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration. +// %y = phi(0, 5), +// %a = %y + 1. +static unsigned calculateIterationsToInvariance( + PHINode *Phi, Loop *L, BasicBlock *BackEdge, + SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) { + assert(Phi->getParent() == L->getHeader() && + "Non-loop Phi should not be checked for turning into invariant."); + assert(BackEdge == L->getLoopLatch() && "Wrong latch?"); + // If we already know the answer, take it from the map. + auto I = IterationsToInvariance.find(Phi); + if (I != IterationsToInvariance.end()) + return I->second; + + // Otherwise we need to analyze the input from the back edge. + Value *Input = Phi->getIncomingValueForBlock(BackEdge); + // Place infinity to map to avoid infinite recursion for cycled Phis. Such + // cycles can never stop on an invariant. + IterationsToInvariance[Phi] = InfiniteIterationsToInvariance; + unsigned ToInvariance = InfiniteIterationsToInvariance; + + if (L->isLoopInvariant(Input)) + ToInvariance = 1u; + else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) { + // Only consider Phis in header block. + if (IncPhi->getParent() != L->getHeader()) + return InfiniteIterationsToInvariance; + // If the input becomes an invariant after X iterations, then our Phi + // becomes an invariant after X + 1 iterations. + unsigned InputToInvariance = calculateIterationsToInvariance( + IncPhi, L, BackEdge, IterationsToInvariance); + if (InputToInvariance != InfiniteIterationsToInvariance) + ToInvariance = InputToInvariance + 1u; + } + + // If we found that this Phi lies in an invariant chain, update the map. + if (ToInvariance != InfiniteIterationsToInvariance) + IterationsToInvariance[Phi] = ToInvariance; + return ToInvariance; +} + // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount) { + assert(LoopSize > 0 && "Zero loop size is not allowed!"); UP.PeelCount = 0; if (!canPeel(L)) return; @@ -78,30 +135,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (!L->empty()) return; - // Try to find a Phi node that has the same loop invariant as an input from - // its only back edge. If there is such Phi, peeling 1 iteration from the - // loop is profitable, because starting from 2nd iteration we will have an - // invariant instead of this Phi. - if (LoopSize <= UP.Threshold) { + // Here we try to get rid of Phis which become invariants after 1, 2, ..., N + // iterations of the loop. For this we compute the number for iterations after + // which every Phi is guaranteed to become an invariant, and try to peel the + // maximum number of iterations among these values, thus turning all those + // Phis into invariants. + // First, check that we can peel at least one iteration. + if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) { + // Store the pre-calculated values here. + SmallDenseMap<PHINode *, unsigned> IterationsToInvariance; + // Now go through all Phis to calculate their the number of iterations they + // need to become invariants. + unsigned DesiredPeelCount = 0; BasicBlock *BackEdge = L->getLoopLatch(); assert(BackEdge && "Loop is not in simplified form?"); - BasicBlock *Header = L->getHeader(); - // Iterate over Phis to find one with invariant input on back edge. - bool FoundCandidate = false; - PHINode *Phi; - for (auto BI = Header->begin(); isa<PHINode>(&*BI); ++BI) { - Phi = cast<PHINode>(&*BI); - Value *Input = Phi->getIncomingValueForBlock(BackEdge); - if (L->isLoopInvariant(Input)) { - FoundCandidate = true; - break; - } + for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) { + PHINode *Phi = cast<PHINode>(&*BI); + unsigned ToInvariance = calculateIterationsToInvariance( + Phi, L, BackEdge, IterationsToInvariance); + if (ToInvariance != InfiniteIterationsToInvariance) + DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance); } - if (FoundCandidate) { - DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi - << " because starting from 2nd iteration it is always" - << " an invariant\n"); - UP.PeelCount = 1; + if (DesiredPeelCount > 0) { + // Pay respect to limitations implied by loop size and the max peel count. + unsigned MaxPeelCount = UnrollPeelMaxCount; + MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); + DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); + // Consider max peel count limitation. + assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); + DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn" + << " some Phis into invariants.\n"); + UP.PeelCount = DesiredPeelCount; return; } } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 127a44df5344..2f575b9d5027 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3086,7 +3086,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) return false; - if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2) + if (!PostBB->hasNUses(2) || !QBI->getParent()->hasNUses(2)) return false; // OK, this is a sequence of two diamonds or triangles. diff --git a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp index 4aeea02b1b1b..83bd29dbca65 100644 --- a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -24,6 +24,11 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy)) return false; + // Don't coerce non-integral pointers to integers or vice versa. + if (DL.isNonIntegralPointerType(StoredVal->getType()) != + DL.isNonIntegralPointerType(LoadTy)) + return false; + return true; } diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 595b2ec88943..7eb8fabe0b2f 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -422,7 +422,8 @@ protected: // When we if-convert we need to create edge masks. We have to cache values // so that we don't end up with exponential recursion/IR. typedef DenseMap<std::pair<BasicBlock *, BasicBlock *>, VectorParts> - EdgeMaskCache; + EdgeMaskCacheTy; + typedef DenseMap<BasicBlock *, VectorParts> BlockMaskCacheTy; /// Create an empty loop, based on the loop ranges of the old loop. void createEmptyLoop(); @@ -785,7 +786,8 @@ protected: /// Store instructions that should be predicated, as a pair /// <StoreInst, Predicate> SmallVector<std::pair<Instruction *, Value *>, 4> PredicatedInstructions; - EdgeMaskCache MaskCache; + EdgeMaskCacheTy EdgeMaskCache; + BlockMaskCacheTy BlockMaskCache; /// Trip count of the original loop. Value *TripCount; /// Trip count of the widened loop (TripCount - TripCount % (VF*UF)) @@ -4560,8 +4562,8 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { // Look for cached value. std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst); - EdgeMaskCache::iterator ECEntryIt = MaskCache.find(Edge); - if (ECEntryIt != MaskCache.end()) + EdgeMaskCacheTy::iterator ECEntryIt = EdgeMaskCache.find(Edge); + if (ECEntryIt != EdgeMaskCache.end()) return ECEntryIt->second; VectorParts SrcMask = createBlockInMask(Src); @@ -4580,11 +4582,11 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { for (unsigned part = 0; part < UF; ++part) EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]); - MaskCache[Edge] = EdgeMask; + EdgeMaskCache[Edge] = EdgeMask; return EdgeMask; } - MaskCache[Edge] = SrcMask; + EdgeMaskCache[Edge] = SrcMask; return SrcMask; } @@ -4592,10 +4594,17 @@ InnerLoopVectorizer::VectorParts InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); + // Look for cached value. + BlockMaskCacheTy::iterator BCEntryIt = BlockMaskCache.find(BB); + if (BCEntryIt != BlockMaskCache.end()) + return BCEntryIt->second; + // Loop incoming mask is all-one. if (OrigLoop->getHeader() == BB) { Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1); - return getVectorValue(C); + const VectorParts &BlockMask = getVectorValue(C); + BlockMaskCache[BB] = BlockMask; + return BlockMask; } // This is the block mask. We OR all incoming edges, and with zero. @@ -4609,6 +4618,7 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]); } + BlockMaskCache[BB] = BlockMask; return BlockMask; } diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index da3ac06ab464..554944404708 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4146,8 +4146,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, if (AllowReorder && R.shouldReorder()) { // Conceptually, there is nothing actually preventing us from trying to // reorder a larger list. In fact, we do exactly this when vectorizing - // reductions. However, at this point, we only expect to get here from - // tryToVectorizePair(). + // reductions. However, at this point, we only expect to get here when + // there are exactly two operations. assert(Ops.size() == 2); assert(BuildVectorSlice.empty()); Value *ReorderedOps[] = {Ops[1], Ops[0]}; @@ -4904,7 +4904,13 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // Try to vectorize them. unsigned NumElts = (SameTypeIt - IncIt); DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n"); - if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) { + // The order in which the phi nodes appear in the program does not matter. + // So allow tryToVectorizeList to reorder them if it is beneficial. This + // is done when there are exactly two elements since tryToVectorizeList + // asserts that there are only two values when AllowReorder is true. + bool AllowReorder = NumElts == 2; + if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, + None, AllowReorder)) { // Success start over because instructions might have been changed. HaveVectorizedPhiNodes = true; Changed = true; |