Diffstat (limited to 'contrib/llvm/lib')
335 files changed, 9971 insertions, 6341 deletions
diff --git a/contrib/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm/lib/Analysis/AssumptionCache.cpp index 1fae94724487..0468c794e81d 100644 --- a/contrib/llvm/lib/Analysis/AssumptionCache.cpp +++ b/contrib/llvm/lib/Analysis/AssumptionCache.cpp @@ -29,15 +29,16 @@ static cl::opt<bool> cl::desc("Enable verification of assumption cache"), cl::init(false)); -SmallVector<WeakVH, 1> &AssumptionCache::getOrInsertAffectedValues(Value *V) { +SmallVector<WeakTrackingVH, 1> & +AssumptionCache::getOrInsertAffectedValues(Value *V) { // Try using find_as first to avoid creating extra value handles just for the // purpose of doing the lookup. auto AVI = AffectedValues.find_as(V); if (AVI != AffectedValues.end()) return AVI->second; - auto AVIP = AffectedValues.insert({ - AffectedValueCallbackVH(V, this), SmallVector<WeakVH, 1>()}); + auto AVIP = AffectedValues.insert( + {AffectedValueCallbackVH(V, this), SmallVector<WeakTrackingVH, 1>()}); return AVIP.first->second; } diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h index 75726e84569b..a8fb12b72568 100644 --- a/contrib/llvm/lib/Analysis/CFLGraph.h +++ b/contrib/llvm/lib/Analysis/CFLGraph.h @@ -429,7 +429,7 @@ template <typename CFLAA> class CFLGraphBuilder { if (Inst->getType()->isPointerTy()) { auto *Fn = CS.getCalledFunction(); - if (Fn == nullptr || !Fn->doesNotAlias(0)) + if (Fn == nullptr || !Fn->returnDoesNotAlias()) // No need to call addNode() since we've added Inst at the // beginning of this function and we know it is not a global. Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown()); diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp index ea70f5752c61..8058e5b1935c 100644 --- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -204,7 +204,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, // Get the set of call sites currently in the function. for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { // If this call site is null, then the function pass deleted the call - // entirely and the WeakVH nulled it out. + // entirely and the WeakTrackingVH nulled it out. if (!I->first || // If we've already seen this call site, then the FunctionPass RAUW'd // one call with another, which resulted in two "uses" in the edge @@ -347,7 +347,8 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, DevirtualizedCall = true; // After scanning this function, if we still have entries in callsites, then - // they are dangling pointers. WeakVH should save us for this, so abort if + // they are dangling pointers. WeakTrackingVH should save us for this, so + // abort if // this happens. assert(CallSites.empty() && "Dangling pointers found in call sites map"); diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp index 285339deaaf5..9f5dc5318239 100644 --- a/contrib/llvm/lib/Analysis/DemandedBits.cpp +++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp @@ -181,7 +181,7 @@ void DemandedBits::determineLiveOperandBits( // bits, then we must keep the highest input bit. if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt)) .getBoolValue()) - AB.setBit(BitWidth-1); + AB.setSignBit(); // If the shift is exact, then the low bits are not dead // (they must be zero). 
@@ -239,7 +239,7 @@ void DemandedBits::determineLiveOperandBits( if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(), AOut.getBitWidth() - BitWidth)) .getBoolValue()) - AB.setBit(BitWidth-1); + AB.setSignBit(); break; case Instruction::Select: if (OperandNo != 0) diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp index 788f908bafca..100a591e452c 100644 --- a/contrib/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/InlineCost.cpp @@ -54,6 +54,11 @@ static cl::opt<int> cl::init(45), cl::desc("Threshold for inlining cold callsites")); +static cl::opt<bool> + EnableGenericSwitchCost("inline-generic-switch-cost", cl::Hidden, + cl::init(false), + cl::desc("Enable generic switch cost model")); + // We introduce this threshold to help performance of instrumentation based // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. @@ -998,11 +1003,72 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { if (isa<ConstantInt>(V)) return true; - // Otherwise, we need to accumulate a cost proportional to the number of - // distinct successor blocks. This fan-out in the CFG cannot be represented - // for free even if we can represent the core switch as a jumptable that - // takes a single instruction. - // + if (EnableGenericSwitchCost) { + // Assume the most general case where the switch is lowered into + // either a jump table, bit test, or a balanced binary tree consisting of + // case clusters without merging adjacent clusters with the same + // destination. We do not consider the switches that are lowered with a mix + // of jump table/bit test/binary search tree. The cost of the switch is + // proportional to the size of the tree or the size of jump table range. + + // Exit early for a large switch, assuming one case needs at least one + // instruction. + // FIXME: This is not true for a bit test, but ignore such cases for now to + // save compile-time. + int64_t CostLowerBound = + std::min((int64_t)INT_MAX, + (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); + + if (CostLowerBound > Threshold) { + Cost = CostLowerBound; + return false; + } + + unsigned JumpTableSize = 0; + unsigned NumCaseCluster = + TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize); + + // If suitable for a jump table, consider the cost for the table size and + // branch to destination. + if (JumpTableSize) { + int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + + 4 * InlineConstants::InstrCost; + Cost = std::min((int64_t)INT_MAX, JTCost + Cost); + return false; + } + + // Considering forming a binary search, we should find the number of nodes + // which is the same as the number of comparisons when lowered. For a given + // number of clusters, n, we can define a recursive function, f(n), to find + // the number of nodes in the tree. The recursion is: + // f(n) = 1 + f(n/2) + f(n - n/2), when n > 3, + // and f(n) = n, when n <= 3. + // This will lead to a binary tree where the leaves should be either f(2) or + // f(3) when n > 3. So, the number of comparisons from leaves should be n, + // while the number of non-leaf nodes should be: + // 2^(log2(n) - 1) - 1 + // = 2^log2(n) * 2^-1 - 1 + // = n / 2 - 1. + // Considering comparisons from leaf and non-leaf nodes, we can estimate the + // number of comparisons in a simple closed form: + // n + n / 2 - 1 = n * 3 / 2 - 1 + if (NumCaseCluster <= 3) { + // Suppose a comparison includes one compare and one conditional branch.
+ Cost += NumCaseCluster * 2 * InlineConstants::InstrCost; + return false; + } + int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1; + uint64_t SwitchCost = + ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; + Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost); + return false; + } + + // Use a simple switch cost model where we accumulate a cost proportional to + // the number of distinct successor blocks. This fan-out in the CFG cannot + // be represented for free even if we can represent the core switch as a + // jumptable that takes a single instruction. + /// // NB: We convert large switches which are just used to initialize large phi // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent // inlining those. It will prevent inlining in cases where the optimization @@ -1217,36 +1283,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // the rest of the function body. Threshold += (SingleBBBonus + FiftyPercentVectorBonus); - // Give out bonuses per argument, as the instructions setting them up will - // be gone after inlining. - for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { - if (CS.isByValArgument(I)) { - // We approximate the number of loads and stores needed by dividing the - // size of the byval type by the target's pointer size. - PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); - unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = DL.getPointerSizeInBits(); - // Ceiling division. - unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; - - // If it generates more than 8 stores it is likely to be expanded as an - // inline memcpy so we take that as an upper bound. Otherwise we assume - // one load and one store per word copied. - // FIXME: The maxStoresPerMemcpy setting from the target should be used - // here instead of a magic number of 8, but it's not available via - // DataLayout. - NumStores = std::min(NumStores, 8U); + // Give out bonuses for the callsite, as the instructions setting them up + // will be gone after inlining. + Cost -= getCallsiteCost(CS, DL); - Cost -= 2 * NumStores * InlineConstants::InstrCost; - } else { - // For non-byval arguments subtract off one instruction per call - // argument. - Cost -= InlineConstants::InstrCost; - } - } - // The call instruction also disappears after inlining. - Cost -= InlineConstants::InstrCost + InlineConstants::CallPenalty; - // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. bool OnlyOneCallAndLocalLinkage = @@ -1431,6 +1471,38 @@ static bool functionsHaveCompatibleAttributes(Function *Caller, AttributeFuncs::areInlineCompatible(*Caller, *Callee); } +int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) { + int Cost = 0; + for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { + if (CS.isByValArgument(I)) { + // We approximate the number of loads and stores needed by dividing the + // size of the byval type by the target's pointer size. + PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); + unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = DL.getPointerSizeInBits(); + // Ceiling division. + unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; + + // If it generates more than 8 stores it is likely to be expanded as an + // inline memcpy so we take that as an upper bound. Otherwise we assume + // one load and one store per word copied. 
+ // FIXME: The maxStoresPerMemcpy setting from the target should be used + // here instead of a magic number of 8, but it's not available via + // DataLayout. + NumStores = std::min(NumStores, 8U); + + Cost += 2 * NumStores * InlineConstants::InstrCost; + } else { + // For non-byval arguments subtract off one instruction per call + // argument. + Cost += InlineConstants::InstrCost; + } + } + // The call instruction also disappears after inlining. + Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty; + return Cost; +} + InlineCost llvm::getInlineCost( CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index e720e3ebecdb..7aa6abf8fa48 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -21,8 +21,10 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -584,14 +586,6 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Query) { return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query, RecursionLimit); } @@ -800,14 +794,6 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Q) { return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); } @@ -954,27 +940,10 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, } Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFAddInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); } -Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFSubInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { 
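Returning to the switch cost model added in InlineCost.cpp above, its estimate can be sanity-checked mechanically: the recursion f(n) = 1 + f(n/2) + f(n - n/2), with f(n) = n for n <= 3, counts comparisons exactly, while the patch uses the closed form 3n/2 - 1. The two agree exactly for powers of two (f(8) = 11, f(16) = 23) and the closed form overestimates somewhat otherwise (f(6) = 7 versus 8). A standalone C++ sketch, assuming InlineConstants::InstrCost is 5 as in this tree and that each comparison costs one compare plus one conditional branch:

#include <cstdint>
#include <cstdio>

// Exact node count from the comment in visitSwitchInst above.
static int64_t ExactCompares(int64_t N) {
  if (N <= 3)
    return N;
  return 1 + ExactCompares(N / 2) + ExactCompares(N - N / 2);
}

int main() {
  const int64_t InstrCost = 5; // assumed value of InlineConstants::InstrCost
  for (int64_t N : {4, 5, 6, 7, 8, 16, 100}) {
    int64_t Estimate = 3 * N / 2 - 1; // closed form used by the patch
    std::printf("n=%3lld exact=%4lld estimate=%4lld cost=%5lld\n",
                (long long)N, (long long)ExactCompares(N),
                (long long)Estimate, (long long)(Estimate * 2 * InstrCost));
  }
  return 0;
}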
@@ -982,26 +951,10 @@ Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, } Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFMulInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); } -Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyMulInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); -} - Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit); } @@ -1124,13 +1077,6 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return nullptr; } -Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifySDivInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); -} - Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit); } @@ -1155,13 +1101,6 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return nullptr; } -Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyUDivInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); -} - Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit); } @@ -1208,15 +1147,6 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, } Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFDivInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit); } @@ -1263,13 +1193,6 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return nullptr; } -Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifySRemInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); -} - Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit); } @@ -1294,13 +1217,6 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return nullptr; } -Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyURemInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); -} - Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, 
const SimplifyQuery &Q) { return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit); } @@ -1328,15 +1244,6 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, } Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFRemInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit); } @@ -1465,14 +1372,6 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Q) { return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); } @@ -1494,15 +1393,6 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyLShrInst(Op0, Op1, isExact, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q) { return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit); } @@ -1533,15 +1423,6 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyAShrInst(Op0, Op1, isExact, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q) { return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit); } @@ -1793,13 +1674,6 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return nullptr; } -Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyAndInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); -} - Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit); } @@ -2023,13 +1897,6 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return nullptr; } -Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyOrInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); -} - Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit); } @@ -2075,13 +1942,6 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return nullptr; } -Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, 
const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyXorInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); -} - Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit); } @@ -3449,15 +3309,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q) { return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit); } @@ -3587,15 +3438,6 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - FastMathFlags FMF, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit); } @@ -3845,9 +3687,9 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, return TrueVal; if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y - if (isa<Constant>(TrueVal)) - return TrueVal; - return FalseVal; + if (isa<Constant>(FalseVal)) + return FalseVal; + return TrueVal; } if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X return FalseVal; @@ -3862,15 +3704,6 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, } Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifySelectInst(Cond, TrueVal, FalseVal, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const SimplifyQuery &Q) { return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit); } @@ -3988,14 +3821,6 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, } Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyGEPInst(SrcTy, Ops, {DL, TLI, DT, AC, CxtI}, RecursionLimit); -} - -Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, const SimplifyQuery &Q) { return ::SimplifyGEPInst(SrcTy, Ops, Q, RecursionLimit); } @@ -4029,14 +3854,6 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, return nullptr; } -Value *llvm::SimplifyInsertValueInst( - Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL, - const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyInsertValueInst(Agg, Val, Idxs, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const 
SimplifyQuery &Q) { @@ -4069,16 +3886,6 @@ static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, } Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyExtractValueInst(Agg, Idxs, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, const SimplifyQuery &Q) { return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit); } @@ -4108,13 +3915,6 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ return nullptr; } -Value *llvm::SimplifyExtractElementInst( - Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyExtractElementInst(Vec, Idx, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q) { return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit); @@ -4188,15 +3988,6 @@ static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, } Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyCastInst(CastOpc, Op, Ty, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const SimplifyQuery &Q) { return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit); } @@ -4258,6 +4049,9 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, const SimplifyQuery &Q, unsigned MaxRecurse) { + if (isa<UndefValue>(Mask)) + return UndefValue::get(RetTy); + Type *InVecTy = Op0->getType(); unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); unsigned InVecNumElts = InVecTy->getVectorNumElements(); @@ -4269,14 +4063,18 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, if (Op0Const && Op1Const) return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask); + SmallVector<int, 32> Indices; + ShuffleVectorInst::getShuffleMask(Mask, Indices); + assert(MaskNumElts == Indices.size() && + "Size of Indices not same as number of mask elements?"); + // If only one of the operands is constant, constant fold the shuffle if the // mask does not select elements from the variable operand. bool MaskSelects0 = false, MaskSelects1 = false; for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = ShuffleVectorInst::getMaskValue(Mask, i); - if (Idx == -1) + if (Indices[i] == -1) continue; - if ((unsigned)Idx < InVecNumElts) + if ((unsigned)Indices[i] < InVecNumElts) MaskSelects0 = true; else MaskSelects1 = true; @@ -4302,9 +4100,8 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, // Don't fold a shuffle with undef mask elements. This may get folded in a // better way using demanded bits or other analysis. // TODO: Should we allow this? 
- for (unsigned i = 0; i != MaskNumElts; ++i) - if (ShuffleVectorInst::getMaskValue(Mask, i) == -1) - return nullptr; + if (find(Indices, -1) != Indices.end()) + return nullptr; // Check if every element of this shuffle can be mapped back to the // corresponding element of a single root vector. If so, we don't need this @@ -4324,14 +4121,6 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, } /// Given operands for a ShuffleVectorInst, fold the result or return null. -Value *llvm::SimplifyShuffleVectorInst( - Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, - const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, - {DL, TLI, DT, AC, CxtI}, RecursionLimit); -} - Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, const SimplifyQuery &Q) { return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); @@ -4407,28 +4196,11 @@ static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, } Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyBinOp(Opcode, LHS, RHS, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q) { return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); } Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); } @@ -4442,14 +4214,6 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q) { return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit); } @@ -4673,42 +4437,21 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, } Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, - User::op_iterator ArgEnd, const DataLayout &DL, - const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCall(V, ArgBegin, ArgEnd, {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const SimplifyQuery &Q) { return ::SimplifyCall(V, ArgBegin, ArgEnd, Q, RecursionLimit); } Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyCall(V, Args.begin(), Args.end(), {DL, TLI, DT, AC, CxtI}, - RecursionLimit); -} - -Value 
*llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, const SimplifyQuery &Q) { return ::SimplifyCall(V, Args.begin(), Args.end(), Q, RecursionLimit); } /// See if we can compute a simplified version of this instruction. /// If not, this returns null. -Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - OptimizationRemarkEmitter *ORE) { - return SimplifyInstruction(I, {DL, TLI, DT, AC, I}, ORE); -} -Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, +Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, OptimizationRemarkEmitter *ORE) { + const SimplifyQuery Q = SQ.CxtI ? SQ : SQ.getWithInstruction(I); Value *Result; switch (I->getOpcode()) { @@ -4905,7 +4648,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, I = Worklist[Idx]; // See if this instruction simplifies. - SimpleV = SimplifyInstruction(I, DL, TLI, DT, AC); + SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC}); if (!SimpleV) continue; @@ -4944,3 +4687,31 @@ bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, assert(SimpleV && "Must provide a simplified value."); return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC); } + +namespace llvm { +const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) { + auto *DTWP = P.getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *TLIWP = P.getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); + auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; + auto *ACWP = P.getAnalysisIfAvailable<AssumptionCacheTracker>(); + auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr; + return {F.getParent()->getDataLayout(), TLI, DT, AC}; +} + +const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &AR, + const DataLayout &DL) { + return {DL, &AR.TLI, &AR.DT, &AR.AC}; +} + +template <class T, class... TArgs> +const SimplifyQuery getBestSimplifyQuery(AnalysisManager<T, TArgs...> &AM, + Function &F) { + auto *DT = AM.template getCachedResult<DominatorTreeAnalysis>(F); + auto *TLI = AM.template getCachedResult<TargetLibraryAnalysis>(F); + auto *AC = AM.template getCachedResult<AssumptionAnalysis>(F); + return {F.getParent()->getDataLayout(), TLI, DT, AC}; +} +template const SimplifyQuery getBestSimplifyQuery(AnalysisManager<Function> &, + Function &); +} diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index ad01f7f2f215..a98383eaf4aa 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -920,7 +920,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV, // value is overdefined. if (BB == &BB->getParent()->getEntryBlock()) { assert(isa<Argument>(Val) && "Unknown live-in to the entry block"); - // Bofore giving up, see if we can prove the pointer non-null local to + // Before giving up, see if we can prove the pointer non-null local to // this particular block. if (Val->getType()->isPointerTy() && (isKnownNonNull(Val) || isObjectDereferencedInBlock(Val, BB))) { diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index 0f04af54cdc7..598138246445 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -699,7 +699,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // As a last resort, try SimplifyInstruction or constant folding. 
if (Instruction *Inst = dyn_cast<Instruction>(V)) { - if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC)) + if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC})) return findValueImpl(W, OffsetOk, Visited); } else if (auto *C = dyn_cast<Constant>(V)) { if (Value *W = ConstantFoldConstant(C, *DL, TLI)) diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index f6d9a73e4e9a..a83412506a07 100644 --- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -451,12 +451,6 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( auto &Summary = GlobalList.second[0]; bool AllRefsCanBeExternallyReferenced = llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) { - // If a global value definition references an unnamed global, - // be conservative. They're valid IR so we don't want to crash - // when we encounter any of them but they're infrequent enough - // that we don't bother optimizing them. - if (!VI.getValue()->hasName()) - return false; return !CantBePromoted.count(VI.getValue()->getGUID()); }); if (!AllRefsCanBeExternallyReferenced) { diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp index 84ecd4ab9809..682af4dc708e 100644 --- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp +++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp @@ -227,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // Simplify the GEP to handle 'gep x, 0' -> x etc. if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(), - GEPOps, DL, TLI, DT, AC)) { + GEPOps, {DL, TLI, DT, AC})) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); @@ -276,7 +276,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, } // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AC)) { + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, {DL, TLI, DT, AC})) { // If we simplified the operands, the LHS is no longer an input, but Res // is. RemoveInstInputs(LHS, InstInputs); diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index 3ac4bf1276eb..bd747f7c0b7a 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4108,127 +4108,128 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { break; } } - if (BEValueV && StartValueV) { - // While we are analyzing this PHI node, handle its value symbolically. - const SCEV *SymbolicName = getUnknown(PN); - assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && - "PHI node already processed?"); - ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName}); - - // Using this symbolic name for the PHI, analyze the value coming around - // the back-edge. - const SCEV *BEValue = getSCEV(BEValueV); - - // NOTE: If BEValue is loop invariant, we know that the PHI node just - // has a special value for the first iteration of the loop. - - // If the value coming around the backedge is an add with the symbolic - // value we just inserted, then we found a simple induction variable! - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) { - // If there is a single occurrence of the symbolic value, replace it - // with a recurrence. 
- unsigned FoundIndex = Add->getNumOperands(); - for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) - if (Add->getOperand(i) == SymbolicName) - if (FoundIndex == e) { - FoundIndex = i; - break; - } + if (!BEValueV || !StartValueV) + return nullptr; - if (FoundIndex != Add->getNumOperands()) { - // Create an add with everything but the specified operand. - SmallVector<const SCEV *, 8> Ops; - for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) - if (i != FoundIndex) - Ops.push_back(Add->getOperand(i)); - const SCEV *Accum = getAddExpr(Ops); - - // This is not a valid addrec if the step amount is varying each - // loop iteration, but is not itself an addrec in this loop. - if (isLoopInvariant(Accum, L) || - (isa<SCEVAddRecExpr>(Accum) && - cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; - - if (auto BO = MatchBinaryOp(BEValueV, DT)) { - if (BO->Opcode == Instruction::Add && BO->LHS == PN) { - if (BO->IsNUW) - Flags = setFlags(Flags, SCEV::FlagNUW); - if (BO->IsNSW) - Flags = setFlags(Flags, SCEV::FlagNSW); - } - } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) { - // If the increment is an inbounds GEP, then we know the address - // space cannot be wrapped around. We cannot make any guarantee - // about signed or unsigned overflow because pointers are - // unsigned but we may have a negative index from the base - // pointer. We can guarantee that no unsigned wrap occurs if the - // indices form a positive value. - if (GEP->isInBounds() && GEP->getOperand(0) == PN) { - Flags = setFlags(Flags, SCEV::FlagNW); - - const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); - if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) - Flags = setFlags(Flags, SCEV::FlagNUW); - } + // While we are analyzing this PHI node, handle its value symbolically. + const SCEV *SymbolicName = getUnknown(PN); + assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && + "PHI node already processed?"); + ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName}); + + // Using this symbolic name for the PHI, analyze the value coming around + // the back-edge. + const SCEV *BEValue = getSCEV(BEValueV); + + // NOTE: If BEValue is loop invariant, we know that the PHI node just + // has a special value for the first iteration of the loop. + + // If the value coming around the backedge is an add with the symbolic + // value we just inserted, then we found a simple induction variable! + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) { + // If there is a single occurrence of the symbolic value, replace it + // with a recurrence. + unsigned FoundIndex = Add->getNumOperands(); + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (Add->getOperand(i) == SymbolicName) + if (FoundIndex == e) { + FoundIndex = i; + break; + } - // We cannot transfer nuw and nsw flags from subtraction - // operations -- sub nuw X, Y is not the same as add nuw X, -Y - // for instance. + if (FoundIndex != Add->getNumOperands()) { + // Create an add with everything but the specified operand. + SmallVector<const SCEV *, 8> Ops; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (i != FoundIndex) + Ops.push_back(Add->getOperand(i)); + const SCEV *Accum = getAddExpr(Ops); + + // This is not a valid addrec if the step amount is varying each + // loop iteration, but is not itself an addrec in this loop. 
+ if (isLoopInvariant(Accum, L) || + (isa<SCEVAddRecExpr>(Accum) && + cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + + if (auto BO = MatchBinaryOp(BEValueV, DT)) { + if (BO->Opcode == Instruction::Add && BO->LHS == PN) { + if (BO->IsNUW) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (BO->IsNSW) + Flags = setFlags(Flags, SCEV::FlagNSW); + } + } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) { + // If the increment is an inbounds GEP, then we know the address + // space cannot be wrapped around. We cannot make any guarantee + // about signed or unsigned overflow because pointers are + // unsigned but we may have a negative index from the base + // pointer. We can guarantee that no unsigned wrap occurs if the + // indices form a positive value. + if (GEP->isInBounds() && GEP->getOperand(0) == PN) { + Flags = setFlags(Flags, SCEV::FlagNW); + + const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); + if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) + Flags = setFlags(Flags, SCEV::FlagNUW); } - const SCEV *StartVal = getSCEV(StartValueV); - const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); + // We cannot transfer nuw and nsw flags from subtraction + // operations -- sub nuw X, Y is not the same as add nuw X, -Y + // for instance. + } - // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and purge all of the - // entries for the scalars that use the symbolic expression. - forgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + const SCEV *StartVal = getSCEV(StartValueV); + const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); - // We can add Flags to the post-inc expression only if we - // know that it us *undefined behavior* for BEValueV to - // overflow. - if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) - if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) - (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + forgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; - return PHISCEV; - } + // We can add Flags to the post-inc expression only if we + // know that it is *undefined behavior* for BEValueV to + // overflow. + if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) + if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) + (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); + + return PHISCEV; }
We now need to go back and purge all of the - // entries for the scalars that use the symbolic expression. - forgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted; - return Shifted; - } + } + } else { + // Otherwise, this could be a loop like this: + // i = 0; for (j = 1; ..; ++j) { .... i = j; } + // In this case, j = {1,+,1} and BEValue is j. + // Because the other in-value of i (0) fits the evolution of BEValue + // i really is an addrec evolution. + // + // We can generalize this saying that i is the shifted value of BEValue + // by one iteration: + // PHI(f(0), f({1,+,1})) --> f({0,+,1}) + const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); + const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this); + if (Shifted != getCouldNotCompute() && + Start != getCouldNotCompute()) { + const SCEV *StartVal = getSCEV(StartValueV); + if (Start == StartVal) { + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + forgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted; + return Shifted; } } - - // Remove the temporary PHI node SCEV that has been inserted while intending - // to create an AddRecExpr for this PHI node. We can not keep this temporary - // as it will prevent later (possibly simpler) SCEV expressions to be added - // to the ValueExprMap. - eraseValueFromMap(PN); } + // Remove the temporary PHI node SCEV that has been inserted while intending + // to create an AddRecExpr for this PHI node. We can not keep this temporary + // as it will prevent later (possibly simpler) SCEV expressions to be added + // to the ValueExprMap. + eraseValueFromMap(PN); + return nullptr; } @@ -4388,7 +4389,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC)) + if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC})) if (LI.replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); @@ -5028,7 +5029,8 @@ bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) { return false; // Only proceed if we can prove that I does not yield poison. - if (!isKnownNotFullPoison(I)) return false; + if (!programUndefinedIfFullPoison(I)) + return false; // At this point we know that if I is executed, then it does not wrap // according to at least one of NSW or NUW. If I is not executed, then we do diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 6dd10441c4cb..86cbd79aa84e 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1772,9 +1772,10 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, /// /// This does not depend on any SCEVExpander state but should be used in /// the same context that SCEVExpander is used. 
-unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, - SmallVectorImpl<WeakVH> &DeadInsts, - const TargetTransformInfo *TTI) { +unsigned +SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, + SmallVectorImpl<WeakTrackingVH> &DeadInsts, + const TargetTransformInfo *TTI) { // Find integer phis in order of increasing width. SmallVector<PHINode*, 8> Phis; for (auto &I : *L->getHeader()) { @@ -1799,7 +1800,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // so narrow phis can reuse them. for (PHINode *Phi : Phis) { auto SimplifyPHINode = [&](PHINode *PN) -> Value * { - if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC)) + if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC})) return V; if (!SE.isSCEVable(PN->getType())) return nullptr; diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index d73b1a128031..26d606cce9bb 100644 --- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -83,6 +83,12 @@ int TargetTransformInfo::getIntrinsicCost( return Cost; } +unsigned +TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI, + unsigned &JTSize) const { + return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize); +} + int TargetTransformInfo::getUserCost(const User *U) const { int Cost = TTIImpl->getUserCost(U); assert(Cost >= 0 && "TTI should not produce negative costs!"); diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index af964b6259bb..6ec175fc84e2 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -296,12 +296,12 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, if (NSW) { // Adding two non-negative numbers, or subtracting a negative number from // a non-negative one, can't wrap into negative. - if (LHSKnown.Zero.isSignBitSet() && Known2.Zero.isSignBitSet()) - KnownOut.Zero.setSignBit(); + if (LHSKnown.isNonNegative() && Known2.isNonNegative()) + KnownOut.makeNonNegative(); // Adding two negative numbers, or subtracting a non-negative number from // a negative one, can't wrap into non-negative. - else if (LHSKnown.One.isSignBitSet() && Known2.One.isSignBitSet()) - KnownOut.One.setSignBit(); + else if (LHSKnown.isNegative() && Known2.isNegative()) + KnownOut.makeNegative(); } } } @@ -321,10 +321,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // The product of a number with itself is non-negative. isKnownNonNegative = true; } else { - bool isKnownNonNegativeOp1 = Known.Zero.isSignBitSet(); - bool isKnownNonNegativeOp0 = Known2.Zero.isSignBitSet(); - bool isKnownNegativeOp1 = Known.One.isSignBitSet(); - bool isKnownNegativeOp0 = Known2.One.isSignBitSet(); + bool isKnownNonNegativeOp1 = Known.isNonNegative(); + bool isKnownNonNegativeOp0 = Known2.isNonNegative(); + bool isKnownNegativeOp1 = Known.isNegative(); + bool isKnownNegativeOp0 = Known2.isNegative(); // The product of two numbers with the same sign is non-negative. 
isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); @@ -360,21 +360,20 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // which case we prefer to follow the result of the direct computation, // though as the program is invoking undefined behaviour we can choose // whatever we like here. - if (isKnownNonNegative && !Known.One.isSignBitSet()) - Known.Zero.setSignBit(); - else if (isKnownNegative && !Known.Zero.isSignBitSet()) - Known.One.setSignBit(); + if (isKnownNonNegative && !Known.isNegative()) + Known.makeNonNegative(); + else if (isKnownNegative && !Known.isNonNegative()) + Known.makeNegative(); } void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, - APInt &KnownZero, - APInt &KnownOne) { - unsigned BitWidth = KnownZero.getBitWidth(); + KnownBits &Known) { + unsigned BitWidth = Known.getBitWidth(); unsigned NumRanges = Ranges.getNumOperands() / 2; assert(NumRanges >= 1); - KnownZero.setAllBits(); - KnownOne.setAllBits(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); for (unsigned i = 0; i < NumRanges; ++i) { ConstantInt *Lower = @@ -388,8 +387,8 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros(); APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits); - KnownOne &= Range.getUnsignedMax() & Mask; - KnownZero &= ~Range.getUnsignedMax() & Mask; + Known.One &= Range.getUnsignedMax() & Mask; + Known.Zero &= ~Range.getUnsignedMax() & Mask; } } @@ -709,9 +708,9 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnown.Zero.isSignBitSet()) { + if (RHSKnown.isNonNegative()) { // We know that the sign bit is zero. - Known.Zero.setSignBit(); + Known.makeNonNegative(); } // assume(v >_s c) where c is at least -1. } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && @@ -720,9 +719,9 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnown.One.isAllOnesValue() || RHSKnown.Zero.isSignBitSet()) { + if (RHSKnown.One.isAllOnesValue() || RHSKnown.isNonNegative()) { // We know that the sign bit is zero. - Known.Zero.setSignBit(); + Known.makeNonNegative(); } // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && @@ -731,9 +730,9 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnown.One.isSignBitSet()) { + if (RHSKnown.isNegative()) { // We know that the sign bit is one. - Known.One.setSignBit(); + Known.makeNegative(); } // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && @@ -742,9 +741,9 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnown.Zero.isAllOnesValue() || RHSKnown.One.isSignBitSet()) { + if (RHSKnown.Zero.isAllOnesValue() || RHSKnown.isNegative()) { // We know that the sign bit is one. 
- Known.One.setSignBit(); + Known.makeNegative(); } // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && @@ -902,7 +901,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, default: break; case Instruction::Load: if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, Known.Zero, Known.One); + computeKnownBitsFromRangeMetadata(*MD, Known); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. @@ -992,23 +991,23 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, unsigned MaxHighZeros = 0; if (SPF == SPF_SMAX) { // If both sides are negative, the result is negative. - if (Known.One.isSignBitSet() && Known2.One.isSignBitSet()) + if (Known.isNegative() && Known2.isNegative()) // We can derive a lower bound on the result by taking the max of the // leading one bits. MaxHighOnes = std::max(Known.One.countLeadingOnes(), Known2.One.countLeadingOnes()); // If either side is non-negative, the result is non-negative. - else if (Known.Zero.isSignBitSet() || Known2.Zero.isSignBitSet()) + else if (Known.isNonNegative() || Known2.isNonNegative()) MaxHighZeros = 1; } else if (SPF == SPF_SMIN) { // If both sides are non-negative, the result is non-negative. - if (Known.Zero.isSignBitSet() && Known2.Zero.isSignBitSet()) + if (Known.isNonNegative() && Known2.isNonNegative()) // We can derive an upper bound on the result by taking the max of the // leading zero bits. MaxHighZeros = std::max(Known.Zero.countLeadingOnes(), Known2.Zero.countLeadingOnes()); // If either side is negative, the result is negative. - else if (Known.One.isSignBitSet() || Known2.One.isSignBitSet()) + else if (Known.isNegative() || Known2.isNegative()) MaxHighOnes = 1; } else if (SPF == SPF_UMAX) { // We can derive a lower bound on the result by taking the max of the @@ -1163,12 +1162,12 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (Known2.Zero.isSignBitSet() || ((Known2.Zero & LowBits) == LowBits)) + if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero)) Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (Known2.One.isSignBitSet() && ((Known2.One & LowBits) != 0)) + if (Known2.isNegative() && LowBits.intersects(Known2.One)) Known.One |= ~LowBits; assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); @@ -1180,8 +1179,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // remainder is zero. computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // If it's known zero, our sign bit is also zero. 
- if (Known2.Zero.isSignBitSet()) - Known.Zero.setSignBit(); + if (Known2.isNonNegative()) + Known.makeNonNegative(); break; case Instruction::URem: { @@ -1321,25 +1320,25 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // (add non-negative, non-negative) --> non-negative // (add negative, negative) --> negative if (Opcode == Instruction::Add) { - if (Known2.Zero.isSignBitSet() && Known3.Zero.isSignBitSet()) - Known.Zero.setSignBit(); - else if (Known2.One.isSignBitSet() && Known3.One.isSignBitSet()) - Known.One.setSignBit(); + if (Known2.isNonNegative() && Known3.isNonNegative()) + Known.makeNonNegative(); + else if (Known2.isNegative() && Known3.isNegative()) + Known.makeNegative(); } // (sub nsw non-negative, negative) --> non-negative // (sub nsw negative, non-negative) --> negative else if (Opcode == Instruction::Sub && LL == I) { - if (Known2.Zero.isSignBitSet() && Known3.One.isSignBitSet()) - Known.Zero.setSignBit(); - else if (Known2.One.isSignBitSet() && Known3.Zero.isSignBitSet()) - Known.One.setSignBit(); + if (Known2.isNonNegative() && Known3.isNegative()) + Known.makeNonNegative(); + else if (Known2.isNegative() && Known3.isNonNegative()) + Known.makeNegative(); } // (mul nsw non-negative, non-negative) --> non-negative - else if (Opcode == Instruction::Mul && Known2.Zero.isSignBitSet() && - Known3.Zero.isSignBitSet()) - Known.Zero.setSignBit(); + else if (Opcode == Instruction::Mul && Known2.isNonNegative() && + Known3.isNonNegative()) + Known.makeNonNegative(); } break; @@ -1384,7 +1383,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // and then intersect with known bits based on other properties of the // function. if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, Known.Zero, Known.One); + computeKnownBitsFromRangeMetadata(*MD, Known); if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) { computeKnownBits(RV, Known2, Depth + 1, Q); Known.Zero |= Known2.Zero; @@ -1599,8 +1598,8 @@ void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, } KnownBits Bits(BitWidth); computeKnownBits(V, Bits, Depth, Q); - KnownOne = Bits.One.isSignBitSet(); - KnownZero = Bits.Zero.isSignBitSet(); + KnownOne = Bits.isNegative(); + KnownZero = Bits.isNonNegative(); } /// Return true if the given value is known to have exactly one @@ -2221,7 +2220,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // If we are subtracting one from a positive number, there is no carry // out of the result. - if (Known.Zero.isSignBitSet()) + if (Known.isNonNegative()) return Tmp; } @@ -2245,7 +2244,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // If the input is known to be positive (the sign bit is known clear), // the output of the NEG has the same number of sign bits as the input. - if (Known.Zero.isSignBitSet()) + if (Known.isNonNegative()) return Tmp2; // Otherwise, we treat this like a SUB. @@ -2302,10 +2301,10 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // If we know that the sign bit is either zero or one, determine the number of // identical bits in the top of the input value. 
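The add/sub/mul cases above encode sign-propagation rules that hold whenever the arithmetic does not overflow, which is exactly what the nsw flag guarantees. A quick spot check of each rule on concrete values:

#include <cassert>
#include <cstdint>

int main() {
  int64_t a = 5, b = 9, c = -3, d = -7; // arbitrary sample operands
  assert(a + b >= 0); // (add non-negative, non-negative) --> non-negative
  assert(c + d < 0);  // (add negative, negative)         --> negative
  assert(a - c >= 0); // (sub nsw non-negative, negative) --> non-negative
  assert(c - a < 0);  // (sub nsw negative, non-negative) --> negative
  assert(a * b >= 0); // (mul nsw non-negative, non-negative) --> non-negative
}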
- if (Known.Zero.isSignBitSet()) + if (Known.isNonNegative()) return std::max(FirstAnswer, Known.Zero.countLeadingOnes()); - if (Known.One.isSignBitSet()) + if (Known.isNegative()) return std::max(FirstAnswer, Known.One.countLeadingOnes()); // computeKnownBits gave us no extra information about the top bits. @@ -3198,7 +3197,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast<Instruction>(V)) // TODO: Acquire a DominatorTree and AssumptionCache and use them. - if (Value *Simplified = SimplifyInstruction(I, DL, nullptr)) { + if (Value *Simplified = SimplifyInstruction(I, {DL, I})) { V = Simplified; continue; } @@ -3319,63 +3318,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, LI->getAlignment(), DL, CtxI, DT); } case Instruction::Call: { - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { - switch (II->getIntrinsicID()) { - // These synthetic intrinsics have no side-effects and just mark - // information about their operands. - // FIXME: There are other no-op synthetic instructions that potentially - // should be considered at least *safe* to speculate... - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - return true; + auto *CI = cast<const CallInst>(Inst); + const Function *Callee = CI->getCalledFunction(); - case Intrinsic::bitreverse: - case Intrinsic::bswap: - case Intrinsic::ctlz: - case Intrinsic::ctpop: - case Intrinsic::cttz: - case Intrinsic::objectsize: - case Intrinsic::sadd_with_overflow: - case Intrinsic::smul_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::umul_with_overflow: - case Intrinsic::usub_with_overflow: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions except for setting errno. - case Intrinsic::sqrt: - case Intrinsic::fma: - case Intrinsic::fmuladd: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions, and the corresponding libm functions never set errno. - case Intrinsic::trunc: - case Intrinsic::copysign: - case Intrinsic::fabs: - case Intrinsic::minnum: - case Intrinsic::maxnum: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions, which never overflow when operating on the IEEE754 types - // that we support, and never set errno otherwise. - case Intrinsic::ceil: - case Intrinsic::floor: - case Intrinsic::nearbyint: - case Intrinsic::rint: - case Intrinsic::round: - return true; - // These intrinsics do not correspond to any libm function, and - // do not set errno. - case Intrinsic::powi: - return true; - // TODO: are convert_{from,to}_fp16 safe? - // TODO: can we list target-specific intrinsics here? - default: break; - } - } - return false; // The called function could have undefined behavior or - // side-effects, even if marked readnone nounwind. + // The called function could have undefined behavior or side-effects, even + // if marked readnone nounwind. 
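ComputeNumSignBits above turns a known sign bit into a count of identical top bits by measuring the run of leading ones in the matching mask. A rough standalone model (llvm::APInt supplies the real countLeadingOnes):

#include <cassert>
#include <cstdint>

// Count how many consecutive top bits of X are set.
static unsigned countLeadingOnes(uint64_t X) {
  unsigned N = 0;
  for (uint64_t Bit = 1ULL << 63; Bit && (X & Bit); Bit >>= 1)
    ++N;
  return N;
}

int main() {
  // Known non-negative with the top 4 bits known zero: 4 sign bits.
  assert(countLeadingOnes(0xF000000000000000ULL) == 4);
  // Known negative with the top 9 bits known one: 9 sign bits.
  assert(countLeadingOnes(0xFF80000000000000ULL) == 9);
}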
+ return Callee && Callee->isSpeculatable(); } case Instruction::VAArg: case Instruction::Alloca: @@ -3836,7 +3784,7 @@ const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) { } } -bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) { +bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) { // We currently only look for uses of poison values within the same basic // block, as that makes it easier to guarantee that the uses will be // executed given that PoisonI is executed. diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp index 49a8ce4bed0b..a49276099f19 100644 --- a/contrib/llvm/lib/AsmParser/LLLexer.cpp +++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp @@ -601,6 +601,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(hhvm_ccc); KEYWORD(cxx_fast_tlscc); KEYWORD(amdgpu_vs); + KEYWORD(amdgpu_hs); KEYWORD(amdgpu_gs); KEYWORD(amdgpu_ps); KEYWORD(amdgpu_cs); @@ -648,6 +649,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(returned); KEYWORD(returns_twice); KEYWORD(signext); + KEYWORD(speculatable); KEYWORD(sret); KEYWORD(ssp); KEYWORD(sspreq); diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp index c7076ed0dd81..97a567565b47 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.cpp +++ b/contrib/llvm/lib/AsmParser/LLParser.cpp @@ -1095,6 +1095,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break; + case lltok::kw_speculatable: B.addAttribute(Attribute::Speculatable); break; case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; case lltok::kw_sspstrong: @@ -1667,8 +1668,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'hhvm_ccc' /// ::= 'cxx_fast_tlscc' /// ::= 'amdgpu_vs' -/// ::= 'amdgpu_tcs' -/// ::= 'amdgpu_tes' +/// ::= 'amdgpu_hs' /// ::= 'amdgpu_gs' /// ::= 'amdgpu_ps' /// ::= 'amdgpu_cs' @@ -1710,6 +1710,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break; case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break; case lltok::kw_amdgpu_vs: CC = CallingConv::AMDGPU_VS; break; + case lltok::kw_amdgpu_hs: CC = CallingConv::AMDGPU_HS; break; case lltok::kw_amdgpu_gs: CC = CallingConv::AMDGPU_GS; break; case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break; case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break; @@ -4071,7 +4072,7 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) { /// virtuality: DW_VIRTUALTIY_pure_virtual, /// virtualIndex: 10, thisAdjustment: 4, flags: 11, /// isOptimized: false, templateParams: !4, declaration: !5, -/// variables: !6) +/// variables: !6, thrownTypes: !7) bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) { auto Loc = Lex.getLoc(); #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ @@ -4093,7 +4094,8 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) { OPTIONAL(unit, MDField, ); \ OPTIONAL(templateParams, MDField, ); \ OPTIONAL(declaration, MDField, ); \ - OPTIONAL(variables, MDField, ); + OPTIONAL(variables, MDField, ); \ + OPTIONAL(thrownTypes, MDField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS @@ -4103,12 +4105,12 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) { "missing 'distinct', required for 
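The deleted whitelist above is subsumed by the new speculatable function attribute: the safety claim now travels with the callee instead of living in a hard-coded intrinsic table. A sketch of the resulting check, mirroring the replacement code (assumes LLVM headers from this revision):

#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

// A call is safe to hoist past control flow only when the callee is known
// and explicitly carries the speculatable attribute.
static bool callIsSafeToSpeculate(const llvm::CallInst &CI) {
  const llvm::Function *Callee = CI.getCalledFunction();
  return Callee && Callee->isSpeculatable();
}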
!DISubprogram when 'isDefinition'"); Result = GET_OR_DISTINCT( - DISubprogram, (Context, scope.Val, name.Val, linkageName.Val, file.Val, - line.Val, type.Val, isLocal.Val, isDefinition.Val, - scopeLine.Val, containingType.Val, virtuality.Val, - virtualIndex.Val, thisAdjustment.Val, flags.Val, - isOptimized.Val, unit.Val, templateParams.Val, - declaration.Val, variables.Val)); + DISubprogram, + (Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val, + type.Val, isLocal.Val, isDefinition.Val, scopeLine.Val, + containingType.Val, virtuality.Val, virtualIndex.Val, thisAdjustment.Val, + flags.Val, isOptimized.Val, unit.Val, templateParams.Val, + declaration.Val, variables.Val, thrownTypes.Val)); return false; } @@ -4148,15 +4150,13 @@ bool LLParser::ParseDILexicalBlockFile(MDNode *&Result, bool IsDistinct) { bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(scope, MDField, ); \ - OPTIONAL(file, MDField, ); \ OPTIONAL(name, MDStringField, ); \ - OPTIONAL(line, LineField, ); \ OPTIONAL(exportSymbols, MDBoolField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS Result = GET_OR_DISTINCT(DINamespace, - (Context, scope.Val, file.Val, name.Val, line.Val, exportSymbols.Val)); + (Context, scope.Val, name.Val, exportSymbols.Val)); return false; } diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h index 33f8e63daa05..6c8ed7da495d 100644 --- a/contrib/llvm/lib/AsmParser/LLToken.h +++ b/contrib/llvm/lib/AsmParser/LLToken.h @@ -153,6 +153,7 @@ enum Kind { kw_hhvm_ccc, kw_cxx_fast_tlscc, kw_amdgpu_vs, + kw_amdgpu_hs, kw_amdgpu_gs, kw_amdgpu_ps, kw_amdgpu_cs, @@ -198,6 +199,7 @@ enum Kind { kw_returned, kw_returns_twice, kw_signext, + kw_speculatable, kw_ssp, kw_sspreq, kw_sspstrong, diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 6d727ce83346..8b6f79a81b93 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -93,6 +93,13 @@ static cl::opt<bool> PrintSummaryGUIDs( cl::desc( "Print the global id for each value when reading the module summary")); +// FIXME: This flag should either be removed or moved to clang as a driver flag. +static llvm::cl::opt<bool> IgnoreEmptyThinLTOIndexFile( + "ignore-empty-index-file", llvm::cl::ZeroOrMore, + llvm::cl::desc( + "Ignore an empty index file and perform non-ThinLTO compilation"), + llvm::cl::init(false)); + namespace { enum { @@ -706,11 +713,20 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { /// Original source file name recorded in a bitcode record. std::string SourceFileName; + /// The string identifier given to this module by the client, normally the + /// path to the bitcode file. + StringRef ModulePath; + + /// For per-module summary indexes, the unique numerical identifier given to + /// this module by the client. 
+ unsigned ModuleId; + public: ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab, - ModuleSummaryIndex &TheIndex); + ModuleSummaryIndex &TheIndex, + StringRef ModulePath, unsigned ModuleId); - Error parseModule(StringRef ModulePath); + Error parseModule(); private: void setValueGUID(uint64_t ValueID, StringRef ValueName, @@ -723,11 +739,13 @@ private: std::vector<FunctionSummary::EdgeTy> makeCallList(ArrayRef<uint64_t> Record, bool IsOldProfileFormat, bool HasProfile); - Error parseEntireSummary(StringRef ModulePath); + Error parseEntireSummary(); Error parseModuleStringTable(); std::pair<GlobalValue::GUID, GlobalValue::GUID> getGUIDFromValueId(unsigned ValueId); + + ModulePathStringTableTy::iterator addThisModulePath(); }; } // end anonymous namespace @@ -1119,6 +1137,7 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) { case Attribute::SwiftSelf: return 1ULL << 51; case Attribute::SwiftError: return 1ULL << 52; case Attribute::WriteOnly: return 1ULL << 53; + case Attribute::Speculatable: return 1ULL << 54; case Attribute::Dereferenceable: llvm_unreachable("dereferenceable attribute not supported in raw format"); break; @@ -1315,6 +1334,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::ReturnsTwice; case bitc::ATTR_KIND_S_EXT: return Attribute::SExt; + case bitc::ATTR_KIND_SPECULATABLE: + return Attribute::Speculatable; case bitc::ATTR_KIND_STACK_ALIGNMENT: return Attribute::StackAlignment; case bitc::ATTR_KIND_STACK_PROTECT: @@ -4666,8 +4687,15 @@ std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const { } ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader( - BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex) - : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex) {} + BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex, + StringRef ModulePath, unsigned ModuleId) + : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex), + ModulePath(ModulePath), ModuleId(ModuleId) {} + +ModulePathStringTableTy::iterator +ModuleSummaryIndexBitcodeReader::addThisModulePath() { + return TheIndex.addModulePath(ModulePath, ModuleId); +} std::pair<GlobalValue::GUID, GlobalValue::GUID> ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) { @@ -4777,7 +4805,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( // Parse just the blocks needed for building the index out of the module. // At the end of this routine the module Index is populated with a map // from global value id to GlobalValueSummary objects. -Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { +Error ModuleSummaryIndexBitcodeReader::parseModule() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); @@ -4828,7 +4856,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { SeenValueSymbolTable = true; } SeenGlobalValSummary = true; - if (Error Err = parseEntireSummary(ModulePath)) + if (Error Err = parseEntireSummary()) return Err; break; case bitc::MODULE_STRTAB_BLOCK_ID: @@ -4861,12 +4889,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { case bitc::MODULE_CODE_HASH: { if (Record.size() != 5) return error("Invalid hash length " + Twine(Record.size()).str()); - if (TheIndex.modulePaths().empty()) - // We always seed the index with the module. 
- TheIndex.addModulePath(ModulePath, 0); - if (TheIndex.modulePaths().size() != 1) - return error("Don't expect multiple modules defined?"); - auto &Hash = TheIndex.modulePaths().begin()->second.second; + auto &Hash = addThisModulePath()->second.second; int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && "Unexpected high bits set"); @@ -4941,8 +4964,7 @@ std::vector<FunctionSummary::EdgeTy> ModuleSummaryIndexBitcodeReader::makeCallLi // Eagerly parse the entire summary block. This populates the GlobalValueSummary // objects in the index. -Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( - StringRef ModulePath) { +Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() { if (Stream.EnterSubBlock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID)) return error("Invalid record"); SmallVector<uint64_t, 64> Record; @@ -4966,7 +4988,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( // "OriginalName" attachement. GlobalValueSummary *LastSeenSummary = nullptr; GlobalValue::GUID LastSeenGUID = 0; - bool Combined = false; // We can expect to see any number of type ID information records before // each function summary records; these variables store the information @@ -4985,16 +5006,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - // For a per-module index, remove any entries that still have empty - // summaries. The VST parsing creates entries eagerly for all symbols, - // but not all have associated summaries (e.g. it doesn't know how to - // distinguish between VST_CODE_ENTRY for function declarations vs global - // variables with initializers that end up with a summary). Remove those - // entries now so that we don't need to rely on the combined index merger - // to clean them up (especially since that may not run for the first - // module's index if we merge into that). - if (!Combined) - TheIndex.removeEmptySummaryEntries(); return Error::success(); case BitstreamEntry::Record: // The interesting case. @@ -5058,7 +5069,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( PendingTypeTestAssumeConstVCalls.clear(); PendingTypeCheckedLoadConstVCalls.clear(); auto GUID = getGUIDFromValueId(ValueID); - FS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first()); + FS->setModulePath(addThisModulePath()->first()); FS->setOriginalName(GUID.second); TheIndex.addGlobalValueSummary(GUID.first, std::move(FS)); break; @@ -5078,13 +5089,14 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( // string table section in the per-module index, we create a single // module path string table entry with an empty (0) ID to take // ownership. 
- AS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first()); + AS->setModulePath(addThisModulePath()->first()); GlobalValue::GUID AliaseeGUID = getGUIDFromValueId(AliaseeID).first; - auto *AliaseeSummary = TheIndex.getGlobalValueSummary(AliaseeGUID); - if (!AliaseeSummary) + auto AliaseeInModule = + TheIndex.findSummaryInModule(AliaseeGUID, ModulePath); + if (!AliaseeInModule) return error("Alias expects aliasee summary to be parsed"); - AS->setAliasee(AliaseeSummary); + AS->setAliasee(AliaseeInModule); auto GUID = getGUIDFromValueId(ValueID); AS->setOriginalName(GUID.second); @@ -5099,7 +5111,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( std::vector<ValueInfo> Refs = makeRefList(ArrayRef<uint64_t>(Record).slice(2)); auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs)); - FS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first()); + FS->setModulePath(addThisModulePath()->first()); auto GUID = getGUIDFromValueId(ValueID); FS->setOriginalName(GUID.second); TheIndex.addGlobalValueSummary(GUID.first, std::move(FS)); @@ -5143,7 +5155,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( LastSeenGUID = GUID; FS->setModulePath(ModuleIdMap[ModuleId]); TheIndex.addGlobalValueSummary(GUID, std::move(FS)); - Combined = true; break; } // FS_COMBINED_ALIAS: [valueid, modid, flags, valueid] @@ -5169,7 +5180,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first; LastSeenGUID = GUID; TheIndex.addGlobalValueSummary(GUID, std::move(AS)); - Combined = true; break; } // FS_COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid] @@ -5186,7 +5196,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first; LastSeenGUID = GUID; TheIndex.addGlobalValueSummary(GUID, std::move(FS)); - Combined = true; break; } // FS_COMBINED_ORIGINAL_NAME: [original_name] @@ -5486,15 +5495,27 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, return getModuleImpl(Context, false, ShouldLazyLoadMetadata, IsImporting); } +// Parse the specified bitcode buffer and merge the index into CombinedIndex. +Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex, + unsigned ModuleId) { + BitstreamCursor Stream(Buffer); + Stream.JumpToBit(ModuleBit); + + ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex, + ModuleIdentifier, ModuleId); + return R.parseModule(); +} + // Parse the specified bitcode buffer, returning the function info index. 
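BitcodeModule::readSummary above merges one module's summary directly into a combined index under a caller-chosen module id. A hypothetical driver loop built on the matching new entry point readModuleSummaryIndex, with naming and error handling kept minimal:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"

// Merge each per-module summary into CombinedIndex, numbering modules by
// their position in the input list.
static llvm::Error mergeIndexes(llvm::ArrayRef<llvm::MemoryBufferRef> Buffers,
                                llvm::ModuleSummaryIndex &CombinedIndex) {
  unsigned ModuleId = 0;
  for (llvm::MemoryBufferRef B : Buffers)
    if (llvm::Error E =
            llvm::readModuleSummaryIndex(B, CombinedIndex, ModuleId++))
      return E;
  return llvm::Error::success();
}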
Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); auto Index = llvm::make_unique<ModuleSummaryIndex>(); - ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index); + ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index, + ModuleIdentifier, 0); - if (Error Err = R.parseModule(ModuleIdentifier)) + if (Error Err = R.parseModule()) return std::move(Err); return std::move(Index); @@ -5604,6 +5625,16 @@ Expected<std::string> llvm::getBitcodeProducerString(MemoryBufferRef Buffer) { return readIdentificationCode(*StreamOrErr); } +Error llvm::readModuleSummaryIndex(MemoryBufferRef Buffer, + ModuleSummaryIndex &CombinedIndex, + unsigned ModuleId) { + Expected<BitcodeModule> BM = getSingleModule(Buffer); + if (!BM) + return BM.takeError(); + + return BM->readSummary(CombinedIndex, ModuleId); +} + Expected<std::unique_ptr<ModuleSummaryIndex>> llvm::getModuleSummaryIndex(MemoryBufferRef Buffer) { Expected<BitcodeModule> BM = getSingleModule(Buffer); @@ -5620,3 +5651,14 @@ Expected<bool> llvm::hasGlobalValueSummary(MemoryBufferRef Buffer) { return BM->hasSummary(); } + +Expected<std::unique_ptr<ModuleSummaryIndex>> +llvm::getModuleSummaryIndexForFile(StringRef Path) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFileOrSTDIN(Path); + if (!FileOrErr) + return errorCodeToError(FileOrErr.getError()); + if (IgnoreEmptyThinLTOIndexFile && !(*FileOrErr)->getBufferSize()) + return nullptr; + return getModuleSummaryIndex(**FileOrErr); +} diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index d089684a052f..42135e5949ce 100644 --- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -474,8 +474,8 @@ class MetadataLoader::MetadataLoaderImpl { for (auto CU_SP : CUSubprograms) if (auto *SPs = dyn_cast_or_null<MDTuple>(CU_SP.second)) for (auto &Op : SPs->operands()) - if (auto *SP = dyn_cast_or_null<MDNode>(Op)) - SP->replaceOperandWith(7, CU_SP.first); + if (auto *SP = dyn_cast_or_null<DISubprogram>(Op)) + SP->replaceUnit(CU_SP.first); CUSubprograms.clear(); } @@ -1298,7 +1298,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } case bitc::METADATA_SUBPROGRAM: { - if (Record.size() < 18 || Record.size() > 20) + if (Record.size() < 18 || Record.size() > 21) return error("Invalid record"); IsDistinct = @@ -1314,29 +1314,31 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( unsigned Offset = Record.size() >= 19 ? 1 : 0; bool HasFn = Offset && !HasUnit; bool HasThisAdj = Record.size() >= 20; + bool HasThrownTypes = Record.size() >= 21; DISubprogram *SP = GET_OR_DISTINCT( - DISubprogram, (Context, - getDITypeRefOrNull(Record[1]), // scope - getMDString(Record[2]), // name - getMDString(Record[3]), // linkageName - getMDOrNull(Record[4]), // file - Record[5], // line - getMDOrNull(Record[6]), // type - Record[7], // isLocal - Record[8], // isDefinition - Record[9], // scopeLine - getDITypeRefOrNull(Record[10]), // containingType - Record[11], // virtuality - Record[12], // virtualIndex - HasThisAdj ? Record[19] : 0, // thisAdjustment - static_cast<DINode::DIFlags>(Record[13] // flags - ), - Record[14], // isOptimized - HasUnit ? 
CUorFn : nullptr, // unit - getMDOrNull(Record[15 + Offset]), // templateParams - getMDOrNull(Record[16 + Offset]), // declaration - getMDOrNull(Record[17 + Offset]) // variables - )); + DISubprogram, + (Context, + getDITypeRefOrNull(Record[1]), // scope + getMDString(Record[2]), // name + getMDString(Record[3]), // linkageName + getMDOrNull(Record[4]), // file + Record[5], // line + getMDOrNull(Record[6]), // type + Record[7], // isLocal + Record[8], // isDefinition + Record[9], // scopeLine + getDITypeRefOrNull(Record[10]), // containingType + Record[11], // virtuality + Record[12], // virtualIndex + HasThisAdj ? Record[19] : 0, // thisAdjustment + static_cast<DINode::DIFlags>(Record[13]), // flags + Record[14], // isOptimized + HasUnit ? CUorFn : nullptr, // unit + getMDOrNull(Record[15 + Offset]), // templateParams + getMDOrNull(Record[16 + Offset]), // declaration + getMDOrNull(Record[17 + Offset]), // variables + HasThrownTypes ? getMDOrNull(Record[20]) : nullptr // thrownTypes + )); MetadataList.assignValue(SP, NextMetadataNo); NextMetadataNo++; @@ -1381,16 +1383,20 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } case bitc::METADATA_NAMESPACE: { - if (Record.size() != 5) + // Newer versions of DINamespace dropped file and line. + MDString *Name; + if (Record.size() == 3) + Name = getMDString(Record[2]); + else if (Record.size() == 5) + Name = getMDString(Record[3]); + else return error("Invalid record"); IsDistinct = Record[0] & 1; bool ExportSymbols = Record[0] & 2; MetadataList.assignValue( GET_OR_DISTINCT(DINamespace, - (Context, getMDOrNull(Record[1]), - getMDOrNull(Record[2]), getMDString(Record[3]), - Record[4], ExportSymbols)), + (Context, getMDOrNull(Record[1]), Name, ExportSymbols)), NextMetadataNo); NextMetadataNo++; break; diff --git a/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp b/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp index 7152a51cea6e..d1a2a11bbfad 100644 --- a/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp @@ -58,7 +58,7 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) { if (Idx >= size()) resize(Idx + 1); - WeakVH &OldV = ValuePtrs[Idx]; + WeakTrackingVH &OldV = ValuePtrs[Idx]; if (!OldV) { OldV = V; return; diff --git a/contrib/llvm/lib/Bitcode/Reader/ValueList.h b/contrib/llvm/lib/Bitcode/Reader/ValueList.h index 3119d7735e22..72775a3cf3bc 100644 --- a/contrib/llvm/lib/Bitcode/Reader/ValueList.h +++ b/contrib/llvm/lib/Bitcode/Reader/ValueList.h @@ -20,7 +20,7 @@ namespace llvm { class Constant; class BitcodeReaderValueList { - std::vector<WeakVH> ValuePtrs; + std::vector<WeakTrackingVH> ValuePtrs; /// As we resolve forward-referenced constants, we add information about them /// to this vector. This allows us to resolve them in bulk instead of diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index e5aba03c8dc1..485d9b6ac0bc 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -340,146 +340,28 @@ public: // in writing out the call graph edges. Save the mapping from GUID // to the new global value id to use when writing those edges, which // are currently saved in the index in terms of GUID. - for (const auto &I : *this) + forEachSummary([&](GVInfo I) { GUIDToValueIdMap[I.first] = ++GlobalValueId; + }); } /// The below iterator returns the GUID and associated summary. 
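The METADATA_NAMESPACE case above accepts two record shapes: the old five-operand layout that still carries file and line, and the new three-operand layout without them. A self-contained model of that size dispatch (operand positions per the code above; everything else is illustrative):

#include <cassert>
#include <cstdint>
#include <vector>

// Return the index of the name operand, or -1 for an invalid record.
static int namespaceNameIndex(const std::vector<uint64_t> &Record) {
  if (Record.size() == 3)
    return 2; // new layout: flags, scope, name
  if (Record.size() == 5)
    return 3; // old layout: flags, scope, file, name, line
  return -1;
}

int main() {
  assert(namespaceNameIndex({0, 1, 2}) == 2);
  assert(namespaceNameIndex({0, 1, 2, 3, 4}) == 3);
  assert(namespaceNameIndex({0, 1}) == -1);
}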
typedef std::pair<GlobalValue::GUID, GlobalValueSummary *> GVInfo; - /// Iterator over the value GUID and summaries to be written to bitcode, - /// hides the details of whether they are being pulled from the entire - /// index or just those in a provided ModuleToSummariesForIndex map. - class iterator - : public llvm::iterator_facade_base<iterator, std::forward_iterator_tag, - GVInfo> { - /// Enables access to parent class. - const IndexBitcodeWriter &Writer; - - // Iterators used when writing only those summaries in a provided - // ModuleToSummariesForIndex map: - - /// Points to the last element in outer ModuleToSummariesForIndex map. - std::map<std::string, GVSummaryMapTy>::const_iterator ModuleSummariesBack; - /// Iterator on outer ModuleToSummariesForIndex map. - std::map<std::string, GVSummaryMapTy>::const_iterator ModuleSummariesIter; - /// Iterator on an inner global variable summary map. - GVSummaryMapTy::const_iterator ModuleGVSummariesIter; - - // Iterators used when writing all summaries in the index: - - /// Points to the last element in the Index outer GlobalValueMap. - const_gvsummary_iterator IndexSummariesBack; - /// Iterator on outer GlobalValueMap. - const_gvsummary_iterator IndexSummariesIter; - /// Iterator on an inner GlobalValueSummaryList. - GlobalValueSummaryList::const_iterator IndexGVSummariesIter; - - public: - /// Construct iterator from parent \p Writer and indicate if we are - /// constructing the end iterator. - iterator(const IndexBitcodeWriter &Writer, bool IsAtEnd) : Writer(Writer) { - // Set up the appropriate set of iterators given whether we are writing - // the full index or just a subset. - // Can't setup the Back or inner iterators if the corresponding map - // is empty. This will be handled specially in operator== as well. - if (Writer.ModuleToSummariesForIndex && - !Writer.ModuleToSummariesForIndex->empty()) { - for (ModuleSummariesBack = Writer.ModuleToSummariesForIndex->begin(); - std::next(ModuleSummariesBack) != - Writer.ModuleToSummariesForIndex->end(); - ModuleSummariesBack++) - ; - ModuleSummariesIter = !IsAtEnd - ? Writer.ModuleToSummariesForIndex->begin() - : ModuleSummariesBack; - ModuleGVSummariesIter = !IsAtEnd ? ModuleSummariesIter->second.begin() - : ModuleSummariesBack->second.end(); - } else if (!Writer.ModuleToSummariesForIndex && - Writer.Index.begin() != Writer.Index.end()) { - for (IndexSummariesBack = Writer.Index.begin(); - std::next(IndexSummariesBack) != Writer.Index.end(); - IndexSummariesBack++) - ; - IndexSummariesIter = - !IsAtEnd ? Writer.Index.begin() : IndexSummariesBack; - IndexGVSummariesIter = !IsAtEnd ? IndexSummariesIter->second.begin() - : IndexSummariesBack->second.end(); - } - } - - /// Increment the appropriate set of iterators. - iterator &operator++() { - // First the inner iterator is incremented, then if it is at the end - // and there are more outer iterations to go, the inner is reset to - // the start of the next inner list. 
- if (Writer.ModuleToSummariesForIndex) { - ++ModuleGVSummariesIter; - if (ModuleGVSummariesIter == ModuleSummariesIter->second.end() && - ModuleSummariesIter != ModuleSummariesBack) { - ++ModuleSummariesIter; - ModuleGVSummariesIter = ModuleSummariesIter->second.begin(); - } - } else { - ++IndexGVSummariesIter; - if (IndexGVSummariesIter == IndexSummariesIter->second.end() && - IndexSummariesIter != IndexSummariesBack) { - ++IndexSummariesIter; - IndexGVSummariesIter = IndexSummariesIter->second.begin(); - } - } - return *this; - } - - /// Access the <GUID,GlobalValueSummary*> pair corresponding to the current - /// outer and inner iterator positions. - GVInfo operator*() { - if (Writer.ModuleToSummariesForIndex) - return std::make_pair(ModuleGVSummariesIter->first, - ModuleGVSummariesIter->second); - return std::make_pair(IndexSummariesIter->first, - IndexGVSummariesIter->get()); - } - - /// Checks if the iterators are equal, with special handling for empty - /// indexes. - bool operator==(const iterator &RHS) const { - if (Writer.ModuleToSummariesForIndex) { - // First ensure that both are writing the same subset. - if (Writer.ModuleToSummariesForIndex != - RHS.Writer.ModuleToSummariesForIndex) - return false; - // Already determined above that maps are the same, so if one is - // empty, they both are. - if (Writer.ModuleToSummariesForIndex->empty()) - return true; - // Ensure the ModuleGVSummariesIter are iterating over the same - // container before checking them below. - if (ModuleSummariesIter != RHS.ModuleSummariesIter) - return false; - return ModuleGVSummariesIter == RHS.ModuleGVSummariesIter; - } - // First ensure RHS also writing the full index, and that both are - // writing the same full index. - if (RHS.Writer.ModuleToSummariesForIndex || - &Writer.Index != &RHS.Writer.Index) - return false; - // Already determined above that maps are the same, so if one is - // empty, they both are. - if (Writer.Index.begin() == Writer.Index.end()) - return true; - // Ensure the IndexGVSummariesIter are iterating over the same - // container before checking them below. - if (IndexSummariesIter != RHS.IndexSummariesIter) - return false; - return IndexGVSummariesIter == RHS.IndexGVSummariesIter; + /// Calls the callback for each value GUID and summary to be written to + /// bitcode. This hides the details of whether they are being pulled from the + /// entire index or just those in a provided ModuleToSummariesForIndex map. + void forEachSummary(std::function<void(GVInfo)> Callback) { + if (ModuleToSummariesForIndex) { + for (auto &M : *ModuleToSummariesForIndex) + for (auto &Summary : M.second) + Callback(Summary); + } else { + for (auto &Summaries : Index) + for (auto &Summary : Summaries.second) + Callback({Summaries.first, Summary.get()}); } - }; - - /// Obtain the start iterator over the summaries to be written. - iterator begin() { return iterator(*this, /*IsAtEnd=*/false); } - /// Obtain the end iterator over the summaries to be written. - iterator end() { return iterator(*this, /*IsAtEnd=*/true); } + } /// Main entry point for writing a combined index to bitcode. 
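The hand-written two-level iterator deleted above collapses into forEachSummary, which routes both traversal shapes through one callback. The control flow survives in miniature below, with toy standard-library types standing in for the index:

#include <cassert>
#include <cstdint>
#include <functional>
#include <map>
#include <string>
#include <vector>

using GVInfo = std::pair<uint64_t, int>; // (GUID, summary) stand-in
using SummaryMap = std::map<uint64_t, int>;

// Visit either a selected per-module subset or the whole index.
static void forEachSummary(const std::map<std::string, SummaryMap> *Subset,
                           const std::map<uint64_t, std::vector<int>> &Index,
                           std::function<void(GVInfo)> Callback) {
  if (Subset) {
    for (const auto &M : *Subset)
      for (const auto &Summary : M.second)
        Callback(Summary);
  } else {
    for (const auto &Summaries : Index)
      for (const auto &Summary : Summaries.second)
        Callback({Summaries.first, Summary});
  }
}

int main() {
  std::map<uint64_t, std::vector<int>> Index{{1, {10, 11}}, {2, {20}}};
  int Count = 0;
  forEachSummary(nullptr, Index, [&](GVInfo) { ++Count; });
  assert(Count == 3); // every summary visited exactly once
}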
void write(); @@ -688,6 +570,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_RETURNS_TWICE; case Attribute::SExt: return bitc::ATTR_KIND_S_EXT; + case Attribute::Speculatable: + return bitc::ATTR_KIND_SPECULATABLE; case Attribute::StackAlignment: return bitc::ATTR_KIND_STACK_ALIGNMENT; case Attribute::StackProtect: @@ -1608,6 +1492,7 @@ void ModuleBitcodeWriter::writeDISubprogram(const DISubprogram *N, Record.push_back(VE.getMetadataOrNullID(N->getDeclaration())); Record.push_back(VE.getMetadataOrNullID(N->getVariables().get())); Record.push_back(N->getThisAdjustment()); + Record.push_back(VE.getMetadataOrNullID(N->getThrownTypes().get())); Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev); Record.clear(); @@ -1643,9 +1528,7 @@ void ModuleBitcodeWriter::writeDINamespace(const DINamespace *N, unsigned Abbrev) { Record.push_back(N->isDistinct() | N->getExportSymbols() << 1); Record.push_back(VE.getMetadataOrNullID(N->getScope())); - Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); - Record.push_back(N->getLine()); Stream.EmitRecord(bitc::METADATA_NAMESPACE, Record, Abbrev); Record.clear(); @@ -3527,16 +3410,16 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION}); // Create value IDs for undefined references. - for (const auto &I : *this) { + forEachSummary([&](GVInfo I) { if (auto *VS = dyn_cast<GlobalVarSummary>(I.second)) { for (auto &RI : VS->refs()) assignValueId(RI.getGUID()); - continue; + return; } auto *FS = dyn_cast<FunctionSummary>(I.second); if (!FS) - continue; + return; for (auto &RI : FS->refs()) assignValueId(RI.getGUID()); @@ -3552,7 +3435,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { } assignValueId(GUID); } - } + }); for (const auto &GVI : valueIds()) { Stream.EmitRecord(bitc::FS_VALUE_GUID, @@ -3623,7 +3506,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { NameVals.clear(); }; - for (const auto &I : *this) { + forEachSummary([&](GVInfo I) { GlobalValueSummary *S = I.second; assert(S); @@ -3635,7 +3518,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { // Will process aliases as a post-pass because the reader wants all // global to be loaded first. 
Aliases.push_back(AS); - continue; + return; } if (auto *VS = dyn_cast<GlobalVarSummary>(S)) { @@ -3651,7 +3534,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { FSModRefsAbbrev); NameVals.clear(); MaybeEmitOriginalName(*S); - continue; + return; } auto *FS = cast<FunctionSummary>(S); @@ -3699,7 +3582,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Stream.EmitRecord(Code, NameVals, FSAbbrev); NameVals.clear(); MaybeEmitOriginalName(*S); - } + }); for (auto *AS : Aliases) { auto AliasValueId = SummaryToValueIdMap[AS]; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d99065b1b67a..b11e30c359b3 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -820,7 +820,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { const DILocalVariable *V = MI->getDebugVariable(); if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) { - StringRef Name = SP->getDisplayName(); + StringRef Name = SP->getName(); if (!Name.empty()) OS << Name << ":"; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 2571f6869651..786b11618d75 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -17,6 +17,7 @@ #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/Line.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/TypeDatabase.h" #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" @@ -237,7 +238,7 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { // The display name includes function template arguments. Drop them to match // MSVC. - StringRef DisplayName = SP->getDisplayName().split('<').first; + StringRef DisplayName = SP->getName().split('<').first; const DIScope *Scope = SP->getScope().resolve(); TypeIndex TI; @@ -392,7 +393,7 @@ void CodeViewDebug::endModule() { // subprograms. switchToDebugSectionForSymbol(nullptr); - MCSymbol *CompilerInfo = beginCVSubsection(ModuleSubstreamKind::Symbols); + MCSymbol *CompilerInfo = beginCVSubsection(ModuleDebugFragmentKind::Symbols); emitCompilerInformation(); endCVSubsection(CompilerInfo); @@ -416,7 +417,7 @@ void CodeViewDebug::endModule() { // Emit UDT records for any types used by global variables. if (!GlobalUDTs.empty()) { - MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols); + MCSymbol *SymbolsEnd = beginCVSubsection(ModuleDebugFragmentKind::Symbols); emitDebugInfoForUDTs(GlobalUDTs); endCVSubsection(SymbolsEnd); } @@ -644,7 +645,8 @@ void CodeViewDebug::emitInlineeLinesSubsection() { return; OS.AddComment("Inlinee lines subsection"); - MCSymbol *InlineEnd = beginCVSubsection(ModuleSubstreamKind::InlineeLines); + MCSymbol *InlineEnd = + beginCVSubsection(ModuleDebugFragmentKind::InlineeLines); // We don't provide any extra file info. // FIXME: Find out if debuggers use this info. 
@@ -657,7 +659,7 @@ void CodeViewDebug::emitInlineeLinesSubsection() { OS.AddBlankLine(); unsigned FileId = maybeRecordFile(SP->getFile()); - OS.AddComment("Inlined function " + SP->getDisplayName() + " starts at " + + OS.AddComment("Inlined function " + SP->getName() + " starts at " + SP->getFilename() + Twine(':') + Twine(SP->getLine())); OS.AddBlankLine(); // The filechecksum table uses 8 byte entries for now, and file ids start at @@ -759,9 +761,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // If we have a display name, build the fully qualified name by walking the // chain of scopes. - if (!SP->getDisplayName().empty()) + if (!SP->getName().empty()) FuncName = - getFullyQualifiedName(SP->getScope().resolve(), SP->getDisplayName()); + getFullyQualifiedName(SP->getScope().resolve(), SP->getName()); // If our DISubprogram name is empty, use the mangled name. if (FuncName.empty()) @@ -769,7 +771,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // Emit a symbol subsection, required by VS2012+ to find function boundaries. OS.AddComment("Symbol subsection for " + Twine(FuncName)); - MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols); + MCSymbol *SymbolsEnd = beginCVSubsection(ModuleDebugFragmentKind::Symbols); { MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(), *ProcRecordEnd = MMI->getContext().createTempSymbol(); @@ -2114,7 +2116,7 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) { maybeRecordLocation(DL, Asm->MF); } -MCSymbol *CodeViewDebug::beginCVSubsection(ModuleSubstreamKind Kind) { +MCSymbol *CodeViewDebug::beginCVSubsection(ModuleDebugFragmentKind Kind) { MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(), *EndLabel = MMI->getContext().createTempSymbol(); OS.EmitIntValue(unsigned(Kind), 4); @@ -2174,7 +2176,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() { if (!GV->hasComdat() && !GV->isDeclarationForLinker()) { if (!EndLabel) { OS.AddComment("Symbol subsection for globals"); - EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); + EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols); } // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV)); @@ -2192,7 +2194,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() { OS.AddComment("Symbol subsection for " + Twine(GlobalValue::getRealLinkageName(GV->getName()))); switchToDebugSectionForSymbol(GVSym); - EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); + EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols); // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym); endCVSubsection(EndLabel); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 343384c51772..46b2daa1e007 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -216,7 +216,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// Opens a subsection of the given kind in a .debug$S codeview section. /// Returns an end label for use with endCVSubsection when the subsection is /// finished. 
- MCSymbol *beginCVSubsection(codeview::ModuleSubstreamKind Kind); + MCSymbol *beginCVSubsection(codeview::ModuleDebugFragmentKind Kind); void endCVSubsection(MCSymbol *EndLabel); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 16fb20dd7e20..8d25def7772c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -375,10 +375,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) { addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); } -void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) { - addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory()); -} - /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName either the struct, or a pointer to the struct, as @@ -662,6 +658,14 @@ void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) { } } +/// Add thrown types. +void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) { + for (const auto *Ty : ThrownTypes) { + DIE &TT = createAndAddDIE(dwarf::DW_TAG_thrown_type, Die); + addType(TT, cast<DIType>(Ty)); + } +} + DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) { if (!Context || isa<DIFile>(Context)) return &getUnitDie(); @@ -1077,7 +1081,6 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) { Name = "(anonymous namespace)"; DD->addAccelNamespace(Name, NDie); addGlobalName(Name, NDie, NS->getScope()); - addSourceLine(NDie, NS); if (NS->getExportSymbols()) addFlag(NDie, dwarf::DW_AT_export_symbols); return &NDie; @@ -1249,6 +1252,8 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, constructSubprogramArguments(SPDie, Args); } + addThrownTypes(SPDie, SP->getThrownTypes()); + if (SP->isArtificial()) addFlag(SPDie, dwarf::DW_AT_artificial); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index e84df4650882..8fc841703e23 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -210,7 +210,6 @@ public: void addSourceLine(DIE &Die, const DIGlobalVariable *G); void addSourceLine(DIE &Die, const DISubprogram *SP); void addSourceLine(DIE &Die, const DIType *Ty); - void addSourceLine(DIE &Die, const DINamespace *NS); void addSourceLine(DIE &Die, const DIObjCProperty *Ty); /// Add constant value entry in variable DIE. @@ -230,6 +229,9 @@ public: /// Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DINodeArray TParams); + /// Add thrown types. + void addThrownTypes(DIE &Die, DINodeArray ThrownTypes); + // FIXME: Should be reformulated in terms of addComplexAddress. /// Start with the address based on the location provided, and generate the /// DWARF information necessary to find the actual Block variable (navigating diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index c862cfd28add..c6c93811a0f9 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2226,10 +2226,11 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { ConstantInt *RetVal = lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); // Substituting this can cause recursive simplifications, which can - // invalidate our iterator. 
Use a WeakVH to hold onto it in case this + // invalidate our iterator. Use a WeakTrackingVH to hold onto it in case + // this // happens. Value *CurValue = &*CurInstIterator; - WeakVH IterHandle(CurValue); + WeakTrackingVH IterHandle(CurValue); replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); @@ -4442,9 +4443,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // using it. if (Repl->use_empty()) { // This can cause recursive deletion, which can invalidate our iterator. - // Use a WeakVH to hold onto it in case this happens. + // Use a WeakTrackingVH to hold onto it in case this happens. Value *CurValue = &*CurInstIterator; - WeakVH IterHandle(CurValue); + WeakTrackingVH IterHandle(CurValue); BasicBlock *BB = CurInstIterator->getParent(); RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); @@ -5959,7 +5960,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) { + if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) { P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp index 7b1b2d64fccc..65f58e5686e0 100644 --- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -213,10 +213,8 @@ VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, VLIWPacketizerList::~VLIWPacketizerList() { - if (VLIWScheduler) - delete VLIWScheduler; - if (ResourceTracker) - delete ResourceTracker; + delete VLIWScheduler; + delete ResourceTracker; } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 035a2ac78ed9..be0c5c2bb70e 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -37,7 +37,7 @@ bool CallLowering::lowerCall( for (auto &Arg : CS.args()) { ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, i < NumFixedArgs}; - setArgFlags(OrigArg, i + 1, DL, CS); + setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); OrigArgs.push_back(OrigArg); ++i; } @@ -83,8 +83,8 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; - if (FuncInfo.getParamAlignment(OpIdx)) - FrameAlign = FuncInfo.getParamAlignment(OpIdx); + if (FuncInfo.getParamAlignment(OpIdx - 2)) + FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2); else FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); Arg.Flags.setByValAlign(FrameAlign); diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 5fb8dfc95d3f..75be7a55bd2a 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1199,9 +1199,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { finishPendingPhis(); - // Now that the MachineFrameInfo has been configured, no further changes to - // the reserved registers are possible. 
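These WeakVH to WeakTrackingVH renames keep the same defensive idiom: park a handle on the current instruction before a transformation that may recursively delete it, then consult the handle rather than the raw pointer. A toy model using std::weak_ptr (LLVM's value handles are driven by RAUW and deletion callbacks instead, but the observable effect is the same):

#include <cassert>
#include <memory>

struct Value { int Data = 0; }; // stand-in for an IR value

struct WeakHandle {
  std::weak_ptr<Value> Ref;
  explicit WeakHandle(const std::shared_ptr<Value> &V) : Ref(V) {}
  explicit operator bool() const { return !Ref.expired(); } // still alive?
};

int main() {
  auto V = std::make_shared<Value>();
  WeakHandle IterHandle(V);
  V.reset();           // "recursive simplification" deletes the value
  assert(!IterHandle); // the handle observes the deletion safely
}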
- MRI->freezeReservedRegs(*MF); + auto &TLI = *MF->getSubtarget().getTargetLowering(); + TLI.finalizeLowering(*MF); // Merge the argument lowering and constants block with its single // successor, the LLVM-IR entry block. We want the basic block to diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 942680b6fff3..c67da8629a3b 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -58,10 +58,11 @@ bool InstructionSelector::constrainSelectedInstRegOperands( MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), Reg, OpI)); - // Tie uses to defs as indicated in MCInstrDesc. + // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been + // done. if (MO.isUse()) { int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO); - if (DefIdx != -1) + if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx)) I.tieOperands(DefIdx, OpI); } } diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index a2773cccc5db..bd04acd049db 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -541,7 +541,8 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, MFI.ensureMaxAlignment(YamlMFI.MaxAlignment); MFI.setAdjustsStack(YamlMFI.AdjustsStack); MFI.setHasCalls(YamlMFI.HasCalls); - MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); + if (YamlMFI.MaxCallFrameSize != ~0u) + MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); MFI.setHasVAStart(YamlMFI.HasVAStart); MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp index b6624b88fe23..d017b21f0a59 100644 --- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp @@ -286,7 +286,8 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, YamlMFI.MaxAlignment = MFI.getMaxAlignment(); YamlMFI.AdjustsStack = MFI.adjustsStack(); YamlMFI.HasCalls = MFI.hasCalls(); - YamlMFI.MaxCallFrameSize = MFI.getMaxCallFrameSize(); + YamlMFI.MaxCallFrameSize = MFI.isMaxCallFrameSizeComputed() + ? MFI.getMaxCallFrameSize() : ~0u; YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment(); YamlMFI.HasVAStart = MFI.hasVAStart(); YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); diff --git a/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp new file mode 100644 index 000000000000..7de8434df806 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -0,0 +1,218 @@ +//===-- MachineFrameInfo.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Implements MachineFrameInfo that manages the stack frame. 
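The paired MIRPrinter/MIRParser changes above serialize an uncomputed MaxCallFrameSize as ~0u, so a MIR round trip preserves the "not yet computed" state instead of inventing a value. A compact model of that round trip:

#include <cassert>
#include <cstdint>

struct FrameInfoModel {
  uint32_t MaxCallFrameSize = ~0u; // sentinel: not computed yet
  bool isMaxCallFrameSizeComputed() const { return MaxCallFrameSize != ~0u; }
};

int main() {
  FrameInfoModel MFI;
  // Print side: emit the sentinel when the value was never computed.
  uint32_t Serialized =
      MFI.isMaxCallFrameSizeComputed() ? MFI.MaxCallFrameSize : ~0u;
  // Parse side: only call the setter for a real value.
  FrameInfoModel Parsed;
  if (Serialized != ~0u)
    Parsed.MaxCallFrameSize = Serialized;
  assert(!Parsed.isMaxCallFrameSizeComputed()); // state survives the trip
}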
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFrameInfo.h" + +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> + +#define DEBUG_TYPE "codegen" + +using namespace llvm; + +void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { + if (!StackRealignable) + assert(Align <= StackAlignment && + "For targets without stack realignment, Align is out of limit!"); + if (MaxAlignment < Align) MaxAlignment = Align; +} + +/// Clamp the alignment if requested and emit a warning. +static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, + unsigned StackAlign) { + if (!ShouldClamp || Align <= StackAlign) + return Align; + DEBUG(dbgs() << "Warning: requested alignment " << Align + << " exceeds the stack alignment " << StackAlign + << " when stack realignment is off" << '\n'); + return StackAlign; +} + +int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, + bool isSS, const AllocaInst *Alloca) { + assert(Size != 0 && "Cannot allocate zero size stack objects!"); + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, + !isSS)); + int Index = (int)Objects.size() - NumFixedObjects - 1; + assert(Index >= 0 && "Bad frame index!"); + ensureMaxAlignment(Alignment); + return Index; +} + +int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, + unsigned Alignment) { + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + CreateStackObject(Size, Alignment, true); + int Index = (int)Objects.size() - NumFixedObjects - 1; + ensureMaxAlignment(Alignment); + return Index; +} + +int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, + const AllocaInst *Alloca) { + HasVarSizedObjects = true; + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true)); + ensureMaxAlignment(Alignment); + return (int)Objects.size()-NumFixedObjects-1; +} + +int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, + bool Immutable, bool isAliased) { + assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. Note that unlike the non-fixed case, if the + // stack needs realignment, we can't assume that the stack will in fact be + // aligned. + unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, + /*isSS*/ false, + /*Alloca*/ nullptr, isAliased)); + return -++NumFixedObjects; +} + +int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, + int64_t SPOffset, + bool Immutable) { + unsigned Align = MinAlign(SPOffset, ForcedRealign ? 
1 : StackAlignment); + Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, + /*isSS*/ true, + /*Alloca*/ nullptr, + /*isAliased*/ false)); + return -++NumFixedObjects; +} + +BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + BitVector BV(TRI->getNumRegs()); + + // Before CSI is calculated, no registers are considered pristine. They can be + // freely used and PEI will make sure they are saved. + if (!isCalleeSavedInfoValid()) + return BV; + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; + ++CSR) + BV.set(*CSR); + + // Saved CSRs are not pristine. + for (auto &I : getCalleeSavedInfo()) + for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) + BV.reset(*S); + + return BV; +} + +unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + unsigned MaxAlign = getMaxAlignment(); + int Offset = 0; + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { + if (isDeadObjectIndex(i)) + continue; + Offset += getObjectSize(i); + unsigned Align = getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); + } + + if (adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (adjustsStack() || hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + + return (unsigned)Offset; +} + +void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ + if (Objects.empty()) return; + + const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering(); + int ValOffset = (FI ? 
FI->getOffsetOfLocalArea() : 0); + + OS << "Frame Objects:\n"; + + for (unsigned i = 0, e = Objects.size(); i != e; ++i) { + const StackObject &SO = Objects[i]; + OS << " fi#" << (int)(i-NumFixedObjects) << ": "; + if (SO.Size == ~0ULL) { + OS << "dead\n"; + continue; + } + if (SO.Size == 0) + OS << "variable sized"; + else + OS << "size=" << SO.Size; + OS << ", align=" << SO.Alignment; + + if (i < NumFixedObjects) + OS << ", fixed"; + if (i < NumFixedObjects || SO.SPOffset != -1) { + int64_t Off = SO.SPOffset - ValOffset; + OS << ", at location [SP"; + if (Off > 0) + OS << "+" << Off; + else if (Off < 0) + OS << Off; + OS << "]"; + } + OS << "\n"; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const { + print(MF, dbgs()); +} +#endif diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index c9767a25e908..ac4ccb81b884 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -757,214 +757,6 @@ void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) { /// \} //===----------------------------------------------------------------------===// -// MachineFrameInfo implementation -//===----------------------------------------------------------------------===// - -/// Make sure the function is at least Align bytes aligned. -void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { - if (!StackRealignable) - assert(Align <= StackAlignment && - "For targets without stack realignment, Align is out of limit!"); - if (MaxAlignment < Align) MaxAlignment = Align; -} - -/// Clamp the alignment if requested and emit a warning. -static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, - unsigned StackAlign) { - if (!ShouldClamp || Align <= StackAlign) - return Align; - DEBUG(dbgs() << "Warning: requested alignment " << Align - << " exceeds the stack alignment " << StackAlign - << " when stack realignment is off" << '\n'); - return StackAlign; -} - -/// Create a new statically sized stack object, returning a nonnegative -/// identifier to represent it. -int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, - bool isSS, const AllocaInst *Alloca) { - assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, - !isSS)); - int Index = (int)Objects.size() - NumFixedObjects - 1; - assert(Index >= 0 && "Bad frame index!"); - ensureMaxAlignment(Alignment); - return Index; -} - -/// Create a new statically sized stack object that represents a spill slot, -/// returning a nonnegative identifier to represent it. -int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, - unsigned Alignment) { - Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); - CreateStackObject(Size, Alignment, true); - int Index = (int)Objects.size() - NumFixedObjects - 1; - ensureMaxAlignment(Alignment); - return Index; -} - -/// Notify the MachineFrameInfo object that a variable sized object has been -/// created. This must be created whenever a variable sized object is created, -/// whether or not the index returned is actually used. 
-int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, - const AllocaInst *Alloca) { - HasVarSizedObjects = true; - Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); - Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true)); - ensureMaxAlignment(Alignment); - return (int)Objects.size()-NumFixedObjects-1; -} - -/// Create a new object at a fixed location on the stack. -/// All fixed objects should be created before other objects are created for -/// efficiency. By default, fixed objects are immutable. This returns an -/// index with a negative value. -int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable, bool isAliased) { - assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); - // The alignment of the frame index can be determined from its offset from - // the incoming frame position. If the frame object is at offset 32 and - // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. Note that unlike the non-fixed case, if the - // stack needs realignment, we can't assume that the stack will in fact be - // aligned. - unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); - Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/ false, - /*Alloca*/ nullptr, isAliased)); - return -++NumFixedObjects; -} - -/// Create a spill slot at a fixed location on the stack. -/// Returns an index with a negative value. -int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, - int64_t SPOffset, - bool Immutable) { - unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); - Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/ true, - /*Alloca*/ nullptr, - /*isAliased*/ false)); - return -++NumFixedObjects; -} - -BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - BitVector BV(TRI->getNumRegs()); - - // Before CSI is calculated, no registers are considered pristine. They can be - // freely used and PEI will make sure they are saved. - if (!isCalleeSavedInfoValid()) - return BV; - - const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; - ++CSR) - BV.set(*CSR); - - // Saved CSRs are not pristine. - for (auto &I : getCalleeSavedInfo()) - for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) - BV.reset(*S); - - return BV; -} - -unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - unsigned MaxAlign = getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. 
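// Illustration (standalone C++, not from this patch): the loop in
// estimateStackSize() below rounds the running offset up to each object's
// alignment with (Offset+Align-1)/Align*Align, and the final rounding uses the
// mask form (Offset+AlignMask) & ~AlignMask. For power-of-two alignments the
// two forms are equivalent:
#include <cassert>
#include <cstdint>

static uint64_t alignToDiv(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) / Align * Align;
}

static uint64_t alignToMask(uint64_t Offset, uint64_t Align) {
  uint64_t Mask = Align - 1; // valid only when Align is a power of two
  return (Offset + Mask) & ~Mask;
}

int main() {
  for (uint64_t Align = 1; Align <= 64; Align <<= 1)
    for (uint64_t Off = 0; Off < 256; ++Off)
      assert(alignToDiv(Off, Align) == alignToMask(Off, Align));
  assert(alignToDiv(13, 8) == 16); // a 13-byte offset rounded for an 8-byte slot
}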
- - for (int i = getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { - if (isDeadObjectIndex(i)) - continue; - Offset += getObjectSize(i); - unsigned Align = getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (adjustsStack() || hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - -void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ - if (Objects.empty()) return; - - const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering(); - int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); - - OS << "Frame Objects:\n"; - - for (unsigned i = 0, e = Objects.size(); i != e; ++i) { - const StackObject &SO = Objects[i]; - OS << " fi#" << (int)(i-NumFixedObjects) << ": "; - if (SO.Size == ~0ULL) { - OS << "dead\n"; - continue; - } - if (SO.Size == 0) - OS << "variable sized"; - else - OS << "size=" << SO.Size; - OS << ", align=" << SO.Alignment; - - if (i < NumFixedObjects) - OS << ", fixed"; - if (i < NumFixedObjects || SO.SPOffset != -1) { - int64_t Off = SO.SPOffset - ValOffset; - OS << ", at location [SP"; - if (Off > 0) - OS << "+" << Off; - else if (Off < 0) - OS << Off; - OS << "]"; - } - OS << "\n"; - } -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const { - print(MF, dbgs()); -} -#endif - -//===----------------------------------------------------------------------===// // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 1faf6292a9c1..d665201a5d17 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -2350,7 +2350,7 @@ MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB, const MachineInstr &Orig, int FrameIndex) { const MDNode *Var = Orig.getDebugVariable(); - auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression()); + const auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression()); bool IsIndirect = Orig.isIndirectDebugValue(); uint64_t Offset = IsIndirect ? Orig.getOperand(1).getImm() : 0; DebugLoc DL = Orig.getDebugLoc(); @@ -2359,13 +2359,8 @@ MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB, // If the DBG_VALUE already was a memory location, add an extra // DW_OP_deref. 
Otherwise just turning this from a register into a // memory/indirect location is sufficient. - if (IsIndirect) { - SmallVector<uint64_t, 8> Ops; - Ops.push_back(dwarf::DW_OP_deref); - if (Expr) - Ops.append(Expr->elements_begin(), Expr->elements_end()); - Expr = DIExpression::get(Expr->getContext(), Ops); - } + if (IsIndirect) + Expr = DIExpression::prepend(Expr, DIExpression::WithDeref); return BuildMI(BB, I, DL, Orig.getDesc()) .addFrameIndex(FrameIndex) .addImm(Offset) diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 570a0cd0ba90..549f07ecd9ce 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -761,6 +761,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } else if (MaxCSFrameIndex >= MinCSFrameIndex) { // Be careful about underflow in comparisons against MinCSFrameIndex. for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) { + if (MFI.isDeadObjectIndex(i)) + continue; + unsigned Align = MFI.getObjectAlignment(i); // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1251ae6262b8..03698ac862af 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" @@ -236,10 +237,14 @@ namespace { SDValue visitSUB(SDNode *N); SDValue visitADDC(SDNode *N); SDValue visitUADDO(SDNode *N); + SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitSUBC(SDNode *N); SDValue visitUSUBO(SDNode *N); SDValue visitADDE(SDNode *N); + SDValue visitADDCARRY(SDNode *N); + SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); SDValue visitSUBE(SDNode *N); + SDValue visitSUBCARRY(SDNode *N); SDValue visitMUL(SDNode *N); SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); @@ -369,14 +374,14 @@ namespace { SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildLogBase2(SDValue Op, const SDLoc &DL); - SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip); + SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags); + SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); + SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); + SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal); + SDNodeFlags Flags, bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal); + SDNodeFlags Flags, bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -396,6 +401,7 @@ namespace { SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1,
SDValue VecIn2, unsigned LeftIdx); + SDValue matchVSelectOpSizesWithSetCC(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -644,7 +650,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. if (!Options->NoSignedZerosFPMath && - !Op.getNode()->getFlags()->hasNoSignedZeros()) + !Op.getNode()->getFlags().hasNoSignedZeros()) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) @@ -682,7 +688,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); - const SDNodeFlags *Flags = Op.getNode()->getFlags(); + const SDNodeFlags Flags = Op.getNode()->getFlags(); switch (Op.getOpcode()) { default: llvm_unreachable("Unknown code"); @@ -965,8 +971,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); - APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + KnownBits Known; + if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO)) return false; // Revisit the node. @@ -1412,7 +1418,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SUBC: return visitSUBC(N); case ISD::USUBO: return visitUSUBO(N); case ISD::ADDE: return visitADDE(N); + case ISD::ADDCARRY: return visitADDCARRY(N); case ISD::SUBE: return visitSUBE(N); + case ISD::SUBCARRY: return visitSUBCARRY(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1866,14 +1874,31 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (isNullConstant(N1)) return N0; - // fold ((c1-A)+c2) -> (c1+c2)-A if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { - if (N0.getOpcode() == ISD::SUB) - if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), - N0.getOperand(1)); + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { + // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic. + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), + N0.getOperand(1)); + } + + // add (sext i1 X), 1 -> zext (not i1 X) + // We don't transform this pattern: + // add (zext i1 X), -1 -> sext (not i1 X) + // because most (?) targets generate better code for the zext form. 
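// Illustration (standalone C++, not from this patch): why the fold described
// above holds for a boolean X. sext(i1 X) is 0 or -1, so adding 1 yields 1 or
// 0, which is exactly zext(not X):
#include <cassert>
#include <cstdint>

int main() {
  for (int X = 0; X <= 1; ++X) {
    int64_t Sext = X ? -1 : 0;  // sign-extended i1
    uint64_t Zext = !X ? 1 : 0; // zero-extended (not i1 X)
    assert(uint64_t(Sext + 1) == Zext);
  }
}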
+ if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && + isOneConstantOrOneSplatConstant(N1)) { + SDValue X = N0.getOperand(0); + if ((!LegalOperations || + (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && + X.getScalarValueSizeInBits() == 1) { + SDValue Not = DAG.getNOT(DL, X, X.getValueType()); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); } + } } if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -1992,6 +2017,11 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) } } + // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) + if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) + return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(), + N0, N1.getOperand(0), N1.getOperand(2)); + return SDValue(); } @@ -2055,6 +2085,26 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) { return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), DAG.getConstant(0, DL, CarryVT)); + if (SDValue Combined = visitUADDOLike(N0, N1, N)) + return Combined; + + if (SDValue Combined = visitUADDOLike(N1, N0, N)) + return Combined; + + return SDValue(); +} + +SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { + // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) + // If Y + 1 cannot overflow. + if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) { + SDValue Y = N1.getOperand(0); + SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType()); + if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never) + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y, + N1.getOperand(2)); + } + return SDValue(); } @@ -2077,6 +2127,43 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitADDCARRY(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && !N1C) + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), + N1, N0, CarryIn); + + // fold (addcarry x, y, false) -> (uaddo x, y) + if (isNullConstant(CarryIn)) + return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N0, N1); + + if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N)) + return Combined; + + if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N)) + return Combined; + + return SDValue(); +} + +SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, + SDNode *N) { + // Iff the flag result is dead: + // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry) + if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::UADDO) && + isNullConstant(N1) && !N->hasAnyUseOfValue(1)) + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), + N0.getOperand(0), N0.getOperand(1), CarryIn); + + return SDValue(); +} + // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, @@ -2143,13 +2230,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // 0 - X --> 0 if the sub is NUW. - if (N->getFlags()->hasNoUnsignedWrap()) + if (N->getFlags().hasNoUnsignedWrap()) return N0; if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) { // N1 is either 0 or the minimum signed value. 
If the sub is NSW, then // N1 must be 0 because negating the minimum signed value is undefined. - if (N->getFlags()->hasNoSignedWrap()) + if (N->getFlags().hasNoSignedWrap()) return N0; // 0 - X --> X if X is 0 or the minimum signed value. @@ -2309,6 +2396,18 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSUBCARRY(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (subcarry x, y, false) -> (usubo x, y) + if (isNullConstant(CarryIn)) + return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2589,9 +2688,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // better results in that case. The target-specific lowering should learn how // to handle exact sdivs efficiently. if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && - !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() && - (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { + !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // Target-specific implementation of sdiv x, pow2. if (SDValue Res = BuildSDIVPow2(N)) return Res; @@ -3766,7 +3864,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, EVT VT = N->getValueType(0); if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) return SDValue(); - if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) @@ -3880,8 +3978,15 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { SDValue N0 = N.getOperand(0); unsigned Opc0 = N0.getOpcode(); + if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL) + return false; - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + ConstantSDNode *N1C = nullptr; + // SHL or SRL: look upstream for AND mask operand + if (Opc == ISD::AND) + N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + else if (Opc0 == ISD::AND) + N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); if (!N1C) return false; @@ -3952,7 +4057,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { EVT VT = N->getValueType(0); if (VT != MVT::i32) return SDValue(); - if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); // Look for either @@ -3967,18 +4072,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (N1.getOpcode() == ISD::OR && N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { // (or (or (and), (and)), (or (and), (and))) - SDValue N000 = N00.getOperand(0); - if (!isBSwapHWordElement(N000, Parts)) + if (!isBSwapHWordElement(N00, Parts)) return SDValue(); - SDValue N001 = N00.getOperand(1); - if (!isBSwapHWordElement(N001, Parts)) + if (!isBSwapHWordElement(N01, Parts)) return SDValue(); - SDValue N010 = N01.getOperand(0); - if (!isBSwapHWordElement(N010, Parts)) + SDValue N10 = N1.getOperand(0); + if (!isBSwapHWordElement(N10, Parts)) return SDValue(); - SDValue N011 = N01.getOperand(1); - if (!isBSwapHWordElement(N011, Parts)) + SDValue N11 = N1.getOperand(1); + if (!isBSwapHWordElement(N11, Parts)) return SDValue(); } else { // (or (or (or (and), (and)), (and)), (and)) @@ -5210,6 +5313,17 @@ SDValue DAGCombiner::visitSHL(SDNode 
*N) { } } + // If the target supports masking y in (shl, y), + // fold (shl x, (and y, ((1 << numbits(x)) - 1))) -> (shl x, y) + if (TLI.isOperationLegal(ISD::SHL, VT) && + TLI.supportsModuloShift(ISD::SHL, VT) && N1->getOpcode() == ISD::AND) { + if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { + if (Mask->getZExtValue() == OpSizeInBits - 1) { + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0)); + } + } + } + ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (shl c1, c2) -> c1<<c2 @@ -5322,7 +5436,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && - cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) { + N0->getFlags().hasExact()) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { uint64_t C1 = N0C1->getZExtValue(); uint64_t C2 = N1C->getZExtValue(); @@ -5347,7 +5461,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); SDValue Shift; if (c2 > c1) { - Mask = Mask.shl(c2 - c1); + Mask <<= c2 - c1; SDLoc DL(N); Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), DAG.getConstant(c2 - c1, DL, N1.getValueType())); @@ -5408,6 +5522,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // If the target supports masking y in (sra, y), + // fold (sra x, (and y, ((1 << numbits(x)) - 1))) -> (sra x, y) + if (TLI.isOperationLegal(ISD::SRA, VT) && + TLI.supportsModuloShift(ISD::SRA, VT) && N1->getOpcode() == ISD::AND) { + if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { + if (Mask->getZExtValue() == OpSizeInBits - 1) { + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0)); + } + } + } + // Arithmetic shifting an all-sign-bit value is a no-op. if (DAG.ComputeNumSignBits(N0) == OpSizeInBits) return N0; @@ -5566,6 +5691,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // If the target supports masking y in (srl, y), + // fold (srl x, (and y, ((1 << numbits(x)) - 1))) -> (srl x, y) + if (TLI.isOperationLegal(ISD::SRL, VT) && + TLI.supportsModuloShift(ISD::SRL, VT) && N1->getOpcode() == ISD::AND) { + if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { + if (Mask->getZExtValue() == OpSizeInBits - 1) { + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0)); + } + } + } + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -5680,20 +5816,20 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { - APInt KnownZero, KnownOne; - DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne); + KnownBits Known; + DAG.computeKnownBits(N0.getOperand(0), Known); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. - if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); + if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); // If all of the bits input to the ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one.
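// Illustration (standalone C++, not from this patch): the CTLZ/SRL pattern
// above computes the "x == 0" predicate, because ctlz yields the full bit
// width (32 == 1 << 5) only for a zero input. Note also that the rewrite
// below from (U & (U - 1)) == 0 to isPowerOf2() is safe only because the
// U == 0 case returns early first (isPowerOf2 is false for zero):
#include <cassert>
#include <cstdint>

static unsigned ctlz32(uint32_t X) { // portable stand-in for ISD::CTLZ
  unsigned N = 0;
  for (uint32_t Bit = 1u << 31; Bit && !(X & Bit); Bit >>= 1)
    ++N;
  return N;                          // ctlz32(0) == 32 by convention
}

int main() {
  for (uint32_t X = 0; X < (1u << 16); ++X)
    assert((ctlz32(X) >> 5) == (X == 0 ? 1u : 0u));
}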
- APInt UnknownBits = ~KnownZero; + APInt UnknownBits = ~Known.Zero; if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. - if ((UnknownBits & (UnknownBits - 1)) == 0) { + if (UnknownBits.isPowerOf2()) { // Okay, we know that only the single bit specified by UnknownBits // could be set on input to the CTLZ node. If this bit is set, the SRL // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair @@ -6889,6 +7025,51 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } +/// If we're narrowing or widening the result of a vector select and the final +/// size is the same size as a setcc (compare) feeding the select, then try to +/// apply the cast operation to the select's operands because matching vector +/// sizes for a select condition and other operands should be more efficient. +SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { + unsigned CastOpcode = Cast->getOpcode(); + assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND || + CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND || + CastOpcode == ISD::FP_ROUND) && + "Unexpected opcode for vector select narrowing/widening"); + + // We only do this transform before legal ops because the pattern may be + // obfuscated by target-specific operations after legalization. Do not create + // an illegal select op, however, because that may be difficult to lower. + EVT VT = Cast->getValueType(0); + if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) + return SDValue(); + + SDValue VSel = Cast->getOperand(0); + if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() || + VSel.getOperand(0).getOpcode() != ISD::SETCC) + return SDValue(); + + // Does the setcc have the same vector size as the casted select? + SDValue SetCC = VSel.getOperand(0); + EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType()); + if (SetCCVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B) + SDValue A = VSel.getOperand(1); + SDValue B = VSel.getOperand(2); + SDValue CastA, CastB; + SDLoc DL(Cast); + if (CastOpcode == ISD::FP_ROUND) { + // FP_ROUND (fptrunc) has an extra flag operand to pass along. + CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1)); + CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1)); + } else { + CastA = DAG.getNode(CastOpcode, DL, VT, A); + CastB = DAG.getNode(CastOpcode, DL, VT, B); + } + return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7112,19 +7293,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0); + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } // isTruncateOf - If N is a truncate of some other value, return true, record -// the value being truncated in Op and which of Op's bits are zero in KnownZero. -// This function computes KnownZero to avoid a duplicated call to +// the value being truncated in Op and which of Op's bits are zero/one in Known. +// This function computes KnownBits to avoid a duplicated call to // computeKnownBits in the caller.
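// Illustration (standalone C++, not from this patch): the simplified test
// below, (Known.Zero | 1).isAllOnesValue(), asks "is every bit except
// possibly bit 0 known to be zero?", i.e. the truncated value can only be 0
// or 1. The same test with plain 8-bit masks:
#include <cassert>
#include <cstdint>

static bool atMostBitZeroUnknown(uint8_t KnownZero) {
  return uint8_t(KnownZero | 1) == 0xFF; // all bits above bit 0 known zero
}

int main() {
  assert(atMostBitZeroUnknown(0xFF));  // value known to be exactly 0
  assert(atMostBitZeroUnknown(0xFE));  // value is 0 or 1 (bit 0 unknown)
  assert(!atMostBitZeroUnknown(0xFC)); // bit 1 unknown: could be 2 or 3
}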
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, - APInt &KnownZero) { - APInt KnownOne; + KnownBits &Known) { if (N->getOpcode() == ISD::TRUNCATE) { Op = N->getOperand(0); - DAG.computeKnownBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, Known); return true; } @@ -7143,9 +7326,9 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, else return false; - DAG.computeKnownBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, Known); - if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + if (!(Known.Zero | 1).isAllOnesValue()) return false; return true; @@ -7170,8 +7353,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // This is valid when the truncated bits of x are already zero. // FIXME: We should extend this to work for vectors too. SDValue Op; - APInt KnownZero; - if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + KnownBits Known; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) { APInt TruncatedBits = (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? APInt(Op.getValueSizeInBits(), 0) : @@ -7179,7 +7362,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueSizeInBits(), std::min(Op.getValueSizeInBits(), VT.getSizeInBits())); - if (TruncatedBits == (KnownZero & TruncatedBits)) { + if (TruncatedBits.isSubsetOf(Known.Zero)) { if (VT.bitsGT(Op.getValueType())) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); if (VT.bitsLT(Op.getValueType())) @@ -7446,6 +7629,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { ShAmt); } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -7802,7 +7988,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), DAG.getConstant(PtrOff, DL, PtrType), - &Flags); + Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -8228,17 +8414,21 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return SDValue(N, 0); // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) + // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) // When the adde's carry is not used. 
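// Illustration (standalone C++, not from this patch): why truncation commutes
// with the add-with-carry folds above. The low bits of a sum depend only on
// the low bits of the inputs, with the carry-in still applied:
#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x123456789ABCDEF0, Y = 0x0FEDCBA987654321;
  for (uint32_t Carry = 0; Carry <= 1; ++Carry)
    assert(uint32_t(X + Y + Carry) ==
           uint32_t(uint32_t(X) + uint32_t(Y) + Carry));
}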
- if (N0.getOpcode() == ISD::ADDE && N0.hasOneUse() && - !N0.getNode()->hasAnyUseOfValue(1) && - (!LegalOperations || TLI.isOperationLegal(ISD::ADDE, VT))) { + if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) && + N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) && + (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) { SDLoc SL(N); auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); - return DAG.getNode(ISD::ADDE, SL, DAG.getVTList(VT, MVT::Glue), - X, Y, N0.getOperand(2)); + auto VTs = DAG.getVTList(VT, N0->getValueType(1)); + return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -8701,7 +8891,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } static bool isContractable(SDNode *N) { - SDNodeFlags F = cast<BinaryWithFlagsSDNode>(N)->Flags; + SDNodeFlags F = N->getFlags(); return F.hasAllowContract() || F.hasUnsafeAlgebra(); } @@ -9287,7 +9477,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + const SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) @@ -9318,7 +9508,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { GetNegatedExpression(N0, DAG, LegalOperations), Flags); // FIXME: Auto-upgrade the target/function-level option. - if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) { + if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { // fold (fadd A, 0) -> A if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) if (N1C->isZero()) @@ -9441,7 +9631,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + const SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) @@ -9461,7 +9651,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { GetNegatedExpression(N1, DAG, LegalOperations), Flags); // FIXME: Auto-upgrade the target/function-level option. 
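// Illustration (standalone C++, not from this patch): why (fadd A, 0) -> A
// above and (fsub 0, B) -> -B below are guarded by the no-signed-zeros flag.
// Under IEEE-754 round-to-nearest, -0.0 + 0.0 is +0.0 and 0.0 - 0.0 is +0.0,
// so both folds can flip the sign of a zero result:
#include <cassert>
#include <cmath>

int main() {
  double A = -0.0;
  assert(std::signbit(A) && !std::signbit(A + 0.0));  // folding would keep -0.0
  double B = 0.0;
  assert(!std::signbit(0.0 - B) && std::signbit(-B)); // folding would give -0.0
}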
- if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) { + if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { // (fsub 0, B) -> -B if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) @@ -9512,7 +9702,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + const SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) { @@ -9656,7 +9846,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1), - &Flags), &Flags); + Flags), Flags); } // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) @@ -9666,7 +9856,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1), - &Flags), + Flags), N2); } } @@ -9692,16 +9882,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP && N0 == N2) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, - DAG.getConstantFP(1.0, DL, VT), &Flags), - &Flags); + DAG.getConstantFP(1.0, DL, VT), Flags), + Flags); } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, - DAG.getConstantFP(-1.0, DL, VT), &Flags), - &Flags); + DAG.getConstantFP(-1.0, DL, VT), Flags), + Flags); } } @@ -9717,8 +9907,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; - const SDNodeFlags *Flags = N->getFlags(); - if (!UnsafeMath && !Flags->hasAllowReciprocal()) + const SDNodeFlags Flags = N->getFlags(); + if (!UnsafeMath && !Flags.hasAllowReciprocal()) return SDValue(); // Skip if current node is a reciprocal. @@ -9741,7 +9931,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { // This division is eligible for optimization only if global unsafe math // is enabled or if this division allows reciprocal formation. - if (UnsafeMath || U->getFlags()->hasAllowReciprocal()) + if (UnsafeMath || U->getFlags().hasAllowReciprocal()) Users.insert(U); } } @@ -9780,7 +9970,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) @@ -9894,8 +10084,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, - &cast<BinaryWithFlagsSDNode>(N)->Flags); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags()); if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -9915,7 +10104,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { // For now, create a Flags object for use with all unsafe math transforms. 
SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); - return buildSqrtEstimate(N0, &Flags); + return buildSqrtEstimate(N0, Flags); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -10190,6 +10379,9 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { Tmp, N0.getOperand(1)); } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -10256,6 +10448,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -10341,10 +10536,10 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (Level >= AfterLegalizeDAG && (TLI.isFPImmLegal(CVal, VT) || TLI.isOperationLegal(ISD::ConstantFP, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N0.getOperand(1)), - &cast<BinaryWithFlagsSDNode>(N0)->Flags); + return DAG.getNode( + ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), + N0->getFlags()); } } @@ -15832,7 +16027,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -15887,7 +16082,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal) { + SDNodeFlags Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); @@ -15931,7 +16126,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal) { + SDNodeFlags Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -15976,7 +16171,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. 
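// Illustration (standalone C++, not from this patch): the Newton-Raphson
// refinement quoted in BuildReciprocalEstimate's comment above,
// X_{i+1} = X_i * (2 - A * X_i), converges quadratically to 1/A from a rough
// initial estimate (each step roughly squares the relative error):
#include <cassert>
#include <cmath>

int main() {
  float A = 3.0f;
  float X = 0.3f;             // crude seed for 1/3, ~10% relative error
  for (int I = 0; I < 3; ++I) // 10% -> 1% -> 0.01% -> ~1e-8
    X = X * (2.0f - A * X);
  assert(std::fabs(X - 1.0f / A) < 1e-6f);
}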
-SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Reciprocal) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -16029,11 +16224,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, return SDValue(); } -SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { return buildSqrtEstimateImpl(Op, Flags, true); } -SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) { +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { return buildSqrtEstimateImpl(Op, Flags, false); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 377a5237f15a..a0135dc40b87 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -400,10 +400,10 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { if (!LOI->IsValid) return nullptr; - if (BitWidth > LOI->KnownZero.getBitWidth()) { + if (BitWidth > LOI->Known.getBitWidth()) { LOI->NumSignBits = 1; - LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth); - LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth); + LOI->Known.Zero = LOI->Known.Zero.zextOrTrunc(BitWidth); + LOI->Known.One = LOI->Known.One.zextOrTrunc(BitWidth); } return LOI; @@ -436,17 +436,15 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { Value *V = PN->getIncomingValue(0); if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) { DestLOI.NumSignBits = 1; - APInt Zero(BitWidth, 0); - DestLOI.KnownZero = Zero; - DestLOI.KnownOne = Zero; + DestLOI.Known = KnownBits(BitWidth); return; } if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { APInt Val = CI->getValue().zextOrTrunc(BitWidth); DestLOI.NumSignBits = Val.getNumSignBits(); - DestLOI.KnownZero = ~Val; - DestLOI.KnownOne = Val; + DestLOI.Known.Zero = ~Val; + DestLOI.Known.One = Val; } else { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); @@ -463,25 +461,23 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { DestLOI = *SrcLOI; } - assert(DestLOI.KnownZero.getBitWidth() == BitWidth && - DestLOI.KnownOne.getBitWidth() == BitWidth && + assert(DestLOI.Known.Zero.getBitWidth() == BitWidth && + DestLOI.Known.One.getBitWidth() == BitWidth && "Masks should have the same bit width as the type."); for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) { DestLOI.NumSignBits = 1; - APInt Zero(BitWidth, 0); - DestLOI.KnownZero = Zero; - DestLOI.KnownOne = Zero; + DestLOI.Known = KnownBits(BitWidth); return; } if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { APInt Val = CI->getValue().zextOrTrunc(BitWidth); DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits()); - DestLOI.KnownZero &= ~Val; - DestLOI.KnownOne &= Val; + DestLOI.Known.Zero &= ~Val; + DestLOI.Known.One &= Val; continue; } @@ -498,8 +494,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { return; } DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits); - DestLOI.KnownZero &= SrcLOI->KnownZero; - DestLOI.KnownOne &= SrcLOI->KnownOne; + DestLOI.Known.Zero &= SrcLOI->Known.Zero; + 
DestLOI.Known.One &= SrcLOI->Known.One; } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fdebb8bd00db..2654b3ad7a62 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2589,7 +2589,7 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); APInt Shift(Sz, 1); - Shift = Shift.shl(J); + Shift <<= J; Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); } @@ -3253,7 +3253,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { - const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags; + const SDNodeFlags Flags = Node->getFlags(); Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); Results.push_back(Tmp1); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9ed70c9b4db9..92b0d2ae4015 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -21,6 +21,7 @@ #include "LegalizeTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -134,6 +135,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SMULO: case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + case ISD::ADDCARRY: + case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break; + case ISD::ATOMIC_LOAD: Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break; @@ -510,9 +514,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); EVT ValueVTs[] = { N->getValueType(0), NVT }; - SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) }; + unsigned NumOps = N->getNumOperands(); + assert(NumOps <= 3 && "Too many operands"); + if (NumOps == 3) + Ops[2] = N->getOperand(2); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), - DAG.getVTList(ValueVTs), Ops); + DAG.getVTList(ValueVTs), makeArrayRef(Ops, NumOps)); // Modified the sum result - switch anything that used the old sum to use // the new one. @@ -762,6 +771,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { return Res; } +SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + llvm_unreachable("Not implemented"); +} + SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { // Promote the overflow bit trivially. 
if (ResNo == 1) @@ -924,6 +939,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SRL: case ISD::ROTL: case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + + case ISD::ADDCARRY: + case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break; } // If the result is null, the sub-method took care of registering results etc. @@ -1276,6 +1294,30 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { N->getOperand(0).getValueType().getScalarType()); } +SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { + assert(OpNo == 2 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDLoc DL(N); + + auto VT = getSetCCResultType(LHS.getValueType()); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(VT); + switch (BoolType) { + case TargetLoweringBase::UndefinedBooleanContent: + Carry = DAG.getAnyExtOrTrunc(Carry, DL, VT); + break; + case TargetLoweringBase::ZeroOrOneBooleanContent: + Carry = DAG.getZExtOrTrunc(Carry, DL, VT); + break; + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + Carry = DAG.getSExtOrTrunc(Carry, DL, VT); + break; + } + + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0); +} //===----------------------------------------------------------------------===// // Integer Result Expansion @@ -1395,6 +1437,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ADDE: case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break; + case ISD::ADDCARRY: + case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break; + case ISD::SHL: case ISD::SRA: case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; @@ -1525,11 +1570,11 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); - APInt KnownZero, KnownOne; - DAG.computeKnownBits(N->getOperand(1), KnownZero, KnownOne); + KnownBits Known; + DAG.computeKnownBits(N->getOperand(1), Known); // If we don't know anything about the high bits, exit. - if (((KnownZero|KnownOne) & HighBitMask) == 0) + if (((Known.Zero|Known.One) & HighBitMask) == 0) return false; // Get the incoming operand to be shifted. @@ -1538,7 +1583,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // If we know that any of the high bits of the shift amount are one, then we // can do this as a couple of simple shifts. - if (KnownOne.intersects(HighBitMask)) { + if (Known.One.intersects(HighBitMask)) { // Mask out the high bit, which we know is set. Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, DAG.getConstant(~HighBitMask, dl, ShTy)); @@ -1563,7 +1608,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // If we know that all of the high bits of the shift amount are zero, then we // can do this as a couple of simple shifts. - if ((KnownZero & HighBitMask) == HighBitMask) { + if (HighBitMask.isSubsetOf(Known.Zero)) { // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined // shift if x is zero. We can use XOR here because x is known to be smaller // than 32. @@ -1738,6 +1783,23 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; + bool HasOpCarry = TLI.isOperationLegalOrCustom( + N->getOpcode() == ISD::ADD ? 
ISD::ADDCARRY : ISD::SUBCARRY, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (HasOpCarry) { + SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT)); + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps); + } else { + Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps); + } + return; + } + // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support // them. TODO: Teach operation legalization how to expand unsupported // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate @@ -1767,7 +1829,8 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, ISD::UADDO : ISD::USUBO, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); if (hasOVF) { - SDVTList VTList = DAG.getVTList(NVT, NVT); + EVT OvfVT = getSetCCResultType(NVT); + SDVTList VTList = DAG.getVTList(NVT, OvfVT); TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); int RevOpc; if (N->getOpcode() == ISD::ADD) { @@ -1783,12 +1846,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, switch (BoolType) { case TargetLoweringBase::UndefinedBooleanContent: - OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF); + OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF); LLVM_FALLTHROUGH; case TargetLoweringBase::ZeroOrOneBooleanContent: + OVF = DAG.getZExtOrTrunc(OVF, dl, NVT); Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); break; case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + OVF = DAG.getSExtOrTrunc(OVF, dl, NVT); Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF); } return; @@ -1866,6 +1931,71 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } +void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDLoc dl(N); + + SDValue Ovf; + + bool HasOpCarry = TLI.isOperationLegalOrCustom( + N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY, + TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); + + if (HasOpCarry) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(LHS, LHSL, LHSH); + GetExpandedInteger(RHS, RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1)); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY; + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(Opc, dl, VTList, HiOps); + + Ovf = Hi.getValue(1); + } else { + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; + SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Calculate the overflow: addition overflows iff a + b < a, and subtraction + // overflows iff a - b > a. + auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; + Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. 
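// Illustration (standalone C++, not from this patch): the fallback path above
// detects unsigned overflow without a carry flag, using the predicates quoted
// in the comment: a + b overflows iff a + b < a, and a - b borrows iff
// a - b > a. With 8-bit wraparound:
#include <cassert>
#include <cstdint>

int main() {
  uint8_t A = 200, B = 100;
  uint8_t Sum = uint8_t(A + B);  // wraps to 44
  uint8_t Diff = uint8_t(B - A); // wraps to 156
  assert(Sum < A);               // the addition overflowed
  assert(Diff > B);              // the subtraction borrowed
  assert(uint8_t(A - B) <= A);   // 100: no borrow in this direction
}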
+ ReplaceValueWith(SDValue(N, 1), Ovf); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1)); + SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; + SDValue HiOps[3] = { LHSH, RHSH, SDValue() }; + + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -2532,29 +2662,6 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } -void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, - SDValue &Lo, SDValue &Hi) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDLoc dl(N); - - // Expand the result by simply replacing it with the equivalent - // non-overflow-checking operation. - SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ? - ISD::ADD : ISD::SUB, dl, LHS.getValueType(), - LHS, RHS); - SplitInteger(Sum, Lo, Hi); - - // Calculate the overflow: addition overflows iff a + b < a, and subtraction - // overflows iff a - b > a. - SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, - N->getOpcode () == ISD::UADDO ? - ISD::SETULT : ISD::SETUGT); - - // Use the calculated overflow everywhere. 
- ReplaceValueWith(SDValue(N, 1), Ofl); -} - void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index af55a22972a6..cde4331cc42d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -279,6 +279,7 @@ private: SDValue PromoteIntRes_SRL(SDNode *N); SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); @@ -311,6 +312,7 @@ private: SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -350,6 +352,7 @@ private: void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4a3160297d64..97a7fab6efd0 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -523,16 +523,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops); } -/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
-/// so just return the element, ignoring the index.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
- EVT VT = N->getValueType(0);
- SDValue Res = GetScalarizedVector(N->getOperand(0));
- if (Res.getValueType() != VT)
- Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
- return Res;
-}
-
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>, +/// so just return the element, ignoring the index. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue Res = GetScalarizedVector(N->getOperand(0)); + if (Res.getValueType() != VT) + Res = VT.isFloatingPoint() + ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res) + : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res); + return Res; +} /// If the input condition is a vector that needs to be scalarized, it must be /// <1 x i1>, so just convert to a normal ISD::SELECT @@ -730,7 +731,7 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(1), RHSLo, RHSHi); SDLoc dl(N); - const SDNodeFlags *Flags = N->getFlags(); + const SDNodeFlags Flags = N->getFlags(); unsigned Opcode = N->getOpcode(); Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); @@ -2219,7 +2220,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); - const SDNodeFlags *Flags = N->getFlags(); + const SDNodeFlags Flags = N->getFlags(); while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); @@ -2367,7 +2368,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - const SDNodeFlags *Flags = N->getFlags(); + const SDNodeFlags Flags = N->getFlags(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 439f67f1e155..9d949a2bbfa6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -811,8 +811,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) - if (const SDNodeFlags *Flags = N->getFlags()) - Node->intersectFlagsWith(Flags); + Node->intersectFlagsWith(N->getFlags()); return Node; } @@ -832,8 +831,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) - if (const SDNodeFlags *Flags = N->getFlags()) - Node->intersectFlagsWith(Flags); + Node->intersectFlagsWith(N->getFlags()); return Node; } @@ -852,8 +850,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) - if (const SDNodeFlags *Flags = N->getFlags()) - Node->intersectFlagsWith(Flags); + Node->intersectFlagsWith(N->getFlags()); return Node; } @@ -901,29 +898,6 @@ void SelectionDAG::allnodes_clear() { #endif } -SDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, const SDLoc &DL, - SDVTList VTs, SDValue N1, SDValue N2, - const SDNodeFlags *Flags) { - SDValue Ops[] = {N1, N2}; - - if (isBinOpWithFlags(Opcode)) { - // If no flags were passed in, use a default flags object. 
- SDNodeFlags F; - if (Flags == nullptr) - Flags = &F; - - auto *FN = newSDNode<BinaryWithFlagsSDNode>(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, *Flags); - createOperands(FN, Ops); - - return FN; - } - - auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); - createOperands(N, Ops); - return N; -} - SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); @@ -985,6 +959,12 @@ void SelectionDAG::clear() { DbgInfo->clear(); } +SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) + ? getNode(ISD::FP_EXTEND, DL, VT, Op) + : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL)); +} + SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ANY_EXTEND, DL, VT, Op) : @@ -1967,9 +1947,9 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { /// for bits that V cannot have. bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { - APInt KnownZero, KnownOne; - computeKnownBits(Op, KnownZero, KnownOne, Depth); - return (KnownZero & Mask) == Mask; + KnownBits Known; + computeKnownBits(Op, Known, Depth); + return Mask.isSubsetOf(Known.Zero); } /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that @@ -1985,31 +1965,30 @@ static const APInt *getValidShiftAmountConstant(SDValue V) { } /// Determine which bits of Op are known to be either zero or one and return -/// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are -/// those that are shared by every vector element. -void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, - APInt &KnownOne, unsigned Depth) const { +/// them in Known. For vectors, the known bits are those that are shared by +/// every vector element. +void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, + unsigned Depth) const { EVT VT = Op.getValueType(); APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); - computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth); + computeKnownBits(Op, Known, DemandedElts, Depth); } /// Determine which bits of Op are known to be either zero or one and return -/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows -/// us to only collect the known bits that are shared by the requested vector -/// elements. -void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, - APInt &KnownOne, const APInt &DemandedElts, +/// them in Known. The DemandedElts argument allows us to only collect the known +/// bits that are shared by the requested vector elements. +void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, + const APInt &DemandedElts, unsigned Depth) const { unsigned BitWidth = Op.getScalarValueSizeInBits(); - KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + Known = KnownBits(BitWidth); // Don't know anything. if (Depth == 6) return; // Limit search depth. - APInt KnownZero2, KnownOne2; + KnownBits Known2; unsigned NumElts = DemandedElts.getBitWidth(); if (!DemandedElts) @@ -2019,35 +1998,35 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, switch (Opcode) { case ISD::Constant: // We know all of the bits for a constant! 
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); - KnownZero = ~KnownOne; + Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); + Known.Zero = ~Known.One; break; case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every demanded vector element. assert(NumElts == Op.getValueType().getVectorNumElements() && "Unexpected vector size"); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { if (!DemandedElts[i]) continue; SDValue SrcOp = Op.getOperand(i); - computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1); + computeKnownBits(SrcOp, Known2, Depth + 1); // BUILD_VECTOR can implicitly truncate sources, we must handle this. if (SrcOp.getValueSizeInBits() != BitWidth) { assert(SrcOp.getValueSizeInBits() > BitWidth && "Expected BUILD_VECTOR implicit truncation"); - KnownOne2 = KnownOne2.trunc(BitWidth); - KnownZero2 = KnownZero2.trunc(BitWidth); + Known2.One = Known2.One.trunc(BitWidth); + Known2.Zero = Known2.Zero.trunc(BitWidth); } // Known bits are the values that are shared by every demanded element. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; } break; @@ -2055,7 +2034,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Collect the known bits that are shared by every vector element referenced // by the shuffle. APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); for (unsigned i = 0; i != NumElts; ++i) { @@ -2066,8 +2045,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (M < 0) { // For UNDEF elements, we don't know anything about the common state of // the shuffle result. - KnownOne.clearAllBits(); - KnownZero.clearAllBits(); + Known.One.clearAllBits(); + Known.Zero.clearAllBits(); DemandedLHS.clearAllBits(); DemandedRHS.clearAllBits(); break; @@ -2081,24 +2060,24 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Known bits are the values that are shared by every demanded element. if (!!DemandedLHS) { SDValue LHS = Op.getOperand(0); - computeKnownBits(LHS, KnownZero2, KnownOne2, DemandedLHS, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; if (!!DemandedRHS) { SDValue RHS = Op.getOperand(1); - computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } break; } case ISD::CONCAT_VECTORS: { // Split DemandedElts and test each of the demanded subvectors. 
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); EVT SubVectorVT = Op.getOperand(0).getValueType(); unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); unsigned NumSubVectors = Op.getNumOperands(); @@ -2107,12 +2086,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, DemandedSub = DemandedSub.trunc(NumSubVectorElts); if (!!DemandedSub) { SDValue Sub = Op.getOperand(i); - computeKnownBits(Sub, KnownZero2, KnownOne2, DemandedSub, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(Sub, Known2, DemandedSub, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; } break; @@ -2127,9 +2106,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); - computeKnownBits(Src, KnownZero, KnownOne, DemandedSrc, Depth + 1); + computeKnownBits(Src, Known, DemandedSrc, Depth + 1); } else { - computeKnownBits(Src, KnownZero, KnownOne, Depth + 1); + computeKnownBits(Src, Known, Depth + 1); } break; } @@ -2143,7 +2122,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Fast handling of 'identity' bitcasts. if (BitWidth == SubBitWidth) { - computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1); + computeKnownBits(N0, Known, DemandedElts, Depth + 1); break; } @@ -2167,10 +2146,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, SubDemandedElts.setBit(i * SubScale); for (unsigned i = 0; i != SubScale; ++i) { - computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts.shl(i), + computeKnownBits(N0, Known2, SubDemandedElts.shl(i), Depth + 1); - KnownOne |= KnownOne2.zext(BitWidth).shl(SubBitWidth * i); - KnownZero |= KnownZero2.zext(BitWidth).shl(SubBitWidth * i); + Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i); + Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i); } } @@ -2187,16 +2166,16 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (DemandedElts[i]) SubDemandedElts.setBit(i / SubScale); - computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts, Depth + 1); + computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Offset = (i % SubScale) * BitWidth; - KnownOne &= KnownOne2.lshr(Offset).trunc(BitWidth); - KnownZero &= KnownZero2.lshr(Offset).trunc(BitWidth); + Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); + Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; } } @@ -2204,101 +2183,91 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. 
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; + Known.One &= Known2.One; // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero |= KnownZero2; + Known.Zero |= Known2.Zero; break; case ISD::OR: - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; + Known.Zero &= Known2.Zero; // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; + Known.One |= Known2.One; break; case ISD::XOR: { - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - KnownZero = KnownZeroOut; + Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + Known.Zero = KnownZeroOut; break; } case ISD::MUL: { - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // If low bits are zero in either operand, output low known-0 bits. // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. - KnownOne.clearAllBits(); - unsigned TrailZ = KnownZero.countTrailingOnes() + - KnownZero2.countTrailingOnes(); - unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + - KnownZero2.countLeadingOnes(), + Known.One.clearAllBits(); + unsigned TrailZ = Known.Zero.countTrailingOnes() + + Known2.Zero.countTrailingOnes(); + unsigned LeadZ = std::max(Known.Zero.countLeadingOnes() + + Known2.Zero.countLeadingOnes(), BitWidth) - BitWidth; - KnownZero.clearAllBits(); - KnownZero.setLowBits(std::min(TrailZ, BitWidth)); - KnownZero.setHighBits(std::min(LeadZ, BitWidth)); + Known.Zero.clearAllBits(); + Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); + Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); break; } case ISD::UDIV: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned LeadZ = KnownZero2.countLeadingOnes(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + unsigned LeadZ = Known2.Zero.countLeadingOnes(); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + unsigned RHSUnknownLeadingOnes = Known2.One.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero.setHighBits(LeadZ); + Known.Zero.setHighBits(LeadZ); break; } case ISD::SELECT: - computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(2), Known, Depth+1); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), Known2, Depth+1); // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SELECT_CC: - computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(3), Known, Depth+1); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; - computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(2), Known2, Depth+1); // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SMULO: case ISD::UMULO: @@ -2311,49 +2280,46 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; case ISD::SHL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero = KnownZero << *ShAmt; - KnownOne = KnownOne << *ShAmt; + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero <<= *ShAmt; + Known.One <<= *ShAmt; // Low bits are known zero. - KnownZero.setLowBits(ShAmt->getZExtValue()); + Known.Zero.setLowBits(ShAmt->getZExtValue()); } break; case ISD::SRL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero.lshrInPlace(*ShAmt); - KnownOne.lshrInPlace(*ShAmt); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero.lshrInPlace(*ShAmt); + Known.One.lshrInPlace(*ShAmt); // High bits are known zero. 
- KnownZero.setHighBits(ShAmt->getZExtValue()); + Known.Zero.setHighBits(ShAmt->getZExtValue()); } break; case ISD::SRA: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero.lshrInPlace(*ShAmt); - KnownOne.lshrInPlace(*ShAmt); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero.lshrInPlace(*ShAmt); + Known.One.lshrInPlace(*ShAmt); // If we know the value of the sign bit, then we know it is copied across // the high bits by the shift amount. APInt SignMask = APInt::getSignMask(BitWidth); SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask. - if (KnownZero.intersects(SignMask)) { - KnownZero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. - } else if (KnownOne.intersects(SignMask)) { - KnownOne.setHighBits(ShAmt->getZExtValue()); // New bits are known one. + if (Known.Zero.intersects(SignMask)) { + Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. + } else if (Known.One.intersects(SignMask)) { + Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one. } } break; @@ -2374,31 +2340,44 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (NewBits.getBoolValue()) InputDemandedBits |= InSignMask; - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownOne &= InputDemandedBits; - KnownZero &= InputDemandedBits; + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.One &= InputDemandedBits; + Known.Zero &= InputDemandedBits; // If the sign bit of the input is known set or clear, then we know the // top bits of the result. - if (KnownZero.intersects(InSignMask)) { // Input sign bit known clear - KnownZero |= NewBits; - KnownOne &= ~NewBits; - } else if (KnownOne.intersects(InSignMask)) { // Input sign bit known set - KnownOne |= NewBits; - KnownZero &= ~NewBits; + if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear + Known.Zero |= NewBits; + Known.One &= ~NewBits; + } else if (Known.One.intersects(InSignMask)) { // Input sign bit known set + Known.One |= NewBits; + Known.Zero &= ~NewBits; } else { // Input sign bit unknown - KnownZero &= ~NewBits; - KnownOne &= ~NewBits; + Known.Zero &= ~NewBits; + Known.One &= ~NewBits; } break; } case ISD::CTTZ: - case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: { + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + // If we have a known 1, its position is our upper bound. + unsigned PossibleTZ = Known2.One.countTrailingZeros(); + unsigned LowBits = Log2_32(PossibleTZ) + 1; + Known.Zero.setBitsFrom(LowBits); + break; + } case ISD::CTLZ: - case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTLZ_ZERO_UNDEF: { + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + // If we have a known 1, its position is our upper bound. 
+ unsigned PossibleLZ = Known2.One.countLeadingZeros(); + unsigned LowBits = Log2_32(PossibleLZ) + 1; + Known.Zero.setBitsFrom(LowBits); + break; + } case ISD::CTPOP: { - KnownZero.setBitsFrom(Log2_32(BitWidth)+1); + Known.Zero.setBitsFrom(Log2_32(BitWidth)+1); break; } case ISD::LOAD: { @@ -2407,36 +2386,35 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); - KnownZero.setBitsFrom(MemBits); + Known.Zero.setBitsFrom(MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { if (LD->getExtensionType() == ISD::NON_EXTLOAD) - computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne); + computeKnownBitsFromRangeMetadata(*Ranges, Known); } break; } case ISD::ZERO_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, + Known.Zero = Known.Zero.trunc(InBits); + Known.One = Known.One.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts.zext(InVT.getVectorNumElements()), Depth + 1); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - KnownZero.setBitsFrom(InBits); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); + Known.Zero.setBitsFrom(InBits); break; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - KnownZero.setBitsFrom(InBits); + Known.Zero = Known.Zero.trunc(InBits); + Known.One = Known.One.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); + Known.Zero.setBitsFrom(InBits); break; } // TODO ISD::SIGN_EXTEND_VECTOR_INREG @@ -2444,49 +2422,47 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); + Known.Zero = Known.Zero.trunc(InBits); + Known.One = Known.One.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); // If the sign bit is known to be zero or one, then sext will extend // it to the top bits, else it will just zext. 
- KnownZero = KnownZero.sext(BitWidth); - KnownOne = KnownOne.sext(BitWidth); + Known.Zero = Known.Zero.sext(BitWidth); + Known.One = Known.One.sext(BitWidth); break; } case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + Known.Zero = Known.Zero.trunc(InBits); + Known.One = Known.One.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, Depth+1); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); break; } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.zext(InBits); - KnownOne = KnownOne.zext(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); + Known.Zero = Known.Zero.zext(InBits); + Known.One = Known.One.zext(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero = Known.Zero.trunc(BitWidth); + Known.One = Known.One.trunc(BitWidth); break; } case ISD::AssertZext: { EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero |= (~InMask); - KnownOne &= (~KnownZero); + computeKnownBits(Op.getOperand(0), Known, Depth+1); + Known.Zero |= (~InMask); + Known.One &= (~Known.Zero); break; } case ISD::FGETSIGN: // All bits are zero except the low bit. - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; case ISD::USUBO: case ISD::SSUBO: @@ -2495,7 +2471,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; } LLVM_FALLTHROUGH; @@ -2509,16 +2485,16 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is // from [0-C]. - if ((KnownZero2 & MaskV) == MaskV) { + if ((Known2.Zero & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); // Top bits known zero. - KnownZero.setHighBits(NLZ2); + Known.Zero.setHighBits(NLZ2); } } } @@ -2526,27 +2502,26 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // If low bits are know to be zero in both operands, then we know they are // going to be 0 in the result. Both addition and complement operations // preserve the low zero bits. 
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + unsigned KnownZeroLow = Known2.Zero.countTrailingOnes(); if (KnownZeroLow == 0) break; - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); KnownZeroLow = std::min(KnownZeroLow, - KnownZero2.countTrailingOnes()); - KnownZero.setBits(0, KnownZeroLow); + Known2.Zero.countTrailingOnes()); + Known.Zero.setLowBits(KnownZeroLow); break; } case ISD::UADDO: case ISD::SADDO: + case ISD::ADDCARRY: if (Op.getResNo() == 1) { // If we know the result of a setcc has the top bits zero, use this info. if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; } LLVM_FALLTHROUGH; @@ -2560,31 +2535,30 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // known to be clear. For example, if one input has the top 10 bits clear // and the other has the top 8 bits clear, we know the top 7 bits of the // output must be clear. - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned KnownZeroHigh = KnownZero2.countLeadingOnes(); - unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + unsigned KnownZeroHigh = Known2.Zero.countLeadingOnes(); + unsigned KnownZeroLow = Known2.Zero.countTrailingOnes(); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); KnownZeroHigh = std::min(KnownZeroHigh, - KnownZero2.countLeadingOnes()); + Known2.Zero.countLeadingOnes()); KnownZeroLow = std::min(KnownZeroLow, - KnownZero2.countTrailingOnes()); + Known2.Zero.countTrailingOnes()); - if (Opcode == ISD::ADDE) { - // With ADDE, a carry bit may be added in, so we can only use this - // information if we know (at least) that the low two bits are clear. - // We then return to the caller that the low bit is unknown but that - // other bits are known zero. + if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) { + // With ADDE and ADDCARRY, a carry bit may be added in, so we can only + // use this information if we know (at least) that the low two bits are + // clear. We then return to the caller that the low bit is unknown but + // that other bits are known zero. if (KnownZeroLow >= 2) - KnownZero.setBits(1, KnownZeroLow); + Known.Zero.setBits(1, KnownZeroLow); break; } - KnownZero.setLowBits(KnownZeroLow); + Known.Zero.setLowBits(KnownZeroLow); if (KnownZeroHigh > 1) - KnownZero.setHighBits(KnownZeroHigh - 1); + Known.Zero.setHighBits(KnownZeroHigh - 1); break; } case ISD::SREM: @@ -2592,23 +2566,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // The low bits of the first operand are unchanged by the srem. 
- KnownZero = KnownZero2 & LowBits; - KnownOne = KnownOne2 & LowBits; + Known.Zero = Known2.Zero & LowBits; + Known.One = Known2.One & LowBits; // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) - KnownZero |= ~LowBits; + if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits)) + Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) - KnownOne |= ~LowBits; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0)) + Known.One |= ~LowBits; + assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); } } break; @@ -2617,42 +2590,39 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // The upper bits are all zero, the lower ones are unchanged. - KnownZero = KnownZero2 | ~LowBits; - KnownOne = KnownOne2 & LowBits; + Known.Zero = Known2.Zero | ~LowBits; + Known.One = Known2.One & LowBits; break; } } // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - - uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), - KnownZero2.countLeadingOnes()); - KnownOne.clearAllBits(); - KnownZero.clearAllBits(); - KnownZero.setHighBits(Leaders); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + + uint32_t Leaders = std::max(Known.Zero.countLeadingOnes(), + Known2.Zero.countLeadingOnes()); + Known.One.clearAllBits(); + Known.Zero.clearAllBits(); + Known.Zero.setHighBits(Leaders); break; } case ISD::EXTRACT_ELEMENT: { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), Known, Depth+1); const unsigned Index = Op.getConstantOperandVal(1); const unsigned BitWidth = Op.getValueSizeInBits(); // Remove low part of known bits mask - KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth); - KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth); + Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth); + Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth); // Remove high part of known bit mask - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); + Known.Zero = Known.Zero.trunc(BitWidth); + Known.One = Known.One.trunc(BitWidth); break; } case ISD::EXTRACT_VECTOR_ELT: { @@ -2665,22 +2635,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know // anything about the extended bits. 
if (BitWidth > EltBitWidth) { - KnownZero = KnownZero.trunc(EltBitWidth); - KnownOne = KnownOne.trunc(EltBitWidth); + Known.Zero = Known.Zero.trunc(EltBitWidth); + Known.One = Known.One.trunc(EltBitWidth); } ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) { // If we know the element index, just demand that vector element. unsigned Idx = ConstEltNo->getZExtValue(); APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx); - computeKnownBits(InVec, KnownZero, KnownOne, DemandedElt, Depth + 1); + computeKnownBits(InVec, Known, DemandedElt, Depth + 1); } else { // Unknown element index, so ignore DemandedElts and demand them all. - computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1); + computeKnownBits(InVec, Known, Depth + 1); } if (BitWidth > EltBitWidth) { - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); } break; } @@ -2693,117 +2663,110 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { // If we know the element index, split the demand between the // source vector and the inserted element. - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth); unsigned EltIdx = CEltNo->getZExtValue(); // If we demand the inserted element then add its common known bits. if (DemandedElts[EltIdx]) { - computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1); - KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth()); - KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());; + computeKnownBits(InVal, Known2, Depth + 1); + Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); + Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());; } // If we demand the source vector then add its common known bits, ensuring // that we don't demand the inserted element. APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx)); if (!!VectorElts) { - computeKnownBits(InVec, KnownZero2, KnownOne2, VectorElts, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(InVec, Known2, VectorElts, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } } else { // Unknown element index, so ignore DemandedElts and demand them all. 
- computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1); - computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1); - KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth()); - KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());; + computeKnownBits(InVec, Known, Depth + 1); + computeKnownBits(InVal, Known2, Depth + 1); + Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); + Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());; } break; } case ISD::BITREVERSE: { - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - KnownZero = KnownZero2.reverseBits(); - KnownOne = KnownOne2.reverseBits(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known.Zero = Known2.Zero.reverseBits(); + Known.One = Known2.One.reverseBits(); break; } case ISD::BSWAP: { - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - KnownZero = KnownZero2.byteSwap(); - KnownOne = KnownOne2.byteSwap(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known.Zero = Known2.Zero.byteSwap(); + Known.One = Known2.One.byteSwap(); break; } case ISD::ABS: { - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // If the source's MSB is zero then we know the rest of the bits already. - if (KnownZero2[BitWidth - 1]) { - KnownZero = KnownZero2; - KnownOne = KnownOne2; + if (Known2.isNonNegative()) { + Known.Zero = Known2.Zero; + Known.One = Known2.One; break; } // We only know that the absolute values's MSB will be zero iff there is // a set bit that isn't the sign bit (otherwise it could be INT_MIN). - KnownOne2.clearBit(BitWidth - 1); - if (KnownOne2.getBoolValue()) { - KnownZero = APInt::getSignMask(BitWidth); + Known2.One.clearSignBit(); + if (Known2.One.getBoolValue()) { + Known.Zero = APInt::getSignMask(BitWidth); break; } break; } case ISD::UMIN: { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); // UMIN - we know that the result will have the maximum of the // known zero leading bits of the inputs. - unsigned LeadZero = KnownZero.countLeadingOnes(); - LeadZero = std::max(LeadZero, KnownZero2.countLeadingOnes()); + unsigned LeadZero = Known.Zero.countLeadingOnes(); + LeadZero = std::max(LeadZero, Known2.Zero.countLeadingOnes()); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; - KnownZero.setHighBits(LeadZero); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; + Known.Zero.setHighBits(LeadZero); break; } case ISD::UMAX: { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); // UMAX - we know that the result will have the maximum of the // known one leading bits of the inputs. 
- unsigned LeadOne = KnownOne.countLeadingOnes(); - LeadOne = std::max(LeadOne, KnownOne2.countLeadingOnes()); + unsigned LeadOne = Known.One.countLeadingOnes(); + LeadOne = std::max(LeadOne, Known2.One.countLeadingOnes()); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; - KnownOne.setHighBits(LeadOne); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; + Known.One.setHighBits(LeadOne); break; } case ISD::SMIN: case ISD::SMAX: { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; break; } case ISD::FrameIndex: case ISD::TargetFrameIndex: if (unsigned Align = InferPtrAlignment(Op)) { // The low bits are known zero if the pointer is aligned. - KnownZero.setLowBits(Log2_32(Align)); + Known.Zero.setLowBits(Log2_32(Align)); break; } break; @@ -2816,12 +2779,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, DemandedElts, - *this, Depth); + TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth); break; } - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); } SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, @@ -2830,28 +2792,28 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, if (isNullConstant(N1)) return OFK_Never; - APInt N1Zero, N1One; - computeKnownBits(N1, N1Zero, N1One); - if (N1Zero.getBoolValue()) { - APInt N0Zero, N0One; - computeKnownBits(N0, N0Zero, N0One); + KnownBits N1Known; + computeKnownBits(N1, N1Known); + if (N1Known.Zero.getBoolValue()) { + KnownBits N0Known; + computeKnownBits(N0, N0Known); bool overflow; - (void)(~N0Zero).uadd_ov(~N1Zero, overflow); + (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow); if (!overflow) return OFK_Never; } // mulhi + 1 never overflow if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && - (~N1Zero & 0x01) == ~N1Zero) + (~N1Known.Zero & 0x01) == ~N1Known.Zero) return OFK_Never; if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) { - APInt N0Zero, N0One; - computeKnownBits(N0, N0Zero, N0One); + KnownBits N0Known; + computeKnownBits(N0, N0Known); - if ((~N0Zero & 0x01) == ~N0Zero) + if ((~N0Known.Zero & 0x01) == ~N0Known.Zero) return OFK_Never; } @@ -2895,10 +2857,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // to handle some common cases. // Fall back to computeKnownBits to catch other known cases. 
- APInt KnownZero, KnownOne; - computeKnownBits(Val, KnownZero, KnownOne); - return (KnownZero.countPopulation() == BitWidth - 1) && - (KnownOne.countPopulation() == 1); + KnownBits Known; + computeKnownBits(Val, Known); + return (Known.Zero.countPopulation() == BitWidth - 1) && + (Known.One.countPopulation() == 1); } unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { @@ -2971,7 +2933,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::max(Tmp, Tmp2); case ISD::SRA: - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); // SRA X, C -> adds C sign bits. if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { APInt ShiftVal = C->getAPIntValue(); @@ -3068,17 +3030,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Special case decrementing a value (ADD X, -1): if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) if (CRHS->isAllOnesValue()) { - APInt KnownZero, KnownOne; - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownBits Known; + computeKnownBits(Op.getOperand(0), Known, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return VTBits; // If we are subtracting one from a positive number, there is no carry // out of the result. - if (KnownZero.isNegative()) + if (Known.isNonNegative()) return Tmp; } @@ -3093,16 +3055,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Handle NEG. if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) if (CLHS->isNullValue()) { - APInt KnownZero, KnownOne; - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + KnownBits Known; + computeKnownBits(Op.getOperand(1), Known, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return VTBits; // If the input is known to be positive (the sign bit is known clear), // the output of the NEG has the same number of sign bits as the input. - if (KnownZero.isNegative()) + if (Known.isNonNegative()) return Tmp2; // Otherwise, we treat this like a SUB. @@ -3134,6 +3096,44 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // result. Otherwise it gives either negative or > bitwidth result return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } + case ISD::INSERT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue InVal = Op.getOperand(1); + SDValue EltNo = Op.getOperand(2); + unsigned NumElts = InVec.getValueType().getVectorNumElements(); + + ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { + // If we know the element index, split the demand between the + // source vector and the inserted element. + unsigned EltIdx = CEltNo->getZExtValue(); + + // If we demand the inserted element then get its sign bits. + Tmp = UINT_MAX; + if (DemandedElts[EltIdx]) { + // TODO - handle implicit truncation of inserted elements. + if (InVal.getScalarValueSizeInBits() != VTBits) + break; + Tmp = ComputeNumSignBits(InVal, Depth + 1); + } + + // If we demand the source vector then get its sign bits, and determine + // the minimum. 
+ APInt VectorElts = DemandedElts; + VectorElts.clearBit(EltIdx); + if (!!VectorElts) { + Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + } else { + // Unknown element index, so ignore DemandedElts and demand them all. + Tmp = ComputeNumSignBits(InVec, Depth + 1); + Tmp2 = ComputeNumSignBits(InVal, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } case ISD::EXTRACT_VECTOR_ELT: { SDValue InVec = Op.getOperand(0); SDValue EltNo = Op.getOperand(1); @@ -3199,14 +3199,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. - APInt KnownZero, KnownOne; - computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth); + KnownBits Known; + computeKnownBits(Op, Known, DemandedElts, Depth); APInt Mask; - if (KnownZero.isNegative()) { // sign bit is 0 - Mask = KnownZero; - } else if (KnownOne.isNegative()) { // sign bit is 1; - Mask = KnownOne; + if (Known.isNonNegative()) { // sign bit is 0 + Mask = Known.Zero; + } else if (Known.isNegative()) { // sign bit is 1; + Mask = Known.One; } else { // Nothing known. return FirstAnswer; @@ -3239,8 +3239,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { if (getTarget().Options.NoNaNsFPMath) return true; - if (const BinaryWithFlagsSDNode *BF = dyn_cast<BinaryWithFlagsSDNode>(Op)) - return BF->Flags.hasNoNaNs(); + if (Op->getFlags().hasNoNaNs()) + return true; // If the value is a constant, we can obviously see if it is a NaN or not. if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) @@ -3284,11 +3284,10 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); - APInt AZero, AOne; - APInt BZero, BOne; - computeKnownBits(A, AZero, AOne); - computeKnownBits(B, BZero, BOne); - return (AZero | BZero).isAllOnesValue(); + KnownBits AKnown, BKnown; + computeKnownBits(A, AKnown); + computeKnownBits(B, BKnown); + return (AKnown.Zero | BKnown.Zero).isAllOnesValue(); } static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, @@ -3357,7 +3356,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue Operand) { + SDValue Operand, const SDNodeFlags Flags) { // Constant fold unary operations with an integer constant operand. Even // opaque constant will be folded, because the folding of unary operations // doesn't create new constants with different values. Nevertheless, the @@ -3683,8 +3682,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? 
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), - Operand.getNode()->getOperand(0), - &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags); + Operand.getNode()->getOperand(0), + Operand.getNode()->getFlags()); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getNode()->getOperand(0); break; @@ -3701,10 +3700,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + E->intersectFlagsWith(Flags); return SDValue(E, 0); + } N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N->setFlags(Flags); createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { @@ -3883,7 +3885,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, - const SDNodeFlags *Flags) { + const SDNodeFlags Flags) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so // bail early. @@ -3975,8 +3977,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue N1, SDValue N2, - const SDNodeFlags *Flags) { + SDValue N1, SDValue N2, const SDNodeFlags Flags) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); @@ -4161,7 +4162,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) { unsigned FromBits = EVT.getScalarSizeInBits(); Val <<= Val.getBitWidth() - FromBits; - Val = Val.ashr(Val.getBitWidth() - FromBits); + Val.ashrInPlace(Val.getBitWidth() - FromBits); return getConstant(Val, DL, ConstantVT); }; @@ -4443,21 +4444,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Memoize this node if possible. SDNode *N; SDVTList VTs = getVTList(VT); + SDValue Ops[] = {N1, N2}; if (VT != MVT::Glue) { - SDValue Ops[] = {N1, N2}; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { - if (Flags) - E->intersectFlagsWith(Flags); + E->intersectFlagsWith(Flags); return SDValue(E, 0); } - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N->setFlags(Flags); + createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); } InsertNode(N); @@ -5979,7 +5982,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) { + ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -6641,14 +6644,13 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, /// else return NULL. 
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops, - const SDNodeFlags *Flags) { + const SDNodeFlags Flags) { if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { - if (Flags) - E->intersectFlagsWith(Flags); + E->intersectFlagsWith(Flags); return E; } } @@ -7392,15 +7394,8 @@ bool SDNode::hasPredecessor(const SDNode *N) const { return hasPredecessorHelper(N, Visited, Worklist); } -const SDNodeFlags *SDNode::getFlags() const { - if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) - return &FlagsNode->Flags; - return nullptr; -} - -void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) { - if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) - FlagsNode->Flags.intersectWith(Flags); +void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { + this->Flags.intersectWith(Flags); } SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6a737ed84ea4..ba9e11798f15 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -83,20 +83,6 @@ LimitFPPrecision("limit-float-precision", "for some float libcalls"), cl::location(LimitFloatPrecision), cl::init(0)); - -/// Minimum jump table density for normal functions. -static cl::opt<unsigned> -JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden, - cl::desc("Minimum density for building a jump table in " - "a normal function")); - -/// Minimum jump table density for -Os or -Oz functions. -static cl::opt<unsigned> -OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden, - cl::desc("Minimum density for building a jump table in " - "an optsize function")); - - // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -364,7 +350,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT ValueSVT = ValueVT.getVectorElementType(); if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) - Val = DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); + Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) + : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); return DAG.getBuildVector(ValueVT, DL, Val); } @@ -557,10 +544,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, Val = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - - Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } + assert(Val.getValueType() == PartVT && "Unexpected vector part value type"); Parts[0] = Val; return; } @@ -675,7 +661,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, unsigned RegSize = RegisterVT.getSizeInBits(); unsigned NumSignBits = LOI->NumSignBits; - unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + unsigned NumZeroBits = LOI->Known.Zero.countLeadingOnes(); if (NumZeroBits == RegSize) { // The current value is a zero. 
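// --- Editor's sketch (illustrative, not from the commit). The hunks above
// replace `const SDNodeFlags *` parameters with by-value SDNodeFlags and make
// every CSE hit call intersectFlagsWith() unconditionally, now that flags
// live on SDNode itself instead of BinaryWithFlagsSDNode. A minimal
// standalone model of the intersection rule, using a hypothetical two-bit
// Flags type rather than the real SDNodeFlags:
#include <cassert>

struct Flags {
  bool NoNaNs = false;
  bool Exact = false;
  // Keep only the guarantees that hold for *both* nodes being merged.
  void intersectWith(const Flags &O) {
    NoNaNs &= O.NoNaNs;
    Exact &= O.Exact;
  }
};

int main() {
  Flags A, B;
  A.NoNaNs = A.Exact = true;
  B.NoNaNs = true; // B was built without the 'exact' guarantee.
  A.intersectWith(B); // CSE folds B into A: only shared guarantees survive.
  assert(A.NoNaNs && !A.Exact);
}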
@@ -1349,7 +1335,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i], getCurSDLoc()), - &Flags); + Flags); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), // FIXME: better loc info would be nice. @@ -2589,7 +2575,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), - Op1, Op2, &Flags); + Op1, Op2, Flags); setValue(&I, BinNodeValue); } @@ -2642,7 +2628,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, - &Flags); + Flags); setValue(&I, Res); } @@ -2654,7 +2640,7 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { Flags.setExact(isa<PossiblyExactOperator>(&I) && cast<PossiblyExactOperator>(&I)->isExact()); setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, - Op2, &Flags)); + Op2, Flags)); } void SelectionDAGBuilder::visitICmp(const User &I) { @@ -3266,7 +3252,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Flags.setNoUnsignedWrap(true); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, - DAG.getConstant(Offset, dl, N.getValueType()), &Flags); + DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { MVT PtrTy = @@ -3296,7 +3282,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()) Flags.setNoUnsignedWrap(true); - N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags); continue; } @@ -3374,7 +3360,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags); + DAG.getIntPtrConstant(StackAlign - 1, dl), Flags); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, dl, @@ -3478,7 +3464,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), - &Flags); + Flags); auto MMOFlags = MachineMemOperand::MONone; if (isVolatile) MMOFlags |= MachineMemOperand::MOVolatile; @@ -3633,7 +3619,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { ChainI = 0; } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); + DAG.getConstant(Offsets[i], dl, PtrVT), Flags); SDValue St = DAG.getStore( Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo); @@ -7897,7 +7883,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, - PtrVT), &Flags); + PtrVT), Flags); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), @@ -8187,15 +8173,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates); // Set up the incoming argument description vector. 
- unsigned Idx = 0; for (const Argument &Arg : F.args()) { - ++Idx; + unsigned ArgNo = Arg.getArgNo(); SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); bool isArgValueUsed = !Arg.use_empty(); unsigned PartBase = 0; Type *FinalType = Arg.getType(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::ByVal)) FinalType = cast<PointerType>(FinalType)->getElementType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( FinalType, F.getCallingConv(), F.isVarArg()); @@ -8206,11 +8191,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + if (Arg.hasAttribute(Attribute::ZExt)) Flags.setZExt(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + if (Arg.hasAttribute(Attribute::SExt)) Flags.setSExt(); - if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) { + if (Arg.hasAttribute(Attribute::InReg)) { // If we are using vectorcall calling convention, a structure that is // passed InReg - is surely an HVA if (F.getCallingConv() == CallingConv::X86_VectorCall && @@ -8223,15 +8208,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Set InReg Flag Flags.setInReg(); } - if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) + if (Arg.hasAttribute(Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf)) + if (Arg.hasAttribute(Attribute::SwiftSelf)) Flags.setSwiftSelf(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) + if (Arg.hasAttribute(Attribute::SwiftError)) Flags.setSwiftError(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::ByVal)) Flags.setByVal(); - if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + if (Arg.hasAttribute(Attribute::InAlloca)) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated @@ -8242,7 +8227,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getCallingConv() == CallingConv::X86_INTR) { // IA Interrupt passes frame (1st parameter) by value in the stack. - if (Idx == 1) + if (ArgNo == 0) Flags.setByVal(); } if (Flags.isByVal() || Flags.isInAlloca()) { @@ -8252,13 +8237,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. 
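// --- Editor's sketch (illustrative, not from the commit). The hunks above
// and just below drop the hand-maintained 1-based attribute index (where
// slot 0 is the return value) in favor of asking the Argument directly,
// which is why `Idx == 1` becomes `ArgNo == 0` and getParamAlignment(Idx)
// becomes Arg.getParamAlignment(). A standalone model of the off-by-one
// being removed, with hypothetical variable names:
#include <cassert>

int main() {
  unsigned Idx = 0; // old counter: pre-incremented, hence 1-based
  for (unsigned ArgNo = 0; ArgNo != 3; ++ArgNo) {
    ++Idx;
    assert(Idx == ArgNo + 1); // every old attribute query was shifted by one
  }
}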
unsigned FrameAlign; - if (F.getParamAlignment(Idx)) - FrameAlign = F.getParamAlignment(Idx); + if (Arg.getParamAlignment()) + FrameAlign = Arg.getParamAlignment(); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } - if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) + if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); @@ -8270,7 +8255,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, - Idx-1, PartBase+i*RegisterVT.getStoreSize()); + ArgNo, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -8311,7 +8296,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Set up the argument values. unsigned i = 0; - Idx = 0; if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. @@ -8333,14 +8317,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. - // Idx indexes LLVM arguments. Don't touch it. ++i; } SmallVector<SDValue, 4> Chains; DenseMap<int, int> ArgCopyElisionFrameIndexMap; for (const Argument &Arg : F.args()) { - ++Idx; SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); @@ -8362,7 +8344,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // debugging information. bool isSwiftErrorArg = TLI->supportSwiftError() && - F.getAttributes().hasAttribute(Idx, Attribute::SwiftError); + Arg.hasAttribute(Attribute::SwiftError); if (!ArgHasUses && !isSwiftErrorArg) { SDB->setUnusedArgValue(&Arg, InVals[i]); @@ -8382,9 +8364,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // function. if (ArgHasUses || isSwiftErrorArg) { Optional<ISD::NodeType> AssertOp; - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + if (Arg.hasAttribute(Attribute::SExt)) AssertOp = ISD::AssertSext; - else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + else if (Arg.hasAttribute(Attribute::ZExt)) AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, @@ -8589,13 +8571,10 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { HasTailCall = true; } -bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, - const SmallVectorImpl<unsigned> &TotalCases, - unsigned First, unsigned Last, - unsigned Density) const { +uint64_t +SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters, + unsigned First, unsigned Last) const { assert(Last >= First); - assert(TotalCases[Last] >= TotalCases[First]); - const APInt &LowCase = Clusters[First].Low->getValue(); const APInt &HighCase = Clusters[Last].High->getValue(); assert(LowCase.getBitWidth() == HighCase.getBitWidth()); @@ -8604,26 +8583,17 @@ bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, // comparison to lower. We should discriminate against such consecutive ranges // in jump tables. 
- uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); - uint64_t Range = Diff + 1; + return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1; +} +uint64_t SelectionDAGBuilder::getJumpTableNumCases( + const SmallVectorImpl<unsigned> &TotalCases, unsigned First, + unsigned Last) const { + assert(Last >= First); + assert(TotalCases[Last] >= TotalCases[First]); uint64_t NumCases = TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); - - assert(NumCases < UINT64_MAX / 100); - assert(Range >= NumCases); - - return NumCases * 100 >= Range * Density; -} - -static inline bool areJTsAllowed(const TargetLowering &TLI, - const SwitchInst *SI) { - const Function *Fn = SI->getParent()->getParent(); - if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") - return false; - - return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); + return NumCases; } bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, @@ -8662,10 +8632,11 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, JTProbs[Clusters[I].MBB] += Clusters[I].Prob; } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumDests = JTProbs.size(); - if (isSuitableForBitTests(NumDests, NumCmps, - Clusters[First].Low->getValue(), - Clusters[Last].High->getValue())) { + if (TLI.isSuitableForBitTests( + NumDests, NumCmps, Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), DAG.getDataLayout())) { // Clusters[First..Last] should be lowered as bit tests instead. return false; } @@ -8686,7 +8657,6 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, } JumpTableMBB->normalizeSuccProbs(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) ->createJumpTableIndex(Table); @@ -8715,17 +8685,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, #endif const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI, SI)) + if (!TLI.areJTsAllowed(SI->getParent()->getParent())) return; - const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize(); - const int64_t N = Clusters.size(); const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries(); const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; - const unsigned MaxJumpTableSize = - OptForSize || TLI.getMaximumJumpTableSize() == 0 - ? UINT_MAX : TLI.getMaximumJumpTableSize(); if (N < 2 || N < MinJumpTableEntries) return; @@ -8740,15 +8705,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, TotalCases[i] += TotalCases[i - 1]; } - const unsigned MinDensity = - OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; - // Cheap case: the whole range may be suitable for jump table. 
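// --- Editor's sketch (illustrative, not from the commit). The refactoring
// above splits the removed isDense() into getJumpTableRange() and
// getJumpTableNumCases(); the density policy itself moves behind
// TLI.isSuitableForJumpTable() in the hunks below. A standalone model of the
// percent-scaled density arithmetic, keeping the same overflow guards that
// the new call sites retain as asserts:
#include <cassert>
#include <cstdint>

static bool denseEnough(uint64_t NumCases, uint64_t Range, uint64_t Density) {
  assert(NumCases < UINT64_MAX / 100 && Range >= NumCases);
  return NumCases * 100 >= Range * Density; // covers at least Density percent
}

int main() {
  // 40 cases spread over a range of 100 possible values: 40% dense.
  assert(denseEnough(40, 100, 10));  // passes the 10% bar for normal functions
  assert(!denseEnough(40, 100, 50)); // fails once the required density is 50%
}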
- unsigned JumpTableSize = (Clusters[N - 1].High->getValue() - - Clusters[0].Low->getValue()) - .getLimitedValue(UINT_MAX - 1) + 1; - if (JumpTableSize <= MaxJumpTableSize && - isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) { + uint64_t Range = getJumpTableRange(Clusters,0, N - 1); + uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1); + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; @@ -8801,11 +8763,11 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. - JumpTableSize = (Clusters[j].High->getValue() - - Clusters[i].Low->getValue()) - .getLimitedValue(UINT_MAX - 1) + 1; - if (JumpTableSize <= MaxJumpTableSize && - isDense(Clusters, TotalCases, i, j, MinDensity)) { + uint64_t Range = getJumpTableRange(Clusters, i, j); + uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j); + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1]; int64_t NumEntries = j - i + 1; @@ -8849,36 +8811,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, Clusters.resize(DstIndex); } -bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { - // FIXME: Using the pointer type doesn't seem ideal. - uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); - uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; - return Range <= BW; -} - -bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, - unsigned NumCmps, - const APInt &Low, - const APInt &High) { - // FIXME: I don't think NumCmps is the correct metric: a single case and a - // range of cases both require only one branch to lower. Just looking at the - // number of clusters and destinations should be enough to decide whether to - // build bit tests. - - // To lower a range with bit tests, the range must fit the bitwidth of a - // machine word. - if (!rangeFitsInWord(Low, High)) - return false; - - // Decide whether it's profitable to lower this range with bit tests. Each - // destination requires a bit test and branch, and there is an overall range - // check branch. For a small number of clusters, separate comparisons might be - // cheaper, and for many destinations, splitting the range might be better. 
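// --- Editor's sketch (illustrative, not from the commit). The heuristic
// removed just below (re-homed on TargetLowering as isSuitableForBitTests in
// this commit) gates bit-test lowering on the case range fitting in one
// machine word plus small per-shape thresholds. A standalone model, assuming
// a hypothetical 64-bit word where the real code uses the pointer width:
#include <cassert>
#include <cstdint>

static bool rangeFitsInWord(uint64_t Low, uint64_t High) {
  return High - Low + 1 <= 64; // every case value addressable as a bit index
}

static bool suitableForBitTests(unsigned NumDests, unsigned NumCmps,
                                uint64_t Low, uint64_t High) {
  if (!rangeFitsInWord(Low, High))
    return false;
  // One bit test + branch per destination, plus an overall range check.
  return (NumDests == 1 && NumCmps >= 3) ||
         (NumDests == 2 && NumCmps >= 5) ||
         (NumDests == 3 && NumCmps >= 6);
}

int main() {
  assert(suitableForBitTests(1, 3, 0, 63));  // dense enough, fits one word
  assert(!suitableForBitTests(3, 5, 0, 63)); // 3 dests need >= 6 comparisons
  assert(!suitableForBitTests(1, 3, 0, 64)); // a 65-value range spills over
}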
- return (NumDests == 1 && NumCmps >= 3) || - (NumDests == 2 && NumCmps >= 5) || - (NumDests == 3 && NumCmps >= 6); -} - bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, @@ -8900,16 +8832,17 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, APInt High = Clusters[Last].High->getValue(); assert(Low.slt(High)); - if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL)) return false; APInt LowBound; APInt CmpRange; - const int BitWidth = DAG.getTargetLoweringInfo() - .getPointerTy(DAG.getDataLayout()) - .getSizeInBits(); - assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); + const int BitWidth = TLI.getPointerTy(DL).getSizeInBits(); + assert(TLI.rangeFitsInWord(Low, High, DL) && + "Case range must fit in bit mask!"); // Check if the clusters cover a contiguous range such that no value in the // range will jump to the default statement. @@ -8999,7 +8932,9 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); + const DataLayout &DL = DAG.getDataLayout(); + + EVT PTy = TLI.getPointerTy(DL); if (!TLI.isOperationLegal(ISD::SHL, PTy)) return; @@ -9030,8 +8965,8 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, // Try building a partition from Clusters[i..j]. // Check the range. - if (!rangeFitsInWord(Clusters[i].Low->getValue(), - Clusters[j].High->getValue())) + if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(), + Clusters[j].High->getValue(), DL)) continue; // Check nbr of destinations and cluster types. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9e34590cc39c..9e9989058ae5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -304,10 +304,13 @@ private: BranchProbability DefaultProb; }; - /// Check whether a range of clusters is dense enough for a jump table. - bool isDense(const CaseClusterVector &Clusters, - const SmallVectorImpl<unsigned> &TotalCases, - unsigned First, unsigned Last, unsigned MinDensity) const; + /// Return the range of value in [First..Last]. + uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First, + unsigned Last) const; + + /// Return the number of cases in [First..Last]. + uint64_t getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases, + unsigned First, unsigned Last) const; /// Build a jump table cluster from Clusters[First..Last]. Returns false if it /// decides it's not a good idea. @@ -319,14 +322,6 @@ private: void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, MachineBasicBlock *DefaultMBB); - /// Check whether the range [Low,High] fits in a machine word. - bool rangeFitsInWord(const APInt &Low, const APInt &High); - - /// Check whether these clusters are suitable for lowering with bit tests based - /// on the number of destinations, comparison metric, and range. 
- bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, - const APInt &Low, const APInt &High); - /// Build a bit test cluster from Clusters[First..Last]. Returns false if it /// decides it's not a good idea. bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, @@ -777,6 +772,11 @@ public: bool VarArgDisallowed, bool ForceVoidReturnTy); + /// Returns the type of FrameIndex and TargetFrameIndex nodes. + MVT getFrameIndexTy() { + return DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()); + } + private: // Terminator instructions. void visitRet(const ReturnInst &I); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 488c60a28ffb..26dd45ef933f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -227,6 +227,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CARRY_FALSE: return "carry_false"; case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; + case ISD::ADDCARRY: return "addcarry"; case ISD::SADDO: return "saddo"; case ISD::UADDO: return "uaddo"; case ISD::SSUBO: return "ssubo"; @@ -235,6 +236,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UMULO: return "umulo"; case ISD::SUBC: return "subc"; case ISD::SUBE: return "sube"; + case ISD::SUBCARRY: return "subcarry"; case ISD::SHL_PARTS: return "shl_parts"; case ISD::SRA_PARTS: return "sra_parts"; case ISD::SRL_PARTS: return "srl_parts"; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index e21204dbb966..3aabdaeaa094 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -73,6 +73,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -592,13 +593,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MRI.replaceRegWith(From, To); } - if (TLI->hasCopyImplyingStackAdjustment(MF)) - MFI.setHasCopyImplyingStackAdjustment(true); - - // Freeze the set of reserved registers now that MachineFrameInfo has been - // set up. All the information required by getReservedRegs() should be - // available now. - MRI.freezeReservedRegs(*MF); + TLI->finalizeLowering(*MF); // Release function-specific state. SDB and CurDAG are already cleared // at this point. @@ -650,8 +645,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { Worklist.push_back(CurDAG->getRoot().getNode()); - APInt KnownZero; - APInt KnownOne; + KnownBits Known; do { SDNode *N = Worklist.pop_back_val(); @@ -680,8 +674,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - CurDAG->computeKnownBits(Src, KnownZero, KnownOne); - FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); + CurDAG->computeKnownBits(Src, Known); + FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known); } while (!Worklist.empty()); } @@ -1930,11 +1924,11 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, // either already zero or is not demanded. Check for known zero input bits. 
APInt NeededMask = DesiredMask & ~ActualMask; - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(LHS, KnownZero, KnownOne); + KnownBits Known; + CurDAG->computeKnownBits(LHS, Known); // If all the missing bits in the or are already known to be set, match! - if ((NeededMask & KnownOne) == NeededMask) + if (NeededMask.isSubsetOf(Known.One)) return true; // TODO: check to see if missing bits are just not demanded. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index d27e2455978d..c0a5041b1395 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -242,7 +242,8 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, // Cache this slot so we find it when going through the normal // assignment loop. - SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType()); + SDValue Loc = + Builder.DAG.getTargetFrameIndex(*Index, Builder.getFrameIndexTy()); Builder.StatepointLowering.setLocation(Incoming, Loc); } @@ -343,7 +344,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, Builder); int Index = cast<FrameIndexSDNode>(Loc)->getIndex(); // We use TargetFrameIndex so that isel will not select it into LEA - Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()); + Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy()); // TODO: We can create TokenFactor node instead of // chaining stores one after another, this may allow @@ -391,8 +392,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly, // This handles allocas as arguments to the statepoint (this is only // really meaningful for a deopt value. For GC, we'd be trying to // relocate the address of the alloca itself?) + assert(Incoming.getValueType() == Builder.getFrameIndexTy() && + "Incoming value is a frame index!"); Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), - Incoming.getValueType())); + Builder.getFrameIndexTy())); } else if (LiveInOnly) { // If this value is live in (not live-on-return, or live-through), we can // treat it the same way patchpoint treats it's "live in" values. 
We'll @@ -527,8 +530,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SDValue Incoming = Builder.getValue(V); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { // This handles allocas as arguments to the statepoint + assert(Incoming.getValueType() == Builder.getFrameIndexTy() && + "Incoming value is a frame index!"); Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), - Incoming.getValueType())); + Builder.getFrameIndexTy())); } } @@ -949,8 +954,8 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { return; } - SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation, - SD.getValueType()); + SDValue SpillSlot = + DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy()); // Be conservative: flush all pending loads // TODO: Probably we can be less restrictive on this, @@ -958,7 +963,9 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { SDValue Chain = getRoot(); SDValue SpillLoad = - DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + Relocate.getType()), + getCurSDLoc(), Chain, SpillSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), *DerivedPtrLocation)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 136dec873cb8..2d39ecd9779b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -27,6 +27,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -109,8 +110,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS, IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned); IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf); IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError); - // FIXME: getParamAlignment is off by one from argument index. - Alignment = CS->getParamAlignment(ArgIdx + 1); + Alignment = CS->getParamAlignment(ArgIdx); } /// Generate a libcall taking the given operands as arguments and returning a @@ -437,10 +437,9 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const { SDValue Op = User->getOperand(OpIdx); - APInt KnownZero, KnownOne; + KnownBits Known; - if (!SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, - TLO, 0, true)) + if (!SimplifyDemandedBits(Op, Demanded, Known, TLO, 0, true)) return false; @@ -488,10 +487,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask, SelectionDAG &DAG = DCI.DAG; TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); - APInt KnownZero, KnownOne; + KnownBits Known; - bool Simplified = SimplifyDemandedBits(Op, DemandedMask, KnownZero, KnownOne, - TLO); + bool Simplified = SimplifyDemandedBits(Op, DemandedMask, Known, TLO); if (Simplified) DCI.CommitTargetLoweringOpt(TLO); return Simplified; @@ -501,13 +499,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask, /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the /// original and new nodes in Old and New. 
Otherwise, analyze the expression and -/// return a mask of KnownOne and KnownZero bits for the expression (used to -/// simplify the caller). The KnownZero/One bits may only be accurate for those -/// bits in the DemandedMask. +/// return a mask of Known bits for the expression (used to simplify the +/// caller). The Known bits may only be accurate for those bits in the +/// DemandedMask. bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth, bool AssumeSingleUse) const { @@ -519,14 +516,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, auto &DL = TLO.DAG.getDataLayout(); // Don't know anything. - KnownZero = KnownOne = APInt(BitWidth, 0); + Known = KnownBits(BitWidth); // Other users may use these bits. if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { - // If not at the root, Just compute the KnownZero/KnownOne bits to + // If not at the root, Just compute the Known bits to // simplify things downstream. - TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); + TLO.DAG.computeKnownBits(Op, Known, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, @@ -541,38 +538,37 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } - APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; + KnownBits Known2, KnownOut; switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); - KnownZero = ~KnownOne; + Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); + Known.Zero = ~Known.One; return false; // Don't fall through, will infinitely loop. case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every constant vector element. - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); for (SDValue SrcOp : Op->ops()) { if (!isa<ConstantSDNode>(SrcOp)) { // We can only handle all constant values - bail out with no known bits. - KnownZero = KnownOne = APInt(BitWidth, 0); + Known = KnownBits(BitWidth); return false; } - KnownOne2 = cast<ConstantSDNode>(SrcOp)->getAPIntValue(); - KnownZero2 = ~KnownOne2; + Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue(); + Known2.Zero = ~Known2.One; // BUILD_VECTOR can implicitly truncate sources, we must handle this. - if (KnownOne2.getBitWidth() != BitWidth) { - assert(KnownOne2.getBitWidth() > BitWidth && - KnownZero2.getBitWidth() > BitWidth && + if (Known2.One.getBitWidth() != BitWidth) { + assert(Known2.getBitWidth() > BitWidth && "Expected BUILD_VECTOR implicit truncation"); - KnownOne2 = KnownOne2.trunc(BitWidth); - KnownZero2 = KnownZero2.trunc(BitWidth); + Known2.One = Known2.One.trunc(BitWidth); + Known2.Zero = Known2.Zero.trunc(BitWidth); } // Known bits are the values that are shared by every element. // TODO: support per-element known bits. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } return false; // Don't fall through, will infinitely loop. case ISD::AND: @@ -582,16 +578,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // the RHS. if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) { SDValue Op0 = Op.getOperand(0); - APInt LHSZero, LHSOne; + KnownBits LHSKnown; // Do not increment Depth here; that can cause an infinite loop. 
- TLO.DAG.computeKnownBits(Op0, LHSZero, LHSOne, Depth); + TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth); // If the LHS already has zeros where RHSC does, this and is dead. - if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) + if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op0); // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. - if (ShrinkDemandedConstant(Op, ~LHSZero & NewMask, TLO)) + if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO)) return true; // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its @@ -600,64 +596,56 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // the xor. For example, for a 32-bit X: // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 if (isBitwiseNot(Op0) && Op0.hasOneUse() && - LHSOne == ~RHSC->getAPIntValue()) { + LHSKnown.One == ~RHSC->getAPIntValue()) { SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(), Op0.getOperand(0), Op.getOperand(1)); return TLO.CombineTo(Op, Xor); } } - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, - KnownZero2, KnownOne2, TLO, Depth+1)) + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask, + Known2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + if (NewMask.isSubsetOf(Known2.Zero | Known.One)) return TLO.CombineTo(Op, Op.getOperand(0)); - if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + if (NewMask.isSubsetOf(Known.Zero | Known2.One)) return TLO.CombineTo(Op, Op.getOperand(1)); // If all of the demanded bits in the inputs are known zeros, return zero. - if ((NewMask & (KnownZero|KnownZero2)) == NewMask) + if (NewMask.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask, TLO)) + if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; + Known.One &= Known2.One; // Output known-0 are known to be clear if zero in either the LHS | RHS. 
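// --- Editor's sketch (illustrative, not from the commit). The AND case
// around this point folds the old KnownZero/KnownOne pair into one KnownBits
// value; the propagation rule itself is unchanged: ones survive only where
// both inputs are known one, zeros accumulate from either side. Standalone,
// with uint64_t standing in for APInt:
#include <cassert>
#include <cstdint>

struct KB { uint64_t Zero, One; }; // disjoint known-0 / known-1 masks

static KB andBits(KB A, KB B) {
  return {A.Zero | B.Zero, A.One & B.One};
}

int main() {
  KB A{0x0F, 0xF0}, B{0x33, 0xC0};
  KB R = andBits(A, B);
  assert(R.Zero == 0x3F && R.One == 0xC0);
  assert((R.Zero & R.One) == 0); // the "one AND zero" invariant asserted above
}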
- KnownZero |= KnownZero2; + Known.Zero |= Known2.Zero; break; case ISD::OR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, - KnownZero2, KnownOne2, TLO, Depth+1)) + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask, + Known2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) + if (NewMask.isSubsetOf(Known2.One | Known.Zero)) return TLO.CombineTo(Op, Op.getOperand(0)); - if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) - return TLO.CombineTo(Op, Op.getOperand(1)); - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) - return TLO.CombineTo(Op, Op.getOperand(0)); - if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + if (NewMask.isSubsetOf(Known.One | Known2.Zero)) return TLO.CombineTo(Op, Op.getOperand(1)); // If the RHS is a constant, see if we can simplify it. if (ShrinkDemandedConstant(Op, NewMask, TLO)) @@ -667,25 +655,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return true; // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; + Known.Zero &= Known2.Zero; // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; + Known.One |= Known2.One; break; case ISD::XOR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, - KnownOne2, TLO, Depth+1)) + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. - if ((KnownZero & NewMask) == NewMask) + if (NewMask.isSubsetOf(Known.Zero)) return TLO.CombineTo(Op, Op.getOperand(0)); - if ((KnownZero2 & NewMask) == NewMask) + if (NewMask.isSubsetOf(Known2.Zero)) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) @@ -694,25 +680,25 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If all of the unknown bits are known to be zero on one side or the other // (but not both) turn this into an *inclusive* or. // e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((NewMask & ~KnownZero & ~KnownZero2) == 0) + if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); // Output known-0 bits are known if clear or set in both the LHS & RHS. - KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); // If all of the demanded bits on one side are known, and all of the set // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 // NB: it is okay if more bits are known than are requested - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side - if (KnownOne == KnownOne2) { // set bits are the same on both sides + if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side + if (Known.One == Known2.One) { // set bits are the same on both sides EVT VT = Op.getValueType(); - SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT); + SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); } @@ -738,44 +724,39 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - KnownZero = std::move(KnownZeroOut); - KnownOne = std::move(KnownOneOut); + Known = std::move(KnownOut); break; case ISD::SELECT: - if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1)) return true; - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, - KnownOne2, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SELECT_CC: - if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, Known, TLO, Depth+1)) return true; - if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, - KnownOne2, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. 
if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SETCC: { SDValue Op0 = Op.getOperand(0); @@ -801,7 +782,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (getBooleanContents(Op0.getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; } case ISD::SHL: @@ -835,8 +816,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), Known, TLO, Depth+1)) return true; // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits @@ -885,10 +865,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - KnownZero <<= SA->getZExtValue(); - KnownOne <<= SA->getZExtValue(); + Known.Zero <<= SA->getZExtValue(); + Known.One <<= SA->getZExtValue(); // low bits known zero. - KnownZero.setLowBits(SA->getZExtValue()); + Known.Zero.setLowBits(SA->getZExtValue()); } break; case ISD::SRL: @@ -906,7 +886,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the shift is exact, then it does demand the low bits (and knows that // they are zero). - if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()) + if (Op->getFlags().hasExact()) InDemandedMask.setLowBits(ShAmt); // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a @@ -931,14 +911,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } // Compute the new bits that are at the top now. - if (SimplifyDemandedBits(InOp, InDemandedMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.lshrInPlace(ShAmt); - KnownOne.lshrInPlace(ShAmt); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + Known.Zero.lshrInPlace(ShAmt); + Known.One.lshrInPlace(ShAmt); - KnownZero.setHighBits(ShAmt); // High bits known zero. + Known.Zero.setHighBits(ShAmt); // High bits known zero. } break; case ISD::SRA: @@ -963,33 +942,30 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the shift is exact, then it does demand the low bits (and knows that // they are zero). - if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()) + if (Op->getFlags().hasExact()) InDemandedMask.setLowBits(ShAmt); // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - if (HighBits.intersects(NewMask)) - InDemandedMask |= APInt::getSignMask(VT.getScalarSizeInBits()); + if (NewMask.countLeadingZeros() < ShAmt) + InDemandedMask.setSignBit(); - if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO, + Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.lshrInPlace(ShAmt); - KnownOne.lshrInPlace(ShAmt); - - // Handle the sign bit, adjusted to where it is now in the mask. 
- APInt SignMask = APInt::getSignMask(BitWidth).lshr(ShAmt); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + Known.Zero.lshrInPlace(ShAmt); + Known.One.lshrInPlace(ShAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (KnownZero.intersects(SignMask) || (HighBits & ~NewMask) == HighBits) { + if (Known.Zero[BitWidth - ShAmt - 1] || + NewMask.countLeadingZeros() >= ShAmt) { SDNodeFlags Flags; - Flags.setExact(cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()); + Flags.setExact(Op->getFlags().hasExact()); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), - Op.getOperand(1), &Flags)); + Op.getOperand(1), Flags)); } int Log2 = NewMask.exactLogBase2(); @@ -1002,9 +978,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0), NewSA)); } - if (KnownOne.intersects(SignMask)) + if (Known.One[BitWidth - ShAmt - 1]) // New bits are known one. - KnownOne |= HighBits; + Known.One.setHighBits(ShAmt); } break; case ISD::SIGN_EXTEND_INREG: { @@ -1057,24 +1033,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, InputDemandedBits |= InSignBit; if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, - KnownZero, KnownOne, TLO, Depth+1)) + Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. // If the input sign bit is known zero, convert this into a zero extension. - if (KnownZero.intersects(InSignBit)) + if (Known.Zero.intersects(InSignBit)) return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg( Op.getOperand(0), dl, ExVT.getScalarType())); - if (KnownOne.intersects(InSignBit)) { // Input sign bit known set - KnownOne |= NewBits; - KnownZero &= ~NewBits; + if (Known.One.intersects(InSignBit)) { // Input sign bit known set + Known.One |= NewBits; + Known.Zero &= ~NewBits; } else { // Input sign bit unknown - KnownZero &= ~NewBits; - KnownOne &= ~NewBits; + Known.Zero &= ~NewBits; + Known.One &= ~NewBits; } break; } @@ -1085,22 +1061,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth); APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth); - APInt KnownZeroLo, KnownOneLo; - APInt KnownZeroHi, KnownOneHi; + KnownBits KnownLo, KnownHi; - if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo, - KnownOneLo, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1)) return true; - if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi, - KnownOneHi, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1)) return true; - KnownZero = KnownZeroLo.zext(BitWidth) | - KnownZeroHi.zext(BitWidth).shl(HalfBitWidth); + Known.Zero = KnownLo.Zero.zext(BitWidth) | + KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth); - KnownOne = KnownOneLo.zext(BitWidth) | - KnownOneHi.zext(BitWidth).shl(HalfBitWidth); + Known.One = KnownLo.One.zext(BitWidth) | + KnownHi.One.zext(BitWidth).shl(HalfBitWidth); break; } case ISD::ZERO_EXTEND: { @@ -1115,13 +1088,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getValueType(), Op.getOperand(0))); - if (SimplifyDemandedBits(Op.getOperand(0), InMask, - KnownZero, KnownOne, TLO, Depth+1)) + if 
(SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - KnownZero |= NewBits; + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); + Known.Zero |= NewBits; break; } case ISD::SIGN_EXTEND: { @@ -1143,37 +1115,36 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, InDemandedBits |= InSignBit; InDemandedBits = InDemandedBits.trunc(InBits); - if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO, + Depth+1)) return true; - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); // If the sign bit is known zero, convert this to a zero extend. - if (KnownZero.intersects(InSignBit)) + if (Known.Zero.intersects(InSignBit)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); // If the sign bit is known one, the top bits match. - if (KnownOne.intersects(InSignBit)) { - KnownOne |= NewBits; - assert((KnownZero & NewBits) == 0); + if (Known.One.intersects(InSignBit)) { + Known.One |= NewBits; + assert((Known.Zero & NewBits) == 0); } else { // Otherwise, top bits aren't known. - assert((KnownOne & NewBits) == 0); - assert((KnownZero & NewBits) == 0); + assert((Known.One & NewBits) == 0); + assert((Known.Zero & NewBits) == 0); } break; } case ISD::ANY_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt InMask = NewMask.trunc(OperandBitWidth); - if (SimplifyDemandedBits(Op.getOperand(0), InMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); break; } case ISD::TRUNCATE: { @@ -1181,11 +1152,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // zero/one bits live out. unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt TruncMask = NewMask.zext(OperandBitWidth); - if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1)) return true; - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); + Known.Zero = Known.Zero.trunc(BitWidth); + Known.One = Known.One.trunc(BitWidth); // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. 
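// --- Editor's sketch (illustrative, not from the commit). Many hunks above
// rewrite mask checks of the shape
//   (NewMask & ~A & B) == (~A & NewMask)
// as NewMask.isSubsetOf(A | B). A standalone check, with uint64_t standing
// in for APInt, that the two forms are algebraically identical:
#include <cassert>
#include <cstdint>

static bool isSubsetOf(uint64_t Mask, uint64_t Set) {
  return (Mask & ~Set) == 0; // every bit of Mask also appears in Set
}

int main() {
  for (uint64_t Mask : {0x0FULL, 0xF0ULL, 0xFFULL})
    for (uint64_t A : {0x03ULL, 0x30ULL})
      for (uint64_t B : {0x0CULL, 0xC0ULL}) {
        bool Old = (Mask & ~A & B) == (~A & Mask);
        bool New = isSubsetOf(Mask, A | B);
        assert(Old == New);
      }
}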
@@ -1233,7 +1203,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); break; } case ISD::AssertZext: { @@ -1243,11 +1213,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, - KnownZero, KnownOne, TLO, Depth+1)) + Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); - KnownZero |= ~InMask; + Known.Zero |= ~InMask; break; } case ISD::BITCAST: @@ -1285,22 +1255,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // of the highest bit demanded of them. APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMask.countLeadingZeros()); - if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, - KnownOne2, TLO, Depth+1) || - SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, - KnownOne2, TLO, Depth+1) || + if (SimplifyDemandedBits(Op.getOperand(0), LoMask, Known2, TLO, Depth+1) || + SimplifyDemandedBits(Op.getOperand(1), LoMask, Known2, TLO, Depth+1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) { - const SDNodeFlags *Flags = Op.getNode()->getFlags(); - if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) { + SDNodeFlags Flags = Op.getNode()->getFlags(); + if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we // won't wrap after simplification. - SDNodeFlags NewFlags = *Flags; - NewFlags.setNoSignedWrap(false); - NewFlags.setNoUnsignedWrap(false); + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1), - &NewFlags); + Flags); return TLO.CombineTo(Op, NewOp); } return true; @@ -1309,13 +1276,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } default: // Just use computeKnownBits to compute output bits. - TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); + TLO.DAG.computeKnownBits(Op, Known, Depth); break; } // If we know the value of all of the demanded bits, return this as a // constant. - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { + if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // Avoid folding to a constant if any OpaqueConstant is involved. const SDNode *N = Op.getNode(); for (SDNodeIterator I = SDNodeIterator::begin(N), @@ -1326,17 +1293,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } return TLO.CombineTo(Op, - TLO.DAG.getConstant(KnownOne, dl, Op.getValueType())); + TLO.DAG.getConstant(Known.One, dl, Op.getValueType())); } return false; } /// Determine which of the bits specified in Mask are known to be either zero or -/// one and return them in the KnownZero/KnownOne bitsets. +/// one and return them in the Known. 
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { @@ -1346,7 +1312,7 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Op.getOpcode() == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); + Known.Zero.clearAllBits(); Known.One.clearAllBits(); } /// This method can be implemented by targets that want to expose additional @@ -1721,7 +1687,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, bestWidth = width; break; } - newMask = newMask << width; + newMask <<= width; } } } @@ -2986,9 +2952,9 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, DAG.getDataLayout())); SDNodeFlags Flags; Flags.setExact(true); - Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, Flags); Created.push_back(Op1.getNode()); - d = d.ashr(ShAmt); + d.ashrInPlace(ShAmt); } // Calculate the multiplicative inverse, using Newton's method. @@ -3030,7 +2996,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, return SDValue(); // If the sdiv has an 'exact' bit we can use a simpler lowering. - if (cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact()) + if (N->getFlags().hasExact()) return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created); APInt::ms magics = Divisor.magic(); diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp index 315b059c5ac9..916b6f08c1b9 100644 --- a/contrib/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -41,8 +41,8 @@ using namespace llvm; #define DEBUG_TYPE "stackmaps" static cl::opt<int> StackMapVersion( - "stackmap-version", cl::init(2), - cl::desc("Specify the stackmap encoding version (default = 2)")); + "stackmap-version", cl::init(3), + cl::desc("Specify the stackmap encoding version (default = 3)")); const char *StackMaps::WSMP = "Stack Maps: "; @@ -85,7 +85,7 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { } StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { - if (StackMapVersion != 2) + if (StackMapVersion != 3) llvm_unreachable("Unsupported stackmap version!"); } @@ -221,8 +221,9 @@ void StackMaps::print(raw_ostream &OS) { OS << "Constant Index " << Loc.Offset; break; } - OS << "\t[encoding: .byte " << Loc.Type << ", .byte " << Loc.Size - << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n"; + OS << "\t[encoding: .byte " << Loc.Type << ", .byte 0" + << ", .short " << Loc.Size << ", .short " << Loc.Reg << ", .short 0" + << ", .int " << Loc.Offset << "]\n"; Idx++; } @@ -521,11 +522,16 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { for (const auto &Loc : CSLocs) { OS.EmitIntValue(Loc.Type, 1); - OS.EmitIntValue(Loc.Size, 1); + OS.EmitIntValue(0, 1); // Reserved + OS.EmitIntValue(Loc.Size, 2); OS.EmitIntValue(Loc.Reg, 2); + OS.EmitIntValue(0, 2); // Reserved OS.EmitIntValue(Loc.Offset, 4); } + // Emit alignment to 8 byte. + OS.EmitValueToAlignment(8); + // Num live-out registers and padding to align to 4 byte. 
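// Note on the records just emitted: in version 3 each location entry grew
// from 8 to 12 bytes, gaining explicit reserved fields and a 16-bit size.
// An illustrative sketch of the layout implied by the writes above (this
// struct does not exist in the code; field names are invented):
//
//   struct V3StackMapLocation {
//     uint8_t  Type;        // Register, Direct, Indirect, Constant, ...
//     uint8_t  Reserved1;   // emitted as 0
//     uint16_t Size;
//     uint16_t DwarfRegNum;
//     uint16_t Reserved2;   // emitted as 0
//     int32_t  OffsetOrSmallConstant;
//   };
//
// The two half-words emitted next are the padding and live-out count that
// the comment above describes.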
OS.EmitIntValue(0, 2); OS.EmitIntValue(LiveOuts.size(), 2); diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index e579922bb69e..39aa946fa840 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -53,6 +54,18 @@ static cl::opt<unsigned> MaximumJumpTableSize ("max-jump-table-size", cl::init(0), cl::Hidden, cl::desc("Set maximum size of jump tables; zero for no limit.")); +/// Minimum jump table density for normal functions. +static cl::opt<unsigned> + JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden, + cl::desc("Minimum density for building a jump table in " + "a normal function")); + +/// Minimum jump table density for -Os or -Oz functions. +static cl::opt<unsigned> OptsizeJumpTableDensity( + "optsize-jump-table-density", cl::init(40), cl::Hidden, + cl::desc("Minimum density for building a jump table in " + "an optsize function")); + // Although this default value is arbitrary, it is not random. It is assumed // that a condition that evaluates the same way by a higher percentage than this // is best represented as control flow. Therefore, the default value N should be @@ -910,6 +923,10 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULO, VT, Expand); setOperationAction(ISD::UMULO, VT, Expand); + // ADDCARRY operations default to expand + setOperationAction(ISD::ADDCARRY, VT, Expand); + setOperationAction(ISD::SUBCARRY, VT, Expand); + // These default to Expand so they will be expanded to CTLZ/CTTZ by default. setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); @@ -1901,6 +1918,10 @@ void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) { MinimumJumpTableEntries = Val; } +unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const { + return OptForSize ? 
OptsizeJumpTableDensity : JumpTableDensity; +} + unsigned TargetLoweringBase::getMaximumJumpTableSize() const { return MaximumJumpTableSize; } @@ -2092,3 +2113,7 @@ int TargetLoweringBase::getDivRefinementSteps(EVT VT, MachineFunction &MF) const { return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF)); } + +void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const { + MF.getRegInfo().freezeReservedRegs(MF); +} diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp index c2db56a7657c..f085132b6a94 100644 --- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -195,18 +196,30 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { } if (phi->getNumOperands() == 3) { - unsigned Input = phi->getOperand(1).getReg(); - unsigned Output = phi->getOperand(0).getReg(); - - phi++->eraseFromParent(); + const MachineOperand &Input = phi->getOperand(1); + const MachineOperand &Output = phi->getOperand(0); + unsigned InputReg = Input.getReg(); + unsigned OutputReg = Output.getReg(); + assert(Output.getSubReg() == 0 && "Cannot have output subregister"); ModifiedPHI = true; - if (Input != Output) { + if (InputReg != OutputReg) { MachineRegisterInfo &MRI = F.getRegInfo(); - MRI.constrainRegClass(Input, MRI.getRegClass(Output)); - MRI.replaceRegWith(Output, Input); + unsigned InputSub = Input.getSubReg(); + if (InputSub == 0) { + MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg)); + MRI.replaceRegWith(OutputReg, InputReg); + } else { + // The input register to the PHI has a subregister: + // insert a COPY instead of simply replacing the output + // with the input. 
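// Hypothetical example of the case handled here (MIR-flavoured syntax,
// register names invented for illustration): given
//   %0 = PHI %1:sub_32, <BB#0>
// calling replaceRegWith(%0, %1) would silently drop the sub_32 index, so
// the code below materializes "%0 = COPY %1:sub_32" at the first non-PHI
// position instead: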
+ const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo(); + BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(), + TII->get(TargetOpcode::COPY), OutputReg) + .addReg(InputReg, getRegState(Input), InputSub); + } + phi++->eraseFromParent(); } - continue; } diff --git a/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp b/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp index 0e20bcb27ec9..fc6008ba66de 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp @@ -245,20 +245,20 @@ static const EnumEntry<uint32_t> FrameProcSymFlagNames[] = { }; static const EnumEntry<uint32_t> ModuleSubstreamKindNames[] = { - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, None), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, Symbols), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, Lines), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, StringTable), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, FileChecksums), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, FrameData), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, InlineeLines), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, CrossScopeImports), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, CrossScopeExports), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, ILLines), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, FuncMDTokenMap), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, TypeMDTokenMap), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, MergedAssemblyInput), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, CoffSymbolRVA), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, None), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, Symbols), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, Lines), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, StringTable), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FileChecksums), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FrameData), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, InlineeLines), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CrossScopeImports), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CrossScopeExports), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, ILLines), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FuncMDTokenMap), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, TypeMDTokenMap), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, MergedAssemblyInput), + CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CoffSymbolRVA), }; static const EnumEntry<uint16_t> ExportSymFlagNames[] = { diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp new file mode 100644 index 000000000000..42f0afc3e2d7 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp @@ -0,0 +1,107 @@ +//===- ModuleDebugFileChecksumFragment.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" + +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" +#include "llvm/Support/BinaryStreamReader.h" + +using namespace llvm; +using namespace llvm::codeview; + +struct FileChecksumEntryHeader { + using ulittle32_t = support::ulittle32_t; + + ulittle32_t FileNameOffset; // Byte offset of filename in global string table. + uint8_t ChecksumSize; // Number of bytes of checksum. 
+ uint8_t ChecksumKind; // FileChecksumKind + // Checksum bytes follow. +}; + +Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::extract( + BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) { + BinaryStreamReader Reader(Stream); + + const FileChecksumEntryHeader *Header; + if (auto EC = Reader.readObject(Header)) + return EC; + + Item.FileNameOffset = Header->FileNameOffset; + Item.Kind = static_cast<FileChecksumKind>(Header->ChecksumKind); + if (auto EC = Reader.readBytes(Item.Checksum, Header->ChecksumSize)) + return EC; + + Len = alignTo(Header->ChecksumSize + sizeof(FileChecksumEntryHeader), 4); + return Error::success(); +} + +Error ModuleDebugFileChecksumFragmentRef::initialize( + BinaryStreamReader Reader) { + if (auto EC = Reader.readArray(Checksums, Reader.bytesRemaining())) + return EC; + + return Error::success(); +} + +ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment( + StringTable &Strings) + : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums), + Strings(Strings) {} + +void ModuleDebugFileChecksumFragment::addChecksum(StringRef FileName, + FileChecksumKind Kind, + ArrayRef<uint8_t> Bytes) { + FileChecksumEntry Entry; + if (!Bytes.empty()) { + uint8_t *Copy = Storage.Allocate<uint8_t>(Bytes.size()); + ::memcpy(Copy, Bytes.data(), Bytes.size()); + Entry.Checksum = makeArrayRef(Copy, Bytes.size()); + } + + Entry.FileNameOffset = Strings.insert(FileName); + Entry.Kind = Kind; + Checksums.push_back(Entry); + + // This maps the offset of this string in the string table to the offset + // of this checksum entry in the checksum buffer. + OffsetMap[Entry.FileNameOffset] = SerializedSize; + assert(SerializedSize % 4 == 0); + + uint32_t Len = alignTo(sizeof(FileChecksumEntryHeader) + Bytes.size(), 4); + SerializedSize += Len; +} + +uint32_t ModuleDebugFileChecksumFragment::calculateSerializedLength() { + return SerializedSize; +} + +Error ModuleDebugFileChecksumFragment::commit(BinaryStreamWriter &Writer) { + for (const auto &FC : Checksums) { + FileChecksumEntryHeader Header; + Header.ChecksumKind = uint8_t(FC.Kind); + Header.ChecksumSize = FC.Checksum.size(); + Header.FileNameOffset = FC.FileNameOffset; + if (auto EC = Writer.writeObject(Header)) + return EC; + if (auto EC = Writer.writeArray(makeArrayRef(FC.Checksum))) + return EC; + if (auto EC = Writer.padToAlignment(4)) + return EC; + } + return Error::success(); +} + +uint32_t +ModuleDebugFileChecksumFragment::mapChecksumOffset(StringRef FileName) const { + uint32_t Offset = Strings.getStringId(FileName); + auto Iter = OffsetMap.find(Offset); + assert(Iter != OffsetMap.end()); + return Iter->second; +} diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFragment.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFragment.cpp new file mode 100644 index 000000000000..2af1917413da --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFragment.cpp @@ -0,0 +1,16 @@ +//===- ModuleDebugFragment.cpp -----------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h" + +using namespace llvm::codeview; + +ModuleDebugFragmentRef::~ModuleDebugFragmentRef() {} + +ModuleDebugFragment::~ModuleDebugFragment() {} diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp new file mode 100644 index 000000000000..b2543de78069 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp @@ -0,0 +1,84 @@ +//===- ModuleDebugFragmentRecord.cpp -----------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h" + +#include "llvm/Support/BinaryStreamReader.h" + +using namespace llvm; +using namespace llvm::codeview; + +ModuleDebugFragmentRecord::ModuleDebugFragmentRecord() + : Kind(ModuleDebugFragmentKind::None) {} + +ModuleDebugFragmentRecord::ModuleDebugFragmentRecord( + ModuleDebugFragmentKind Kind, BinaryStreamRef Data) + : Kind(Kind), Data(Data) {} + +Error ModuleDebugFragmentRecord::initialize(BinaryStreamRef Stream, + ModuleDebugFragmentRecord &Info) { + const ModuleDebugFragmentHeader *Header; + BinaryStreamReader Reader(Stream); + if (auto EC = Reader.readObject(Header)) + return EC; + + ModuleDebugFragmentKind Kind = + static_cast<ModuleDebugFragmentKind>(uint32_t(Header->Kind)); + switch (Kind) { + case ModuleDebugFragmentKind::FileChecksums: + case ModuleDebugFragmentKind::Lines: + case ModuleDebugFragmentKind::InlineeLines: + break; + default: + llvm_unreachable("Unexpected debug fragment kind!"); + } + if (auto EC = Reader.readStreamRef(Info.Data, Header->Length)) + return EC; + Info.Kind = Kind; + return Error::success(); +} + +uint32_t ModuleDebugFragmentRecord::getRecordLength() const { + uint32_t Result = sizeof(ModuleDebugFragmentHeader) + Data.getLength(); + assert(Result % 4 == 0); + return Result; +} + +ModuleDebugFragmentKind ModuleDebugFragmentRecord::kind() const { return Kind; } + +BinaryStreamRef ModuleDebugFragmentRecord::getRecordData() const { + return Data; +} + +ModuleDebugFragmentRecordBuilder::ModuleDebugFragmentRecordBuilder( + ModuleDebugFragmentKind Kind, ModuleDebugFragment &Frag) + : Kind(Kind), Frag(Frag) {} + +uint32_t ModuleDebugFragmentRecordBuilder::calculateSerializedLength() { + uint32_t Size = sizeof(ModuleDebugFragmentHeader) + + alignTo(Frag.calculateSerializedLength(), 4); + return Size; +} + +Error ModuleDebugFragmentRecordBuilder::commit(BinaryStreamWriter &Writer) { + ModuleDebugFragmentHeader Header; + Header.Kind = uint32_t(Kind); + Header.Length = + calculateSerializedLength() - sizeof(ModuleDebugFragmentHeader); + + if (auto EC = Writer.writeObject(Header)) + return EC; + if (auto EC = Frag.commit(Writer)) + return EC; + if (auto EC = Writer.padToAlignment(4)) + return EC; + + return Error::success(); +} diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp new file mode 100644 index 000000000000..dc591f3990e2 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp @@ -0,0 +1,52 @@ +//===- 
ModuleDebugFragmentVisitor.cpp ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h" + +#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamRef.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error llvm::codeview::visitModuleDebugFragment( + const ModuleDebugFragmentRecord &R, ModuleDebugFragmentVisitor &V) { + BinaryStreamReader Reader(R.getRecordData()); + switch (R.kind()) { + case ModuleDebugFragmentKind::Lines: { + ModuleDebugLineFragmentRef Fragment; + if (auto EC = Fragment.initialize(Reader)) + return EC; + + return V.visitLines(Fragment); + } + case ModuleDebugFragmentKind::FileChecksums: { + ModuleDebugFileChecksumFragmentRef Fragment; + if (auto EC = Fragment.initialize(Reader)) + return EC; + + return V.visitFileChecksums(Fragment); + } + case ModuleDebugFragmentKind::InlineeLines: { + ModuleDebugInlineeLineFragmentRef Fragment; + if (auto EC = Fragment.initialize(Reader)) + return EC; + return V.visitInlineeLines(Fragment); + } + default: { + ModuleDebugUnknownFragmentRef Fragment(R.kind(), R.getRecordData()); + return V.visitUnknown(Fragment); + } + } +} diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp new file mode 100644 index 000000000000..cb6a8478797f --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp @@ -0,0 +1,123 @@ +//===- ModuleDebugInlineeLineFragment.cpp ------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" + +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error VarStreamArrayExtractor<InlineeSourceLine>::extract( + BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item, + bool HasExtraFiles) { + BinaryStreamReader Reader(Stream); + + if (auto EC = Reader.readObject(Item.Header)) + return EC; + + if (HasExtraFiles) { + uint32_t ExtraFileCount; + if (auto EC = Reader.readInteger(ExtraFileCount)) + return EC; + if (auto EC = Reader.readArray(Item.ExtraFiles, ExtraFileCount)) + return EC; + } + + Len = Reader.getOffset(); + return Error::success(); +} + +ModuleDebugInlineeLineFragmentRef::ModuleDebugInlineeLineFragmentRef() + : ModuleDebugFragmentRef(ModuleDebugFragmentKind::InlineeLines) {} + +Error ModuleDebugInlineeLineFragmentRef::initialize(BinaryStreamReader Reader) { + if (auto EC = Reader.readEnum(Signature)) + return EC; + + if (auto EC = + Reader.readArray(Lines, Reader.bytesRemaining(), hasExtraFiles())) + return EC; + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +bool ModuleDebugInlineeLineFragmentRef::hasExtraFiles() const { + return Signature == InlineeLinesSignature::ExtraFiles; +} + +ModuleDebugInlineeLineFragment::ModuleDebugInlineeLineFragment( + ModuleDebugFileChecksumFragment &Checksums, bool HasExtraFiles) + : ModuleDebugFragment(ModuleDebugFragmentKind::InlineeLines), + Checksums(Checksums), HasExtraFiles(HasExtraFiles) {} + +uint32_t ModuleDebugInlineeLineFragment::calculateSerializedLength() { + // 4 bytes for the signature + uint32_t Size = sizeof(InlineeLinesSignature); + + // one header for each entry. + Size += Entries.size() * sizeof(InlineeSourceLineHeader); + if (HasExtraFiles) { + // If extra files are enabled, one count for each entry. + Size += Entries.size() * sizeof(uint32_t); + + // And one file id for each file. 
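// Worked example (assuming sizeof(InlineeSourceLineHeader) == 12): two
// entries carrying one extra file each serialize to 4 + 2*12 + 2*4 + 2*4
// = 44 bytes, which satisfies the "multiple of 4" assert below.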
+ Size += ExtraFileCount * sizeof(uint32_t); + } + assert(Size % 4 == 0); + return Size; +} + +Error ModuleDebugInlineeLineFragment::commit(BinaryStreamWriter &Writer) { + InlineeLinesSignature Sig = InlineeLinesSignature::Normal; + if (HasExtraFiles) + Sig = InlineeLinesSignature::ExtraFiles; + + if (auto EC = Writer.writeEnum(Sig)) + return EC; + + for (const auto &E : Entries) { + if (auto EC = Writer.writeObject(E.Header)) + return EC; + + if (!HasExtraFiles) + continue; + + if (auto EC = Writer.writeInteger<uint32_t>(E.ExtraFiles.size())) + return EC; + if (auto EC = Writer.writeArray(makeArrayRef(E.ExtraFiles))) + return EC; + } + + return Error::success(); +} + +void ModuleDebugInlineeLineFragment::addExtraFile(StringRef FileName) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + + auto &Entry = Entries.back(); + Entry.ExtraFiles.push_back(ulittle32_t(Offset)); + ++ExtraFileCount; +} + +void ModuleDebugInlineeLineFragment::addInlineSite(TypeIndex FuncId, + StringRef FileName, + uint32_t SourceLine) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + + Entries.emplace_back(); + auto &Entry = Entries.back(); + Entry.Header.FileID = Offset; + Entry.Header.SourceLineNum = SourceLine; + Entry.Header.Inlinee = FuncId; +} diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp new file mode 100644 index 000000000000..e0ee934709ba --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp @@ -0,0 +1,161 @@ +//===- ModuleDebugLineFragment.cpp -------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h" + +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error LineColumnExtractor::extract(BinaryStreamRef Stream, uint32_t &Len, + LineColumnEntry &Item, + const LineFragmentHeader *Header) { + using namespace codeview; + const LineBlockFragmentHeader *BlockHeader; + BinaryStreamReader Reader(Stream); + if (auto EC = Reader.readObject(BlockHeader)) + return EC; + bool HasColumn = Header->Flags & uint16_t(LF_HaveColumns); + uint32_t LineInfoSize = + BlockHeader->NumLines * + (sizeof(LineNumberEntry) + (HasColumn ? sizeof(ColumnNumberEntry) : 0)); + if (BlockHeader->BlockSize < sizeof(LineBlockFragmentHeader)) + return make_error<CodeViewError>(cv_error_code::corrupt_record, + "Invalid line block record size"); + uint32_t Size = BlockHeader->BlockSize - sizeof(LineBlockFragmentHeader); + if (LineInfoSize > Size) + return make_error<CodeViewError>(cv_error_code::corrupt_record, + "Invalid line block record size"); + // The value recorded in BlockHeader->BlockSize includes the size of + // LineBlockFragmentHeader. 
+ Len = BlockHeader->BlockSize; + Item.NameIndex = BlockHeader->NameIndex; + if (auto EC = Reader.readArray(Item.LineNumbers, BlockHeader->NumLines)) + return EC; + if (HasColumn) { + if (auto EC = Reader.readArray(Item.Columns, BlockHeader->NumLines)) + return EC; + } + return Error::success(); +} + +ModuleDebugLineFragmentRef::ModuleDebugLineFragmentRef() + : ModuleDebugFragmentRef(ModuleDebugFragmentKind::Lines) {} + +Error ModuleDebugLineFragmentRef::initialize(BinaryStreamReader Reader) { + if (auto EC = Reader.readObject(Header)) + return EC; + + if (auto EC = + Reader.readArray(LinesAndColumns, Reader.bytesRemaining(), Header)) + return EC; + + return Error::success(); +} + +bool ModuleDebugLineFragmentRef::hasColumnInfo() const { + return !!(Header->Flags & LF_HaveColumns); +} + +ModuleDebugLineFragment::ModuleDebugLineFragment( + ModuleDebugFileChecksumFragment &Checksums, StringTable &Strings) + : ModuleDebugFragment(ModuleDebugFragmentKind::Lines), + Checksums(Checksums) {} + +void ModuleDebugLineFragment::createBlock(StringRef FileName) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + + Blocks.emplace_back(Offset); +} + +void ModuleDebugLineFragment::addLineInfo(uint32_t Offset, + const LineInfo &Line) { + Block &B = Blocks.back(); + LineNumberEntry LNE; + LNE.Flags = Line.getRawData(); + LNE.Offset = Offset; + B.Lines.push_back(LNE); +} + +void ModuleDebugLineFragment::addLineAndColumnInfo(uint32_t Offset, + const LineInfo &Line, + uint32_t ColStart, + uint32_t ColEnd) { + Block &B = Blocks.back(); + assert(B.Lines.size() == B.Columns.size()); + + addLineInfo(Offset, Line); + ColumnNumberEntry CNE; + CNE.StartColumn = ColStart; + CNE.EndColumn = ColEnd; + B.Columns.push_back(CNE); +} + +Error ModuleDebugLineFragment::commit(BinaryStreamWriter &Writer) { + LineFragmentHeader Header; + Header.CodeSize = CodeSize; + Header.Flags = hasColumnInfo() ? 
LF_HaveColumns : 0; + Header.RelocOffset = RelocOffset; + Header.RelocSegment = RelocSegment; + + if (auto EC = Writer.writeObject(Header)) + return EC; + + for (const auto &B : Blocks) { + LineBlockFragmentHeader BlockHeader; + assert(B.Lines.size() == B.Columns.size() || B.Columns.empty()); + + BlockHeader.NumLines = B.Lines.size(); + BlockHeader.BlockSize = sizeof(LineBlockFragmentHeader); + BlockHeader.BlockSize += BlockHeader.NumLines * sizeof(LineNumberEntry); + if (hasColumnInfo()) + BlockHeader.BlockSize += BlockHeader.NumLines * sizeof(ColumnNumberEntry); + BlockHeader.NameIndex = B.ChecksumBufferOffset; + if (auto EC = Writer.writeObject(BlockHeader)) + return EC; + + if (auto EC = Writer.writeArray(makeArrayRef(B.Lines))) + return EC; + + if (hasColumnInfo()) { + if (auto EC = Writer.writeArray(makeArrayRef(B.Columns))) + return EC; + } + } + return Error::success(); +} + +uint32_t ModuleDebugLineFragment::calculateSerializedLength() { + uint32_t Size = sizeof(LineFragmentHeader); + for (const auto &B : Blocks) { + Size += sizeof(LineBlockFragmentHeader); + Size += B.Lines.size() * sizeof(LineNumberEntry); + if (hasColumnInfo()) + Size += B.Columns.size() * sizeof(ColumnNumberEntry); + } + return Size; +} + +void ModuleDebugLineFragment::setRelocationAddress(uint16_t Segment, + uint16_t Offset) { + RelocOffset = Offset; + RelocSegment = Segment; +} + +void ModuleDebugLineFragment::setCodeSize(uint32_t Size) { CodeSize = Size; } + +void ModuleDebugLineFragment::setFlags(LineFlags Flags) { this->Flags = Flags; } + +bool ModuleDebugLineFragment::hasColumnInfo() const { + return Flags & LF_HaveColumns; +} diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugUnknownFragment.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugUnknownFragment.cpp new file mode 100644 index 000000000000..9fd2cb8ed3e8 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/CodeView/ModuleDebugUnknownFragment.cpp @@ -0,0 +1,10 @@ +//===- ModuleDebugUnknownFragment.cpp ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h"
\ No newline at end of file diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstream.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstream.cpp deleted file mode 100644 index 69a7c59116cf..000000000000 --- a/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstream.cpp +++ /dev/null @@ -1,43 +0,0 @@ -//===- ModuleSubstream.cpp --------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/ModuleSubstream.h" - -#include "llvm/Support/BinaryStreamReader.h" - -using namespace llvm; -using namespace llvm::codeview; - -ModuleSubstream::ModuleSubstream() : Kind(ModuleSubstreamKind::None) {} - -ModuleSubstream::ModuleSubstream(ModuleSubstreamKind Kind, BinaryStreamRef Data) - : Kind(Kind), Data(Data) {} - -Error ModuleSubstream::initialize(BinaryStreamRef Stream, - ModuleSubstream &Info) { - const ModuleSubsectionHeader *Header; - BinaryStreamReader Reader(Stream); - if (auto EC = Reader.readObject(Header)) - return EC; - - ModuleSubstreamKind Kind = - static_cast<ModuleSubstreamKind>(uint32_t(Header->Kind)); - if (auto EC = Reader.readStreamRef(Info.Data, Header->Length)) - return EC; - Info.Kind = Kind; - return Error::success(); -} - -uint32_t ModuleSubstream::getRecordLength() const { - return sizeof(ModuleSubsectionHeader) + Data.getLength(); -} - -ModuleSubstreamKind ModuleSubstream::getSubstreamKind() const { return Kind; } - -BinaryStreamRef ModuleSubstream::getRecordData() const { return Data; } diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp deleted file mode 100644 index e490a78cadbc..000000000000 --- a/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp +++ /dev/null @@ -1,106 +0,0 @@ -//===- ModuleSubstreamVisitor.cpp -------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/BinaryStreamRef.h" - -using namespace llvm; -using namespace llvm::codeview; - -Error IModuleSubstreamVisitor::visitSymbols(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::Symbols, Data); -} -Error IModuleSubstreamVisitor::visitLines(BinaryStreamRef Data, - const LineSubstreamHeader *Header, - const LineInfoArray &Lines) { - return visitUnknown(ModuleSubstreamKind::Lines, Data); -} -Error IModuleSubstreamVisitor::visitStringTable(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::StringTable, Data); -} -Error IModuleSubstreamVisitor::visitFileChecksums( - BinaryStreamRef Data, const FileChecksumArray &Checksums) { - return visitUnknown(ModuleSubstreamKind::FileChecksums, Data); -} -Error IModuleSubstreamVisitor::visitFrameData(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::FrameData, Data); -} -Error IModuleSubstreamVisitor::visitInlineeLines(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::InlineeLines, Data); -} -Error IModuleSubstreamVisitor::visitCrossScopeImports(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::CrossScopeExports, Data); -} -Error IModuleSubstreamVisitor::visitCrossScopeExports(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::CrossScopeImports, Data); -} -Error IModuleSubstreamVisitor::visitILLines(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::ILLines, Data); -} -Error IModuleSubstreamVisitor::visitFuncMDTokenMap(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::FuncMDTokenMap, Data); -} -Error IModuleSubstreamVisitor::visitTypeMDTokenMap(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::TypeMDTokenMap, Data); -} -Error IModuleSubstreamVisitor::visitMergedAssemblyInput(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::MergedAssemblyInput, Data); -} -Error IModuleSubstreamVisitor::visitCoffSymbolRVA(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::CoffSymbolRVA, Data); -} - -Error llvm::codeview::visitModuleSubstream(const ModuleSubstream &R, - IModuleSubstreamVisitor &V) { - switch (R.getSubstreamKind()) { - case ModuleSubstreamKind::Symbols: - return V.visitSymbols(R.getRecordData()); - case ModuleSubstreamKind::Lines: { - BinaryStreamReader Reader(R.getRecordData()); - const LineSubstreamHeader *Header; - if (auto EC = Reader.readObject(Header)) - return EC; - VarStreamArrayExtractor<LineColumnEntry> E(Header); - LineInfoArray LineInfos(E); - if (auto EC = Reader.readArray(LineInfos, Reader.bytesRemaining())) - return EC; - return V.visitLines(R.getRecordData(), Header, LineInfos); - } - case ModuleSubstreamKind::StringTable: - return V.visitStringTable(R.getRecordData()); - case ModuleSubstreamKind::FileChecksums: { - BinaryStreamReader Reader(R.getRecordData()); - FileChecksumArray Checksums; - if (auto EC = Reader.readArray(Checksums, Reader.bytesRemaining())) - return EC; - return V.visitFileChecksums(R.getRecordData(), Checksums); - } - case ModuleSubstreamKind::FrameData: - return V.visitFrameData(R.getRecordData()); - case ModuleSubstreamKind::InlineeLines: - return V.visitInlineeLines(R.getRecordData()); - case ModuleSubstreamKind::CrossScopeImports: - return V.visitCrossScopeImports(R.getRecordData()); - case ModuleSubstreamKind::CrossScopeExports: 
- return V.visitCrossScopeExports(R.getRecordData()); - case ModuleSubstreamKind::ILLines: - return V.visitILLines(R.getRecordData()); - case ModuleSubstreamKind::FuncMDTokenMap: - return V.visitFuncMDTokenMap(R.getRecordData()); - case ModuleSubstreamKind::TypeMDTokenMap: - return V.visitTypeMDTokenMap(R.getRecordData()); - case ModuleSubstreamKind::MergedAssemblyInput: - return V.visitMergedAssemblyInput(R.getRecordData()); - case ModuleSubstreamKind::CoffSymbolRVA: - return V.visitCoffSymbolRVA(R.getRecordData()); - default: - return V.visitUnknown(R.getSubstreamKind(), R.getRecordData()); - } -} diff --git a/contrib/llvm/lib/DebugInfo/CodeView/StringTable.cpp b/contrib/llvm/lib/DebugInfo/CodeView/StringTable.cpp new file mode 100644 index 000000000000..21f11204686b --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/CodeView/StringTable.cpp @@ -0,0 +1,71 @@ +//===- StringTable.cpp - CodeView String Table Reader/Writer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/StringTable.h" + +#include "llvm/Support/BinaryStream.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" + +using namespace llvm; +using namespace llvm::codeview; + +StringTableRef::StringTableRef() {} + +Error StringTableRef::initialize(BinaryStreamRef Contents) { + Stream = Contents; + return Error::success(); +} + +Expected<StringRef> StringTableRef::getString(uint32_t Offset) const { + BinaryStreamReader Reader(Stream); + Reader.setOffset(Offset); + StringRef Result; + if (auto EC = Reader.readCString(Result)) + return std::move(EC); + return Result; +} + +uint32_t StringTable::insert(StringRef S) { + auto P = Strings.insert({S, StringSize}); + + // If a given string didn't exist in the string table, we want to increment + // the string table size. 
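// For example, assuming the table reserves a null byte at offset 0 (i.e.
// StringSize starts at 1): insert("foo") returns offset 1 and grows the
// size to 5, insert("bar") then returns 5, and a repeated insert("foo")
// finds the existing entry and returns 1 without growing anything.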
+ if (P.second) + StringSize += S.size() + 1; // +1 for '\0' + return P.first->second; +} + +uint32_t StringTable::calculateSerializedSize() const { return StringSize; } + +Error StringTable::commit(BinaryStreamWriter &Writer) const { + assert(Writer.bytesRemaining() == StringSize); + uint32_t MaxOffset = 1; + + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Pair.getValue(); + Writer.setOffset(Offset); + if (auto EC = Writer.writeCString(S)) + return EC; + MaxOffset = std::max<uint32_t>(MaxOffset, Offset + S.size() + 1); + } + + Writer.setOffset(MaxOffset); + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +uint32_t StringTable::size() const { return Strings.size(); } + +uint32_t StringTable::getStringId(StringRef S) const { + auto P = Strings.find(S); + assert(P != Strings.end()); + return P->second; +} diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp index 134471e81cac..5395e4349b28 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" #include "llvm/DebugInfo/CodeView/CVTypeDumper.h" #include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -369,14 +370,14 @@ Error CVSymbolDumperImpl::visitKnownRecord( DictScope S(W, "DefRangeSubfield"); if (ObjDelegate) { - StringRef StringTable = ObjDelegate->getStringTable(); - auto ProgramStringTableOffset = DefRangeSubfield.Program; - if (ProgramStringTableOffset >= StringTable.size()) + StringTableRef Strings = ObjDelegate->getStringTable(); + auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program); + if (!ExpectedProgram) { + consumeError(ExpectedProgram.takeError()); return llvm::make_error<CodeViewError>( "String table offset outside of bounds of String Table!"); - StringRef Program = - StringTable.drop_front(ProgramStringTableOffset).split('\0').first; - W.printString("Program", Program); + } + W.printString("Program", *ExpectedProgram); } W.printNumber("OffsetInParent", DefRangeSubfield.OffsetInParent); printLocalVariableAddrRange(DefRangeSubfield.Range, @@ -390,14 +391,14 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DictScope S(W, "DefRange"); if (ObjDelegate) { - StringRef StringTable = ObjDelegate->getStringTable(); - auto ProgramStringTableOffset = DefRange.Program; - if (ProgramStringTableOffset >= StringTable.size()) + StringTableRef Strings = ObjDelegate->getStringTable(); + auto ExpectedProgram = Strings.getString(DefRange.Program); + if (!ExpectedProgram) { + consumeError(ExpectedProgram.takeError()); return llvm::make_error<CodeViewError>( "String table offset outside of bounds of String Table!"); - StringRef Program = - StringTable.drop_front(ProgramStringTableOffset).split('\0').first; - W.printString("Program", Program); + } + W.printString("Program", *ExpectedProgram); } printLocalVariableAddrRange(DefRange.Range, DefRange.getRelocationOffset()); printLocalVariableAddrGap(DefRange.Gaps); diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeDatabase.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeDatabase.cpp index f9ded6ce2a86..efaba4646ffe 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/TypeDatabase.cpp +++ 
b/contrib/llvm/lib/DebugInfo/CodeView/TypeDatabase.cpp @@ -110,6 +110,10 @@ const CVType &TypeDatabase::getTypeRecord(TypeIndex Index) const { return TypeRecords[Index.getIndex() - TypeIndex::FirstNonSimpleIndex]; } +CVType &TypeDatabase::getTypeRecord(TypeIndex Index) { + return TypeRecords[Index.getIndex() - TypeIndex::FirstNonSimpleIndex]; +} + bool TypeDatabase::containsTypeIndex(TypeIndex Index) const { uint32_t I = Index.getIndex() - TypeIndex::FirstNonSimpleIndex; return I < CVUDTNames.size(); diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 7e8d04672c03..573d37d77fee 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h" #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h" #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" @@ -29,6 +29,7 @@ #include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFVerifier.h" #include "llvm/Object/Decompressor.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" @@ -42,6 +43,8 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cstdint> +#include <map> +#include <set> #include <string> #include <utility> #include <vector> @@ -284,6 +287,268 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, getStringSection(), isLittleEndian()); } +DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) { + parseCompileUnits(); + if (auto *CU = CUs.getUnitForOffset(Offset)) + return CU->getDIEForOffset(Offset); + return DWARFDie(); +} + +namespace { + +class Verifier { + raw_ostream &OS; + DWARFContext &DCtx; +public: + Verifier(raw_ostream &S, DWARFContext &D) : OS(S), DCtx(D) {} + + bool HandleDebugInfo() { + bool Success = true; + // A map that tracks all references (converted absolute references) so we + // can verify each reference points to a valid DIE and not an offset that + // lies between two valid DIEs. + std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets; + + OS << "Verifying .debug_info...\n"; + for (const auto &CU : DCtx.compile_units()) { + unsigned NumDies = CU->getNumDIEs(); + for (unsigned I = 0; I < NumDies; ++I) { + auto Die = CU->getDIEAtIndex(I); + const auto Tag = Die.getTag(); + if (Tag == DW_TAG_null) + continue; + for (auto AttrValue : Die.attributes()) { + const auto Attr = AttrValue.Attr; + const auto Form = AttrValue.Value.getForm(); + switch (Attr) { + case DW_AT_ranges: + // Make sure the offset in the DW_AT_ranges attribute is valid.
+ if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getRangeSection().Data.size()) { + Success = false; + OS << "error: DW_AT_ranges offset is beyond .debug_ranges " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + } else { + Success = false; + OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + case DW_AT_stmt_list: + // Make sure the offset in the DW_AT_stmt_list attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getLineSection().Data.size()) { + Success = false; + OS << "error: DW_AT_stmt_list offset is beyond .debug_line " + "bounds: " + << format("0x%08" PRIx32, *SectionOffset) << "\n"; + CU->getUnitDIE().dump(OS, 0); + OS << "\n"; + } + } else { + Success = false; + OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + + default: + break; + } + switch (Form) { + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_udata: { + // Verify all CU relative references are valid CU offsets. + Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + auto DieCU = Die.getDwarfUnit(); + auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); + auto CUOffset = AttrValue.Value.getRawUValue(); + if (CUOffset >= CUSize) { + Success = false; + OS << "error: " << FormEncodingString(Form) << " CU offset " + << format("0x%08" PRIx32, CUOffset) + << " is invalid (must be less than CU size of " + << format("0x%08" PRIx32, CUSize) << "):\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_ref_addr: { + // Verify all absolute DIE references have valid offsets in the + // .debug_info section. + Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + if(*RefVal >= DCtx.getInfoSection().Data.size()) { + Success = false; + OS << "error: DW_FORM_ref_addr offset beyond .debug_info " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_strp: { + auto SecOffset = AttrValue.Value.getAsSectionOffset(); + assert(SecOffset); // DW_FORM_strp is a section offset. + if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) { + Success = false; + OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + } + default: + break; + } + } + } + } + + // Take all references and make sure they point to an actual DIE by + // getting the DIE by offset and emitting an error + OS << "Verifying .debug_info references...\n"; + for (auto Pair: ReferenceToDIEOffsets) { + auto Die = DCtx.getDIEForOffset(Pair.first); + if (Die) + continue; + Success = false; + OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first) + << ". 
Offset is in between DIEs:\n"; + for (auto Offset: Pair.second) { + auto ReferencingDie = DCtx.getDIEForOffset(Offset); + ReferencingDie.dump(OS, 0); + OS << "\n"; + } + OS << "\n"; + } + return Success; + } + + bool HandleDebugLine() { + std::map<uint64_t, DWARFDie> StmtListToDie; + bool Success = true; + OS << "Verifying .debug_line...\n"; + for (const auto &CU : DCtx.compile_units()) { + uint32_t LineTableOffset = 0; + auto CUDie = CU->getUnitDIE(); + auto StmtFormValue = CUDie.find(DW_AT_stmt_list); + if (!StmtFormValue) { + // No line table for this compile unit. + continue; + } + // Get the attribute value as a section offset. No need to produce an + // error here if the encoding isn't correct because we validate this in + // the .debug_info verifier. + if (auto StmtSectionOffset = toSectionOffset(StmtFormValue)) { + LineTableOffset = *StmtSectionOffset; + if (LineTableOffset >= DCtx.getLineSection().Data.size()) { + // Make sure we don't get a valid line table back if the offset + // is wrong. + assert(DCtx.getLineTableForUnit(CU.get()) == nullptr); + // Skip this line table as it isn't valid. No need to create an error + // here because we validate this in the .debug_info verifier. + continue; + } else { + auto Iter = StmtListToDie.find(LineTableOffset); + if (Iter != StmtListToDie.end()) { + Success = false; + OS << "error: two compile unit DIEs, " + << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " + << format("0x%08" PRIx32, CUDie.getOffset()) + << ", have the same DW_AT_stmt_list section offset:\n"; + Iter->second.dump(OS, 0); + CUDie.dump(OS, 0); + OS << '\n'; + // Already verified this line table before, no need to do it again. + continue; + } + StmtListToDie[LineTableOffset] = CUDie; + } + } + auto LineTable = DCtx.getLineTableForUnit(CU.get()); + if (!LineTable) { + Success = false; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "] was not able to be parsed for CU:\n"; + CUDie.dump(OS, 0); + OS << '\n'; + continue; + } + uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size(); + uint64_t PrevAddress = 0; + uint32_t RowIndex = 0; + for (const auto &Row : LineTable->Rows) { + if (Row.Address < PrevAddress) { + Success = false; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "] row[" << RowIndex + << "] decreases in address from previous row:\n"; + + DWARFDebugLine::Row::dumpTableHeader(OS); + if (RowIndex > 0) + LineTable->Rows[RowIndex - 1].dump(OS); + Row.dump(OS); + OS << '\n'; + } + + if (Row.File > MaxFileIndex) { + Success = false; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "][" << RowIndex << "] has invalid file index " << Row.File + << " (valid values are [1," << MaxFileIndex << "]):\n"; + DWARFDebugLine::Row::dumpTableHeader(OS); + Row.dump(OS); + OS << '\n'; + } + if (Row.EndSequence) + PrevAddress = 0; + else + PrevAddress = Row.Address; + ++RowIndex; + } + } + return Success; + } +}; + +} // anonymous namespace + +bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { + bool Success = true; + DWARFVerifier verifier(OS, *this); + if (DumpType == DIDT_All || DumpType == DIDT_Info) { + if (!verifier.handleDebugInfo()) + Success = false; + } + if (DumpType == DIDT_All || DumpType == DIDT_Line) { + if (!verifier.handleDebugLine()) + Success = false; + } + return Success; +} const DWARFUnitIndex &DWARFContext::getCUIndex() { if (CUIndex) return *CUIndex; diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp 
b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index ff6ed9c6741d..f32e8fe76357 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -7,9 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/ADT/SmallString.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" @@ -26,11 +27,19 @@ using namespace llvm; using namespace dwarf; typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; +namespace { +struct ContentDescriptor { + dwarf::LineNumberEntryFormat Type; + dwarf::Form Form; +}; +typedef SmallVector<ContentDescriptor, 4> ContentDescriptors; +} // end anonymous namespace DWARFDebugLine::Prologue::Prologue() { clear(); } void DWARFDebugLine::Prologue::clear() { TotalLength = Version = PrologueLength = 0; + AddressSize = SegSelectorSize = 0; MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0; OpcodeBase = 0; IsDWARF64 = false; @@ -43,6 +52,8 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { OS << "Line table prologue:\n" << format(" total_length: 0x%8.8" PRIx64 "\n", TotalLength) << format(" version: %u\n", Version) + << format(Version >= 5 ? " address_size: %u\n" : "", AddressSize) + << format(Version >= 5 ? " seg_select_size: %u\n" : "", SegSelectorSize) << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength) << format(" min_inst_length: %u\n", MinInstLength) << format(Version >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst) @@ -51,95 +62,210 @@ << format(" line_range: %u\n", LineRange) << format(" opcode_base: %u\n", OpcodeBase); - for (uint32_t i = 0; i < StandardOpcodeLengths.size(); ++i) + for (uint32_t I = 0; I != StandardOpcodeLengths.size(); ++I) OS << format("standard_opcode_lengths[%s] = %u\n", - LNStandardString(i + 1).data(), StandardOpcodeLengths[i]); + LNStandardString(I + 1).data(), StandardOpcodeLengths[I]); if (!IncludeDirectories.empty()) - for (uint32_t i = 0; i < IncludeDirectories.size(); ++i) - OS << format("include_directories[%3u] = '", i + 1) - << IncludeDirectories[i] << "'\n"; + for (uint32_t I = 0; I != IncludeDirectories.size(); ++I) + OS << format("include_directories[%3u] = '", I + 1) + << IncludeDirectories[I] << "'\n"; if (!FileNames.empty()) { OS << " Dir Mod Time File Len File Name\n" << " ---- ---------- ---------- -----------" "----------------\n"; - for (uint32_t i = 0; i < FileNames.size(); ++i) { - const FileNameEntry &fileEntry = FileNames[i]; - OS << format("file_names[%3u] %4" PRIu64 " ", i + 1, fileEntry.DirIdx) - << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ", fileEntry.ModTime, - fileEntry.Length) - << fileEntry.Name << '\n'; + for (uint32_t I = 0; I != FileNames.size(); ++I) { + const FileNameEntry &FileEntry = FileNames[I]; + OS << format("file_names[%3u] %4" PRIu64 " ", I + 1, FileEntry.DirIdx) + << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ", FileEntry.ModTime, + FileEntry.Length) + << FileEntry.Name << '\n'; } } } -bool DWARFDebugLine::Prologue::parse(DataExtractor debug_line_data, - uint32_t *offset_ptr) { - const uint64_t prologue_offset = *offset_ptr; +// Parse v2-v4 directory and file tables.
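// In DWARF v2 through v4 the two tables sit back to back: NUL-terminated
// directory strings ended by an empty string, then file entries of the form
//   name\0 ULEB128(dir_index) ULEB128(mtime) ULEB128(length)
// also ended by an empty name, which is exactly what the two loops below
// consume.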
+static void +parseV2DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset, + std::vector<StringRef> &IncludeDirectories, + std::vector<DWARFDebugLine::FileNameEntry> &FileNames) { + while (*OffsetPtr < EndPrologueOffset) { + StringRef S = DebugLineData.getCStrRef(OffsetPtr); + if (S.empty()) + break; + IncludeDirectories.push_back(S); + } + + while (*OffsetPtr < EndPrologueOffset) { + StringRef Name = DebugLineData.getCStrRef(OffsetPtr); + if (Name.empty()) + break; + DWARFDebugLine::FileNameEntry FileEntry; + FileEntry.Name = Name; + FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr); + FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr); + FileEntry.Length = DebugLineData.getULEB128(OffsetPtr); + FileNames.push_back(FileEntry); + } +} + +// Parse v5 directory/file entry content descriptions. +// Returns the descriptors, or an empty vector if we did not find a path or +// ran off the end of the prologue. +static ContentDescriptors +parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset) { + ContentDescriptors Descriptors; + int FormatCount = DebugLineData.getU8(OffsetPtr); + bool HasPath = false; + for (int I = 0; I != FormatCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return ContentDescriptors(); + ContentDescriptor Descriptor; + Descriptor.Type = + dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr)); + Descriptor.Form = dwarf::Form(DebugLineData.getULEB128(OffsetPtr)); + if (Descriptor.Type == dwarf::DW_LNCT_path) + HasPath = true; + Descriptors.push_back(Descriptor); + } + return HasPath ? Descriptors : ContentDescriptors(); +} + +static bool +parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset, + std::vector<StringRef> &IncludeDirectories, + std::vector<DWARFDebugLine::FileNameEntry> &FileNames) { + // Get the directory entry description. + ContentDescriptors DirDescriptors = + parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset); + if (DirDescriptors.empty()) + return false; + + // Get the directory entries, according to the format described above. + int DirEntryCount = DebugLineData.getU8(OffsetPtr); + for (int I = 0; I != DirEntryCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return false; + for (auto Descriptor : DirDescriptors) { + DWARFFormValue Value(Descriptor.Form); + switch (Descriptor.Type) { + case DW_LNCT_path: + if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr)) + return false; + IncludeDirectories.push_back(Value.getAsCString().getValue()); + break; + default: + if (!Value.skipValue(DebugLineData, OffsetPtr, nullptr)) + return false; + } + } + } + + // Get the file entry description. + ContentDescriptors FileDescriptors = + parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset); + if (FileDescriptors.empty()) + return false; + + // Get the file entries, according to the format described above. 
+ int FileEntryCount = DebugLineData.getU8(OffsetPtr); + for (int I = 0; I != FileEntryCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return false; + DWARFDebugLine::FileNameEntry FileEntry; + for (auto Descriptor : FileDescriptors) { + DWARFFormValue Value(Descriptor.Form); + if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr)) + return false; + switch (Descriptor.Type) { + case DW_LNCT_path: + FileEntry.Name = Value.getAsCString().getValue(); + break; + case DW_LNCT_directory_index: + FileEntry.DirIdx = Value.getAsUnsignedConstant().getValue(); + break; + case DW_LNCT_timestamp: + FileEntry.ModTime = Value.getAsUnsignedConstant().getValue(); + break; + case DW_LNCT_size: + FileEntry.Length = Value.getAsUnsignedConstant().getValue(); + break; + // FIXME: Add MD5 + default: + break; + } + } + FileNames.push_back(FileEntry); + } + return true; +} + +bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, + uint32_t *OffsetPtr) { + const uint64_t PrologueOffset = *OffsetPtr; clear(); - TotalLength = debug_line_data.getU32(offset_ptr); + TotalLength = DebugLineData.getU32(OffsetPtr); if (TotalLength == UINT32_MAX) { IsDWARF64 = true; - TotalLength = debug_line_data.getU64(offset_ptr); + TotalLength = DebugLineData.getU64(OffsetPtr); } else if (TotalLength > 0xffffff00) { return false; } - Version = debug_line_data.getU16(offset_ptr); + Version = DebugLineData.getU16(OffsetPtr); if (Version < 2) return false; - PrologueLength = - debug_line_data.getUnsigned(offset_ptr, sizeofPrologueLength()); - const uint64_t end_prologue_offset = PrologueLength + *offset_ptr; - MinInstLength = debug_line_data.getU8(offset_ptr); + if (Version >= 5) { + AddressSize = DebugLineData.getU8(OffsetPtr); + SegSelectorSize = DebugLineData.getU8(OffsetPtr); + } + + PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength()); + const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr; + MinInstLength = DebugLineData.getU8(OffsetPtr); if (Version >= 4) - MaxOpsPerInst = debug_line_data.getU8(offset_ptr); - DefaultIsStmt = debug_line_data.getU8(offset_ptr); - LineBase = debug_line_data.getU8(offset_ptr); - LineRange = debug_line_data.getU8(offset_ptr); - OpcodeBase = debug_line_data.getU8(offset_ptr); + MaxOpsPerInst = DebugLineData.getU8(OffsetPtr); + DefaultIsStmt = DebugLineData.getU8(OffsetPtr); + LineBase = DebugLineData.getU8(OffsetPtr); + LineRange = DebugLineData.getU8(OffsetPtr); + OpcodeBase = DebugLineData.getU8(OffsetPtr); StandardOpcodeLengths.reserve(OpcodeBase - 1); - for (uint32_t i = 1; i < OpcodeBase; ++i) { - uint8_t op_len = debug_line_data.getU8(offset_ptr); - StandardOpcodeLengths.push_back(op_len); - } - - while (*offset_ptr < end_prologue_offset) { - const char *s = debug_line_data.getCStr(offset_ptr); - if (s && s[0]) - IncludeDirectories.push_back(s); - else - break; + for (uint32_t I = 1; I < OpcodeBase; ++I) { + uint8_t OpLen = DebugLineData.getU8(OffsetPtr); + StandardOpcodeLengths.push_back(OpLen); } - while (*offset_ptr < end_prologue_offset) { - const char *name = debug_line_data.getCStr(offset_ptr); - if (name && name[0]) { - FileNameEntry fileEntry; - fileEntry.Name = name; - fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr); - fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr); - fileEntry.Length = debug_line_data.getULEB128(offset_ptr); - FileNames.push_back(fileEntry); - } else { - break; + if (Version >= 5) { + if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, + IncludeDirectories, FileNames)) { + 
fprintf(stderr, + "warning: parsing line table prologue at 0x%8.8" PRIx64 + " found an invalid directory or file table description at" + " 0x%8.8" PRIx64 "\n", PrologueOffset, (uint64_t)*OffsetPtr); + return false; } - } - - if (*offset_ptr != end_prologue_offset) { - fprintf(stderr, "warning: parsing line table prologue at 0x%8.8" PRIx64 - " should have ended at 0x%8.8" PRIx64 - " but it ended at 0x%8.8" PRIx64 "\n", - prologue_offset, end_prologue_offset, (uint64_t)*offset_ptr); + } else + parseV2DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, + IncludeDirectories, FileNames); + + if (*OffsetPtr != EndPrologueOffset) { + fprintf(stderr, + "warning: parsing line table prologue at 0x%8.8" PRIx64 + " should have ended at 0x%8.8" PRIx64 + " but it ended at 0x%8.8" PRIx64 "\n", + PrologueOffset, EndPrologueOffset, (uint64_t)*OffsetPtr); return false; } return true; } -DWARFDebugLine::Row::Row(bool default_is_stmt) { reset(default_is_stmt); } +DWARFDebugLine::Row::Row(bool DefaultIsStmt) { reset(DefaultIsStmt); } void DWARFDebugLine::Row::postAppend() { BasicBlock = false; @@ -147,20 +273,26 @@ void DWARFDebugLine::Row::postAppend() { EpilogueBegin = false; } -void DWARFDebugLine::Row::reset(bool default_is_stmt) { +void DWARFDebugLine::Row::reset(bool DefaultIsStmt) { Address = 0; Line = 1; Column = 0; File = 1; Isa = 0; Discriminator = 0; - IsStmt = default_is_stmt; + IsStmt = DefaultIsStmt; BasicBlock = false; EndSequence = false; PrologueEnd = false; EpilogueBegin = false; } +void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS) { + OS << "Address Line Column File ISA Discriminator Flags\n" + << "------------------ ------ ------ ------ --- ------------- " + "-------------\n"; +} + void DWARFDebugLine::Row::dump(raw_ostream &OS) const { OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column) << format(" %6u %3u %13u ", File, Isa, Discriminator) @@ -187,9 +319,7 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const { OS << '\n'; if (!Rows.empty()) { - OS << "Address Line Column File ISA Discriminator Flags\n" - << "------------------ ------ ------ ------ --- ------------- " - "-------------\n"; + Row::dumpTableHeader(OS); for (const Row &R : Rows) { R.dump(OS); } @@ -212,7 +342,7 @@ void DWARFDebugLine::ParsingState::resetRowAndSequence() { Sequence.reset(); } -void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t offset) { +void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t Offset) { if (Sequence.Empty) { // Record the beginning of instruction sequence. 
Sequence.Empty = false; @@ -233,56 +363,56 @@ void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t offset) { } const DWARFDebugLine::LineTable * -DWARFDebugLine::getLineTable(uint32_t offset) const { - LineTableConstIter pos = LineTableMap.find(offset); - if (pos != LineTableMap.end()) - return &pos->second; +DWARFDebugLine::getLineTable(uint32_t Offset) const { + LineTableConstIter Pos = LineTableMap.find(Offset); + if (Pos != LineTableMap.end()) + return &Pos->second; return nullptr; } const DWARFDebugLine::LineTable * -DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data, - uint32_t offset) { - std::pair<LineTableIter, bool> pos = - LineTableMap.insert(LineTableMapTy::value_type(offset, LineTable())); - LineTable *LT = &pos.first->second; - if (pos.second) { - if (!LT->parse(debug_line_data, RelocMap, &offset)) +DWARFDebugLine::getOrParseLineTable(DataExtractor DebugLineData, + uint32_t Offset) { + std::pair<LineTableIter, bool> Pos = + LineTableMap.insert(LineTableMapTy::value_type(Offset, LineTable())); + LineTable *LT = &Pos.first->second; + if (Pos.second) { + if (!LT->parse(DebugLineData, RelocMap, &Offset)) return nullptr; } return LT; } -bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, +bool DWARFDebugLine::LineTable::parse(DataExtractor DebugLineData, const RelocAddrMap *RMap, - uint32_t *offset_ptr) { - const uint32_t debug_line_offset = *offset_ptr; + uint32_t *OffsetPtr) { + const uint32_t DebugLineOffset = *OffsetPtr; clear(); - if (!Prologue.parse(debug_line_data, offset_ptr)) { + if (!Prologue.parse(DebugLineData, OffsetPtr)) { // Restore our offset and return false to indicate failure! - *offset_ptr = debug_line_offset; + *OffsetPtr = DebugLineOffset; return false; } - const uint32_t end_offset = - debug_line_offset + Prologue.TotalLength + Prologue.sizeofTotalLength(); + const uint32_t EndOffset = + DebugLineOffset + Prologue.TotalLength + Prologue.sizeofTotalLength(); ParsingState State(this); - while (*offset_ptr < end_offset) { - uint8_t opcode = debug_line_data.getU8(offset_ptr); + while (*OffsetPtr < EndOffset) { + uint8_t Opcode = DebugLineData.getU8(OffsetPtr); - if (opcode == 0) { + if (Opcode == 0) { // Extended Opcodes always start with a zero opcode followed by // a uleb128 length so you can skip ones you don't know about - uint32_t ext_offset = *offset_ptr; - uint64_t len = debug_line_data.getULEB128(offset_ptr); - uint32_t arg_size = len - (*offset_ptr - ext_offset); + uint32_t ExtOffset = *OffsetPtr; + uint64_t Len = DebugLineData.getULEB128(OffsetPtr); + uint32_t ArgSize = Len - (*OffsetPtr - ExtOffset); - uint8_t sub_opcode = debug_line_data.getU8(offset_ptr); - switch (sub_opcode) { + uint8_t SubOpcode = DebugLineData.getU8(OffsetPtr); + switch (SubOpcode) { case DW_LNE_end_sequence: // Set the end_sequence register of the state machine to true and // append a row to the matrix using the current values of the @@ -292,7 +422,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // address is that of the byte after the last target machine instruction // of the sequence. State.Row.EndSequence = true; - State.appendRowToMatrix(*offset_ptr); + State.appendRowToMatrix(*OffsetPtr); State.resetRowAndSequence(); break; @@ -303,9 +433,8 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // relocatable address. All of the other statement program opcodes // that affect the address register add a delta to it. This instruction // stores a relocatable value into it instead. 
-        State.Row.Address =
-            getRelocatedValue(debug_line_data, debug_line_data.getAddressSize(),
-                              offset_ptr, RMap);
+        State.Row.Address = getRelocatedValue(
+            DebugLineData, DebugLineData.getAddressSize(), OffsetPtr, RMap);
         break;

       case DW_LNE_define_file:
@@ -330,33 +459,33 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
        // the DW_LNE_define_file instruction. These numbers are used in the
        // file register of the state machine.
        {
-          FileNameEntry fileEntry;
-          fileEntry.Name = debug_line_data.getCStr(offset_ptr);
-          fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
-          fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
-          fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
-          Prologue.FileNames.push_back(fileEntry);
+          FileNameEntry FileEntry;
+          FileEntry.Name = DebugLineData.getCStr(OffsetPtr);
+          FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr);
+          FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr);
+          FileEntry.Length = DebugLineData.getULEB128(OffsetPtr);
+          Prologue.FileNames.push_back(FileEntry);
        }
        break;

      case DW_LNE_set_discriminator:
-        State.Row.Discriminator = debug_line_data.getULEB128(offset_ptr);
+        State.Row.Discriminator = DebugLineData.getULEB128(OffsetPtr);
        break;

      default:
        // Length doesn't include the zero opcode byte or the length itself, but
        // it does include the sub_opcode, so we have to adjust for that below
-        (*offset_ptr) += arg_size;
+        (*OffsetPtr) += ArgSize;
        break;
      }
-    } else if (opcode < Prologue.OpcodeBase) {
-      switch (opcode) {
+    } else if (Opcode < Prologue.OpcodeBase) {
+      switch (Opcode) {
      // Standard Opcodes
      case DW_LNS_copy:
        // Takes no arguments. Append a row to the matrix using the
        // current values of the state-machine registers. Then set
        // the basic_block register to false.
-        State.appendRowToMatrix(*offset_ptr);
+        State.appendRowToMatrix(*OffsetPtr);
        break;

      case DW_LNS_advance_pc:
@@ -364,25 +493,25 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
        // min_inst_length field of the prologue, and adds the
        // result to the address register of the state machine.
        State.Row.Address +=
-            debug_line_data.getULEB128(offset_ptr) * Prologue.MinInstLength;
+            DebugLineData.getULEB128(OffsetPtr) * Prologue.MinInstLength;
        break;

      case DW_LNS_advance_line:
        // Takes a single signed LEB128 operand and adds that value to
        // the line register of the state machine.
-        State.Row.Line += debug_line_data.getSLEB128(offset_ptr);
+        State.Row.Line += DebugLineData.getSLEB128(OffsetPtr);
        break;

      case DW_LNS_set_file:
        // Takes a single unsigned LEB128 operand and stores it in the file
        // register of the state machine.
-        State.Row.File = debug_line_data.getULEB128(offset_ptr);
+        State.Row.File = DebugLineData.getULEB128(OffsetPtr);
        break;

      case DW_LNS_set_column:
        // Takes a single unsigned LEB128 operand and stores it in the
        // column register of the state machine.
-        State.Row.Column = debug_line_data.getULEB128(offset_ptr);
+        State.Row.Column = DebugLineData.getULEB128(OffsetPtr);
        break;

      case DW_LNS_negate_stmt:
@@ -410,10 +539,10 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
        // than twice that range will it need to use both DW_LNS_advance_pc
        // and a special opcode, requiring three or more bytes.
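        // For example, with typical values opcode_base = 13, line_range = 14
        // and min_inst_length = 1 (illustrative, not mandated), the adjusted
        // opcode is 255 - 13 = 242, so DW_LNS_const_add_pc advances the
        // address by 242 / 14 = 17 units while leaving the line register
        // untouched.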
        {
-          uint8_t adjust_opcode = 255 - Prologue.OpcodeBase;
-          uint64_t addr_offset =
-              (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength;
-          State.Row.Address += addr_offset;
+          uint8_t AdjustOpcode = 255 - Prologue.OpcodeBase;
+          uint64_t AddrOffset =
+              (AdjustOpcode / Prologue.LineRange) * Prologue.MinInstLength;
+          State.Row.Address += AddrOffset;
        }
        break;

@@ -427,7 +556,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
        // judge when the computation of a special opcode overflows and
        // requires the use of DW_LNS_advance_pc. Such assemblers, however,
        // can use DW_LNS_fixed_advance_pc instead, sacrificing compression.
-        State.Row.Address += debug_line_data.getU16(offset_ptr);
+        State.Row.Address += DebugLineData.getU16(OffsetPtr);
        break;

      case DW_LNS_set_prologue_end:
@@ -445,7 +574,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
      case DW_LNS_set_isa:
        // Takes a single unsigned LEB128 operand and stores it in the
        // ISA register of the state machine.
-        State.Row.Isa = debug_line_data.getULEB128(offset_ptr);
+        State.Row.Isa = DebugLineData.getULEB128(OffsetPtr);
        break;

      default:
@@ -453,10 +582,10 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
        // of such opcodes because they are specified in the prologue
        // as a multiple of LEB128 operands for each opcode.
        {
-          assert(opcode - 1U < Prologue.StandardOpcodeLengths.size());
-          uint8_t opcode_length = Prologue.StandardOpcodeLengths[opcode - 1];
-          for (uint8_t i = 0; i < opcode_length; ++i)
-            debug_line_data.getULEB128(offset_ptr);
+          assert(Opcode - 1U < Prologue.StandardOpcodeLengths.size());
+          uint8_t OpcodeLength = Prologue.StandardOpcodeLengths[Opcode - 1];
+          for (uint8_t I = 0; I < OpcodeLength; ++I)
+            DebugLineData.getULEB128(OffsetPtr);
        }
        break;
      }
@@ -494,14 +623,14 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
      //
      // line increment = line_base + (adjusted opcode % line_range)
-      uint8_t adjust_opcode = opcode - Prologue.OpcodeBase;
-      uint64_t addr_offset =
-          (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength;
-      int32_t line_offset =
-          Prologue.LineBase + (adjust_opcode % Prologue.LineRange);
-      State.Row.Line += line_offset;
-      State.Row.Address += addr_offset;
-      State.appendRowToMatrix(*offset_ptr);
+      uint8_t AdjustOpcode = Opcode - Prologue.OpcodeBase;
+      uint64_t AddrOffset =
+          (AdjustOpcode / Prologue.LineRange) * Prologue.MinInstLength;
+      int32_t LineOffset =
+          Prologue.LineBase + (AdjustOpcode % Prologue.LineRange);
+      State.Row.Line += LineOffset;
+      State.Row.Address += AddrOffset;
+      State.appendRowToMatrix(*OffsetPtr);
      // Reset discriminator to 0.
      State.Row.Discriminator = 0;
    }
@@ -523,124 +652,122 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
    // rudimentary sequences for address ranges [0x0, 0xsomething).
  }
-  return end_offset;
+  return EndOffset;
 }

 uint32_t
-DWARFDebugLine::LineTable::findRowInSeq(const DWARFDebugLine::Sequence &seq,
-                                        uint64_t address) const {
-  if (!seq.containsPC(address))
+DWARFDebugLine::LineTable::findRowInSeq(const DWARFDebugLine::Sequence &Seq,
+                                        uint64_t Address) const {
+  if (!Seq.containsPC(Address))
    return UnknownRowIndex;
  // Search for instruction address in the rows describing the sequence.
  // Rows are stored in a vector, so we may use arithmetic operations with
  // iterators.
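  // For example (illustrative addresses), if a sequence has rows at 0x10,
  // 0x14 and 0x18, looking up 0x15 yields the row at 0x14: the last row
  // whose address does not exceed the target.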
- DWARFDebugLine::Row row; - row.Address = address; - RowIter first_row = Rows.begin() + seq.FirstRowIndex; - RowIter last_row = Rows.begin() + seq.LastRowIndex; - LineTable::RowIter row_pos = std::lower_bound( - first_row, last_row, row, DWARFDebugLine::Row::orderByAddress); - if (row_pos == last_row) { - return seq.LastRowIndex - 1; + DWARFDebugLine::Row Row; + Row.Address = Address; + RowIter FirstRow = Rows.begin() + Seq.FirstRowIndex; + RowIter LastRow = Rows.begin() + Seq.LastRowIndex; + LineTable::RowIter RowPos = std::lower_bound( + FirstRow, LastRow, Row, DWARFDebugLine::Row::orderByAddress); + if (RowPos == LastRow) { + return Seq.LastRowIndex - 1; } - uint32_t index = seq.FirstRowIndex + (row_pos - first_row); - if (row_pos->Address > address) { - if (row_pos == first_row) + uint32_t Index = Seq.FirstRowIndex + (RowPos - FirstRow); + if (RowPos->Address > Address) { + if (RowPos == FirstRow) return UnknownRowIndex; else - index--; + Index--; } - return index; + return Index; } -uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { +uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t Address) const { if (Sequences.empty()) return UnknownRowIndex; // First, find an instruction sequence containing the given address. - DWARFDebugLine::Sequence sequence; - sequence.LowPC = address; - SequenceIter first_seq = Sequences.begin(); - SequenceIter last_seq = Sequences.end(); - SequenceIter seq_pos = std::lower_bound( - first_seq, last_seq, sequence, DWARFDebugLine::Sequence::orderByLowPC); - DWARFDebugLine::Sequence found_seq; - if (seq_pos == last_seq) { - found_seq = Sequences.back(); - } else if (seq_pos->LowPC == address) { - found_seq = *seq_pos; + DWARFDebugLine::Sequence Sequence; + Sequence.LowPC = Address; + SequenceIter FirstSeq = Sequences.begin(); + SequenceIter LastSeq = Sequences.end(); + SequenceIter SeqPos = std::lower_bound( + FirstSeq, LastSeq, Sequence, DWARFDebugLine::Sequence::orderByLowPC); + DWARFDebugLine::Sequence FoundSeq; + if (SeqPos == LastSeq) { + FoundSeq = Sequences.back(); + } else if (SeqPos->LowPC == Address) { + FoundSeq = *SeqPos; } else { - if (seq_pos == first_seq) + if (SeqPos == FirstSeq) return UnknownRowIndex; - found_seq = *(seq_pos - 1); + FoundSeq = *(SeqPos - 1); } - return findRowInSeq(found_seq, address); + return findRowInSeq(FoundSeq, Address); } bool DWARFDebugLine::LineTable::lookupAddressRange( - uint64_t address, uint64_t size, std::vector<uint32_t> &result) const { + uint64_t Address, uint64_t Size, std::vector<uint32_t> &Result) const { if (Sequences.empty()) return false; - uint64_t end_addr = address + size; + uint64_t EndAddr = Address + Size; // First, find an instruction sequence containing the given address. 
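  // For example (illustrative ranges): with sequences covering [0x0, 0x20)
  // and [0x40, 0x60), a query for [0x10, 0x50) collects row indices from
  // both sequences and skips the uncovered gap between them.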
- DWARFDebugLine::Sequence sequence; - sequence.LowPC = address; - SequenceIter first_seq = Sequences.begin(); - SequenceIter last_seq = Sequences.end(); - SequenceIter seq_pos = std::lower_bound( - first_seq, last_seq, sequence, DWARFDebugLine::Sequence::orderByLowPC); - if (seq_pos == last_seq || seq_pos->LowPC != address) { - if (seq_pos == first_seq) + DWARFDebugLine::Sequence Sequence; + Sequence.LowPC = Address; + SequenceIter FirstSeq = Sequences.begin(); + SequenceIter LastSeq = Sequences.end(); + SequenceIter SeqPos = std::lower_bound( + FirstSeq, LastSeq, Sequence, DWARFDebugLine::Sequence::orderByLowPC); + if (SeqPos == LastSeq || SeqPos->LowPC != Address) { + if (SeqPos == FirstSeq) return false; - seq_pos--; + SeqPos--; } - if (!seq_pos->containsPC(address)) + if (!SeqPos->containsPC(Address)) return false; - SequenceIter start_pos = seq_pos; + SequenceIter StartPos = SeqPos; // Add the rows from the first sequence to the vector, starting with the // index we just calculated - while (seq_pos != last_seq && seq_pos->LowPC < end_addr) { - const DWARFDebugLine::Sequence &cur_seq = *seq_pos; + while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) { + const DWARFDebugLine::Sequence &CurSeq = *SeqPos; // For the first sequence, we need to find which row in the sequence is the // first in our range. - uint32_t first_row_index = cur_seq.FirstRowIndex; - if (seq_pos == start_pos) - first_row_index = findRowInSeq(cur_seq, address); + uint32_t FirstRowIndex = CurSeq.FirstRowIndex; + if (SeqPos == StartPos) + FirstRowIndex = findRowInSeq(CurSeq, Address); // Figure out the last row in the range. - uint32_t last_row_index = findRowInSeq(cur_seq, end_addr - 1); - if (last_row_index == UnknownRowIndex) - last_row_index = cur_seq.LastRowIndex - 1; + uint32_t LastRowIndex = findRowInSeq(CurSeq, EndAddr - 1); + if (LastRowIndex == UnknownRowIndex) + LastRowIndex = CurSeq.LastRowIndex - 1; - assert(first_row_index != UnknownRowIndex); - assert(last_row_index != UnknownRowIndex); + assert(FirstRowIndex != UnknownRowIndex); + assert(LastRowIndex != UnknownRowIndex); - for (uint32_t i = first_row_index; i <= last_row_index; ++i) { - result.push_back(i); + for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) { + Result.push_back(I); } - ++seq_pos; + ++SeqPos; } return true; } -bool -DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const { +bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const { return FileIndex != 0 && FileIndex <= Prologue.FileNames.size(); } -bool -DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, - const char *CompDir, - FileLineInfoKind Kind, - std::string &Result) const { +bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, + const char *CompDir, + FileLineInfoKind Kind, + std::string &Result) const { if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex)) return false; const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1]; - const char *FileName = Entry.Name; + StringRef FileName = Entry.Name; if (Kind != FileLineInfoKind::AbsoluteFilePath || sys::path::is_absolute(FileName)) { Result = FileName; @@ -649,7 +776,7 @@ DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, SmallString<16> FilePath; uint64_t IncludeDirIndex = Entry.DirIdx; - const char *IncludeDir = ""; + StringRef IncludeDir; // Be defensive about the contents of Entry. 
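  // For example (illustrative values): an Entry with Name = "a.h" and
  // DirIdx = 1 selects IncludeDirectories[0]; if that directory is a
  // relative "inc" and Kind is AbsoluteFilePath, the assembled result is
  // "<CompDir>/inc/a.h".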
if (IncludeDirIndex > 0 && IncludeDirIndex <= Prologue.IncludeDirectories.size()) diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp index 662e53d9d7e6..daded255f8c7 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp @@ -45,7 +45,7 @@ DWARFDebugPubTable::DWARFDebugPubTable(StringRef Data, bool LittleEndian, } void DWARFDebugPubTable::dump(StringRef Name, raw_ostream &OS) const { - OS << "\n." << Name << " contents: a\n"; + OS << "\n." << Name << " contents:\n"; for (const Set &S : Sets) { OS << "length = " << format("0x%08x", S.Length); OS << " version = " << format("0x%04x", S.Version); diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 28592e4dfb65..7f827de89240 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -309,8 +309,10 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { } // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section offset. // Don't check for DWARF version here, as some producers may still do this - // by mistake. - return (Form == DW_FORM_data4 || Form == DW_FORM_data8) && + // by mistake. Also accept DW_FORM_strp since this is .debug_str section + // offset. + return (Form == DW_FORM_data4 || Form == DW_FORM_data8 || + Form == DW_FORM_strp) && FC == FC_SectionOffset; } diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp index 76354a9b1ddb..0625d01097c9 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp @@ -39,8 +39,9 @@ void DWARFGdbIndex::dumpAddressArea(raw_ostream &OS) const { << '\n'; for (const AddressEntry &Addr : AddressArea) OS << format( - " Low address = 0x%llx, High address = 0x%llx, CU index = %d\n", - Addr.LowAddress, Addr.HighAddress, Addr.CuIndex); + " Low/High address = [0x%llx, 0x%llx) (Size: 0x%llx), CU id = %d\n", + Addr.LowAddress, Addr.HighAddress, Addr.HighAddress - Addr.LowAddress, + Addr.CuIndex); } void DWARFGdbIndex::dumpSymbolTable(raw_ostream &OS) const { diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp new file mode 100644 index 000000000000..9494e876da15 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -0,0 +1,277 @@ +//===- DWARFVerifier.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/DWARF/DWARFVerifier.h" +#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/Support/raw_ostream.h" +#include <map> +#include <set> +#include <vector> + +using namespace llvm; +using namespace dwarf; +using namespace object; + +void DWARFVerifier::verifyDebugInfoAttribute(DWARFDie &Die, + DWARFAttribute &AttrValue) { + const auto Attr = AttrValue.Attr; + switch (Attr) { + case DW_AT_ranges: + // Make sure the offset in the DW_AT_ranges attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getRangeSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_AT_ranges offset is beyond .debug_ranges " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + } else { + ++NumDebugInfoErrors; + OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + case DW_AT_stmt_list: + // Make sure the offset in the DW_AT_stmt_list attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getLineSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_AT_stmt_list offset is beyond .debug_line " + "bounds: " + << format("0x%08" PRIx32, *SectionOffset) << "\n"; + Die.dump(OS, 0); + OS << "\n"; + } + } else { + ++NumDebugInfoErrors; + OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + + default: + break; + } +} + +void DWARFVerifier::verifyDebugInfoForm(DWARFDie &Die, + DWARFAttribute &AttrValue) { + const auto Form = AttrValue.Value.getForm(); + switch (Form) { + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_udata: { + // Verify all CU relative references are valid CU offsets. + Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + auto DieCU = Die.getDwarfUnit(); + auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); + auto CUOffset = AttrValue.Value.getRawUValue(); + if (CUOffset >= CUSize) { + ++NumDebugInfoErrors; + OS << "error: " << FormEncodingString(Form) << " CU offset " + << format("0x%08" PRIx32, CUOffset) + << " is invalid (must be less than CU size of " + << format("0x%08" PRIx32, CUSize) << "):\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_ref_addr: { + // Verify all absolute DIE references have valid offsets in the + // .debug_info section. + Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + if (*RefVal >= DCtx.getInfoSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_FORM_ref_addr offset beyond .debug_info " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. 
+        ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
+      }
+    }
+    break;
+  }
+  case DW_FORM_strp: {
+    auto SecOffset = AttrValue.Value.getAsSectionOffset();
+    assert(SecOffset); // DW_FORM_strp is a section offset.
+    if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) {
+      ++NumDebugInfoErrors;
+      OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n";
+      Die.dump(OS, 0);
+      OS << "\n";
+    }
+    break;
+  }
+  default:
+    break;
+  }
+}
+
+void DWARFVerifier::verifyDebugInfoReferences() {
+  // Take all references and make sure they point to an actual DIE by
+  // getting the DIE by offset, emitting an error for any reference that
+  // does not resolve to one.
+  OS << "Verifying .debug_info references...\n";
+  for (auto Pair : ReferenceToDIEOffsets) {
+    auto Die = DCtx.getDIEForOffset(Pair.first);
+    if (Die)
+      continue;
+    ++NumDebugInfoErrors;
+    OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
+       << ". Offset is in between DIEs:\n";
+    for (auto Offset : Pair.second) {
+      auto ReferencingDie = DCtx.getDIEForOffset(Offset);
+      ReferencingDie.dump(OS, 0);
+      OS << "\n";
+    }
+    OS << "\n";
+  }
+}
+
+bool DWARFVerifier::handleDebugInfo() {
+  NumDebugInfoErrors = 0;
+  OS << "Verifying .debug_info...\n";
+  for (const auto &CU : DCtx.compile_units()) {
+    unsigned NumDies = CU->getNumDIEs();
+    for (unsigned I = 0; I < NumDies; ++I) {
+      auto Die = CU->getDIEAtIndex(I);
+      const auto Tag = Die.getTag();
+      if (Tag == DW_TAG_null)
+        continue;
+      for (auto AttrValue : Die.attributes()) {
+        verifyDebugInfoAttribute(Die, AttrValue);
+        verifyDebugInfoForm(Die, AttrValue);
+      }
+    }
+  }
+  verifyDebugInfoReferences();
+  return NumDebugInfoErrors == 0;
+}
+
+void DWARFVerifier::verifyDebugLineStmtOffsets() {
+  std::map<uint64_t, DWARFDie> StmtListToDie;
+  for (const auto &CU : DCtx.compile_units()) {
+    auto Die = CU->getUnitDIE();
+    // Get the attribute value as a section offset. No need to produce an
+    // error here if the encoding isn't correct because we validate this in
+    // the .debug_info verifier.
+    auto StmtSectionOffset = toSectionOffset(Die.find(DW_AT_stmt_list));
+    if (!StmtSectionOffset)
+      continue;
+    const uint32_t LineTableOffset = *StmtSectionOffset;
+    auto LineTable = DCtx.getLineTableForUnit(CU.get());
+    if (LineTableOffset < DCtx.getLineSection().Data.size()) {
+      if (!LineTable) {
+        ++NumDebugLineErrors;
+        OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset)
+           << "] could not be parsed for CU:\n";
+        Die.dump(OS, 0);
+        OS << '\n';
+        continue;
+      }
+    } else {
+      // Make sure we don't get a valid line table back if the offset is wrong.
+      assert(LineTable == nullptr);
+      // Skip this line table as it isn't valid. No need to create an error
+      // here because we validate this in the .debug_info verifier.
+      continue;
+    }
+    auto Iter = StmtListToDie.find(LineTableOffset);
+    if (Iter != StmtListToDie.end()) {
+      ++NumDebugLineErrors;
+      OS << "error: two compile unit DIEs, "
+         << format("0x%08" PRIx32, Iter->second.getOffset()) << " and "
+         << format("0x%08" PRIx32, Die.getOffset())
+         << ", have the same DW_AT_stmt_list section offset:\n";
+      Iter->second.dump(OS, 0);
+      Die.dump(OS, 0);
+      OS << '\n';
+      // Already verified this line table before, no need to do it again.
+ continue; + } + StmtListToDie[LineTableOffset] = Die; + } +} + +void DWARFVerifier::verifyDebugLineRows() { + for (const auto &CU : DCtx.compile_units()) { + auto Die = CU->getUnitDIE(); + auto LineTable = DCtx.getLineTableForUnit(CU.get()); + // If there is no line table we will have created an error in the + // .debug_info verifier or in verifyDebugLineStmtOffsets(). + if (!LineTable) + continue; + uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size(); + uint64_t PrevAddress = 0; + uint32_t RowIndex = 0; + for (const auto &Row : LineTable->Rows) { + if (Row.Address < PrevAddress) { + ++NumDebugLineErrors; + OS << "error: .debug_line[" + << format("0x%08" PRIx32, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "] row[" << RowIndex + << "] decreases in address from previous row:\n"; + + DWARFDebugLine::Row::dumpTableHeader(OS); + if (RowIndex > 0) + LineTable->Rows[RowIndex - 1].dump(OS); + Row.dump(OS); + OS << '\n'; + } + + if (Row.File > MaxFileIndex) { + ++NumDebugLineErrors; + OS << "error: .debug_line[" + << format("0x%08" PRIx32, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "][" << RowIndex << "] has invalid file index " << Row.File + << " (valid values are [1," << MaxFileIndex << "]):\n"; + DWARFDebugLine::Row::dumpTableHeader(OS); + Row.dump(OS); + OS << '\n'; + } + if (Row.EndSequence) + PrevAddress = 0; + else + PrevAddress = Row.Address; + ++RowIndex; + } + } +} + +bool DWARFVerifier::handleDebugLine() { + NumDebugLineErrors = 0; + OS << "Verifying .debug_line...\n"; + verifyDebugLineStmtOffsets(); + verifyDebugLineRows(); + return NumDebugLineErrors == 0; +} diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/ModInfo.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp index 1405286fd088..dabcc3447ee5 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/ModInfo.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp @@ -1,4 +1,4 @@ -//===- ModInfo.cpp - PDB module information -------------------------------===// +//===- DbiModuleDescriptor.cpp - PDB module information -------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/Native/ModInfo.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Endian.h" @@ -19,13 +19,15 @@ using namespace llvm; using namespace llvm::pdb; using namespace llvm::support; -ModInfo::ModInfo() = default; +DbiModuleDescriptor::DbiModuleDescriptor() = default; -ModInfo::ModInfo(const ModInfo &Info) = default; +DbiModuleDescriptor::DbiModuleDescriptor(const DbiModuleDescriptor &Info) = + default; -ModInfo::~ModInfo() = default; +DbiModuleDescriptor::~DbiModuleDescriptor() = default; -Error ModInfo::initialize(BinaryStreamRef Stream, ModInfo &Info) { +Error DbiModuleDescriptor::initialize(BinaryStreamRef Stream, + DbiModuleDescriptor &Info) { BinaryStreamReader Reader(Stream); if (auto EC = Reader.readObject(Info.Layout)) return EC; @@ -38,40 +40,48 @@ Error ModInfo::initialize(BinaryStreamRef Stream, ModInfo &Info) { return Error::success(); } -bool ModInfo::hasECInfo() const { +bool DbiModuleDescriptor::hasECInfo() const { return (Layout->Flags & ModInfoFlags::HasECFlagMask) != 0; } -uint16_t ModInfo::getTypeServerIndex() const { +uint16_t DbiModuleDescriptor::getTypeServerIndex() const { return (Layout->Flags & ModInfoFlags::TypeServerIndexMask) 
>> ModInfoFlags::TypeServerIndexShift; } -uint16_t ModInfo::getModuleStreamIndex() const { return Layout->ModDiStream; } +uint16_t DbiModuleDescriptor::getModuleStreamIndex() const { + return Layout->ModDiStream; +} -uint32_t ModInfo::getSymbolDebugInfoByteSize() const { +uint32_t DbiModuleDescriptor::getSymbolDebugInfoByteSize() const { return Layout->SymBytes; } -uint32_t ModInfo::getLineInfoByteSize() const { return Layout->LineBytes; } +uint32_t DbiModuleDescriptor::getC11LineInfoByteSize() const { + return Layout->C11Bytes; +} -uint32_t ModInfo::getC13LineInfoByteSize() const { return Layout->C13Bytes; } +uint32_t DbiModuleDescriptor::getC13LineInfoByteSize() const { + return Layout->C13Bytes; +} -uint32_t ModInfo::getNumberOfFiles() const { return Layout->NumFiles; } +uint32_t DbiModuleDescriptor::getNumberOfFiles() const { + return Layout->NumFiles; +} -uint32_t ModInfo::getSourceFileNameIndex() const { +uint32_t DbiModuleDescriptor::getSourceFileNameIndex() const { return Layout->SrcFileNameNI; } -uint32_t ModInfo::getPdbFilePathNameIndex() const { +uint32_t DbiModuleDescriptor::getPdbFilePathNameIndex() const { return Layout->PdbFilePathNI; } -StringRef ModInfo::getModuleName() const { return ModuleName; } +StringRef DbiModuleDescriptor::getModuleName() const { return ModuleName; } -StringRef ModInfo::getObjFileName() const { return ObjFileName; } +StringRef DbiModuleDescriptor::getObjFileName() const { return ObjFileName; } -uint32_t ModInfo::getRecordLength() const { +uint32_t DbiModuleDescriptor::getRecordLength() const { uint32_t M = ModuleName.str().size() + 1; uint32_t O = ObjFileName.str().size() + 1; uint32_t Size = sizeof(ModuleInfoHeader) + M + O; diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 73c45a953520..867864e47dce 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -1,4 +1,4 @@ -//===- ModInfoBuilder.cpp - PDB Module Info Stream Creation -----*- C++ -*-===// +//===- DbiModuleDescriptorBuilder.cpp - PDB Mod Info Creation ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,13 +7,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/Native/ModInfoBuilder.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/DebugInfo/PDB/Native/ModInfo.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Support/BinaryItemStream.h" @@ -35,47 +36,72 @@ template <> struct BinaryItemTraits<CVSymbol> { }; } -static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize) { +static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize, + uint32_t C13Size) { uint32_t Size = sizeof(uint32_t); // Signature Size += SymbolByteSize; // Symbol Data - Size += 0; // TODO: Layout.LineBytes - Size += 0; // TODO: Layout.C13Bytes + Size += 0; // TODO: Layout.C11Bytes + Size += C13Size; // C13 Debug Info Size Size += sizeof(uint32_t); // GlobalRefs substream size (always 0) Size += 0; // GlobalRefs substream bytes return Size; } 
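For orientation, a minimal usage sketch of the builder renamed below; the MSF
setup and the DbiBuilder variable are assumed, error handling is left to the
caller, and only methods visible in this diff (addModuleInfo, setObjFileName,
addSourceFile) are used:

  // Create a module descriptor and populate it (illustrative names).
  Expected<DbiModuleDescriptorBuilder &> ModOrErr =
      DbiBuilder.addModuleInfo("a.obj");
  if (!ModOrErr)
    return ModOrErr.takeError();
  DbiModuleDescriptorBuilder &Mod = *ModOrErr;
  Mod.setObjFileName("a.obj");
  Mod.addSourceFile("/src/a.c");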
-ModInfoBuilder::ModInfoBuilder(StringRef ModuleName, uint32_t ModIndex,
-                               msf::MSFBuilder &Msf)
+DbiModuleDescriptorBuilder::DbiModuleDescriptorBuilder(StringRef ModuleName,
+                                                       uint32_t ModIndex,
+                                                       msf::MSFBuilder &Msf)
     : MSF(Msf), ModuleName(ModuleName) {
   Layout.Mod = ModIndex;
 }

-uint16_t ModInfoBuilder::getStreamIndex() const { return Layout.ModDiStream; }
+DbiModuleDescriptorBuilder::~DbiModuleDescriptorBuilder() {}

-void ModInfoBuilder::setObjFileName(StringRef Name) { ObjFileName = Name; }
+uint16_t DbiModuleDescriptorBuilder::getStreamIndex() const {
+  return Layout.ModDiStream;
+}
+
+void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) {
+  ObjFileName = Name;
+}

-void ModInfoBuilder::addSymbol(CVSymbol Symbol) {
+void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) {
   Symbols.push_back(Symbol);
   SymbolByteSize += Symbol.data().size();
 }

-void ModInfoBuilder::addSourceFile(StringRef Path) {
+void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) {
   SourceFiles.push_back(Path);
 }

-uint32_t ModInfoBuilder::calculateSerializedLength() const {
+uint32_t DbiModuleDescriptorBuilder::calculateC13DebugInfoSize() const {
+  uint32_t Result = 0;
+  for (const auto &Builder : C13Builders) {
+    assert(Builder && "Empty C13 Fragment Builder!");
+    Result += Builder->calculateSerializedLength();
+  }
+  return Result;
+}
+
+uint32_t DbiModuleDescriptorBuilder::calculateSerializedLength() const {
   uint32_t L = sizeof(Layout);
   uint32_t M = ModuleName.size() + 1;
   uint32_t O = ObjFileName.size() + 1;
   return alignTo(L + M + O, sizeof(uint32_t));
 }

-void ModInfoBuilder::finalize() {
-  Layout.C13Bytes = 0;
+void DbiModuleDescriptorBuilder::finalize() {
   Layout.FileNameOffs = 0; // TODO: Fix this
   Layout.Flags = 0;        // TODO: Fix this
-  Layout.LineBytes = 0;
+  Layout.C11Bytes = 0;
+  Layout.C13Bytes = calculateC13DebugInfoSize();
   (void)Layout.Mod;         // Set in constructor
   (void)Layout.ModDiStream; // Set in finalizeMsfLayout
   Layout.NumFiles = SourceFiles.size();
@@ -87,18 +113,20 @@ void ModInfoBuilder::finalize() {
   Layout.SymBytes = SymbolByteSize + sizeof(uint32_t);
 }

-Error ModInfoBuilder::finalizeMsfLayout() {
+Error DbiModuleDescriptorBuilder::finalizeMsfLayout() {
   this->Layout.ModDiStream = kInvalidStreamIndex;
-  auto ExpectedSN = MSF.addStream(calculateDiSymbolStreamSize(SymbolByteSize));
+  uint32_t C13Size = calculateC13DebugInfoSize();
+  auto ExpectedSN =
+      MSF.addStream(calculateDiSymbolStreamSize(SymbolByteSize, C13Size));
   if (!ExpectedSN)
     return ExpectedSN.takeError();
   Layout.ModDiStream = *ExpectedSN;
   return Error::success();
 }

-Error ModInfoBuilder::commit(BinaryStreamWriter &ModiWriter,
-                             const msf::MSFLayout &MsfLayout,
-                             WritableBinaryStreamRef MsfBuffer) {
+Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
+                                         const msf::MSFLayout &MsfLayout,
+                                         WritableBinaryStreamRef MsfBuffer) {
   // We write the Modi record to the `ModiWriter`, but we additionally write its
   // symbol stream to a brand new stream.
 if (auto EC = ModiWriter.writeObject(Layout))
@@ -125,7 +153,13 @@ Error ModInfoBuilder::commit(BinaryStreamWriter &ModiWriter,
   if (auto EC = SymbolWriter.writeStreamRef(RecordsRef))
     return EC;
   // TODO: Write C11 Line data
-  // TODO: Write C13 Line data
+
+  for (const auto &Builder : C13Builders) {
+    assert(Builder && "Empty C13 Fragment Builder!");
+    if (auto EC = Builder->commit(SymbolWriter))
+      return EC;
+  }
+
   // TODO: Figure out what GlobalRefs substream actually is and populate it.
   if (auto EC = SymbolWriter.writeInteger<uint32_t>(0))
     return EC;
@@ -134,3 +168,43 @@ Error ModInfoBuilder::commit(BinaryStreamWriter &ModiWriter,
   return Error::success();
 }
+
+void DbiModuleDescriptorBuilder::addC13Fragment(
+    std::unique_ptr<ModuleDebugLineFragment> Lines) {
+  ModuleDebugLineFragment &Frag = *Lines;
+
+  // File Checksums have to come first, so push an empty entry on if this
+  // is the first.
+  if (C13Builders.empty())
+    C13Builders.push_back(nullptr);
+
+  this->LineInfo.push_back(std::move(Lines));
+  C13Builders.push_back(
+      llvm::make_unique<ModuleDebugFragmentRecordBuilder>(Frag.kind(), Frag));
+}
+
+void DbiModuleDescriptorBuilder::addC13Fragment(
+    std::unique_ptr<codeview::ModuleDebugInlineeLineFragment> Inlinees) {
+  ModuleDebugInlineeLineFragment &Frag = *Inlinees;
+
+  // File Checksums have to come first, so push an empty entry on if this
+  // is the first.
+  if (C13Builders.empty())
+    C13Builders.push_back(nullptr);
+
+  this->Inlinees.push_back(std::move(Inlinees));
+  C13Builders.push_back(
+      llvm::make_unique<ModuleDebugFragmentRecordBuilder>(Frag.kind(), Frag));
+}
+
+void DbiModuleDescriptorBuilder::setC13FileChecksums(
+    std::unique_ptr<ModuleDebugFileChecksumFragment> Checksums) {
+  assert(!ChecksumInfo && "Can't have more than one checksum info!");
+
+  if (C13Builders.empty())
+    C13Builders.push_back(nullptr);
+
+  ChecksumInfo = std::move(Checksums);
+  C13Builders[0] = llvm::make_unique<ModuleDebugFragmentRecordBuilder>(
+      ChecksumInfo->kind(), *ChecksumInfo);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp index b9f53578d326..db703809f7c9 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -10,9 +10,9 @@
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
@@ -146,7 +146,7 @@ Error DbiStream::reload() {

   if (ECSubstream.getLength() > 0) {
     BinaryStreamReader ECReader(ECSubstream);
-    if (auto EC = ECNames.load(ECReader))
+    if (auto EC = ECNames.reload(ECReader))
       return EC;
   }

@@ -252,11 +252,12 @@ Error DbiStream::initializeModInfoArray() {
   if (ModInfoSubstream.getLength() == 0)
     return Error::success();

-  // Since each ModInfo in the stream is a variable length, we have to iterate
+  // Since each DbiModuleDescriptor in the stream has a variable length, we
+  // have to iterate
   // them to know how many there actually are.
   BinaryStreamReader Reader(ModInfoSubstream);
-  VarStreamArray<ModInfo> ModInfoArray;
+  VarStreamArray<DbiModuleDescriptor> ModInfoArray;
   if (auto EC = Reader.readArray(ModInfoArray, ModInfoSubstream.getLength()))
     return EC;
   for (auto &Info : ModInfoArray) {
@@ -371,10 +372,12 @@ Error DbiStream::initializeFileInfo() {
     NumSourceFiles += Count;

   // This is the array that in the reference implementation corresponds to
-  // `ModInfo::FileLayout::FileNameOffs`, which is commented there as being a
+  // `DbiModuleDescriptor::FileLayout::FileNameOffs`, which is commented there
+  // as being a
   // pointer. Due to the mentioned problems of pointers causing difficulty
   // when reading from the file on 64-bit systems, we continue to ignore that
-  // field in `ModInfo`, and instead build a vector of StringRefs and stores
+  // field in `DbiModuleDescriptor`, and instead build a vector of StringRefs
+  // and store
   // them in `ModuleInfoEx`. The value written to and read from the file is
   // not used anyway, it is only there as a way to store the offsets for the
   // purposes of later accessing the names at runtime.
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index a203aea60fe7..c19a2f0d3110 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -12,8 +12,8 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
-#include "llvm/DebugInfo/PDB/Native/ModInfoBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Support/BinaryStreamWriter.h"
@@ -74,10 +74,11 @@ uint32_t DbiStreamBuilder::calculateSerializedLength() const {
          calculateSectionMapStreamSize() + calculateDbgStreamsSize();
 }

-Expected<ModInfoBuilder &>
+Expected<DbiModuleDescriptorBuilder &>
 DbiStreamBuilder::addModuleInfo(StringRef ModuleName) {
   uint32_t Index = ModiList.size();
-  auto MIB = llvm::make_unique<ModInfoBuilder>(ModuleName, Index, Msf);
+  auto MIB =
+      llvm::make_unique<DbiModuleDescriptorBuilder>(ModuleName, Index, Msf);
   auto M = MIB.get();
   auto Result = ModiMap.insert(std::make_pair(ModuleName, std::move(MIB)));

@@ -100,6 +101,14 @@ Error DbiStreamBuilder::addModuleSourceFile(StringRef Module, StringRef File) {
   return Error::success();
 }

+Expected<uint32_t> DbiStreamBuilder::getSourceFileNameIndex(StringRef File) {
+  auto NameIter = SourceFileNames.find(File);
+  if (NameIter == SourceFileNames.end())
+    return make_error<RawError>(raw_error_code::no_entry,
+                                "The specified source file was not found");
+  return NameIter->getValue();
+}
+
 uint32_t DbiStreamBuilder::calculateModiSubstreamSize() const {
   uint32_t Size = 0;
   for (const auto &M : ModiList)
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/ModStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp index e87e2c407593..d7a203746a0d 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/ModStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -1,4 +1,4 @@
-//===- ModStream.cpp - PDB Module Info Stream Access ----------------------===//
+//===- ModuleDebugStream.cpp - PDB Module Info Stream Access --------------===//
 //
 // The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
//===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/Native/ModStream.h" +#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h" #include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/PDB/Native/ModInfo.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" @@ -21,20 +21,22 @@ #include <cstdint> using namespace llvm; +using namespace llvm::codeview; using namespace llvm::msf; using namespace llvm::pdb; -ModStream::ModStream(const ModInfo &Module, - std::unique_ptr<MappedBlockStream> Stream) +ModuleDebugStreamRef::ModuleDebugStreamRef( + const DbiModuleDescriptor &Module, + std::unique_ptr<MappedBlockStream> Stream) : Mod(Module), Stream(std::move(Stream)) {} -ModStream::~ModStream() = default; +ModuleDebugStreamRef::~ModuleDebugStreamRef() = default; -Error ModStream::reload() { +Error ModuleDebugStreamRef::reload() { BinaryStreamReader Reader(*Stream); uint32_t SymbolSize = Mod.getSymbolDebugInfoByteSize(); - uint32_t C11Size = Mod.getLineInfoByteSize(); + uint32_t C11Size = Mod.getC11LineInfoByteSize(); uint32_t C13Size = Mod.getC13LineInfoByteSize(); if (C11Size > 0 && C13Size > 0) @@ -48,13 +50,14 @@ Error ModStream::reload() { if (auto EC = Reader.readArray(SymbolsSubstream, SymbolSize - 4)) return EC; - if (auto EC = Reader.readStreamRef(LinesSubstream, C11Size)) + if (auto EC = Reader.readStreamRef(C11LinesSubstream, C11Size)) return EC; if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size)) return EC; BinaryStreamReader LineReader(C13LinesSubstream); - if (auto EC = LineReader.readArray(LineInfo, LineReader.bytesRemaining())) + if (auto EC = + LineReader.readArray(LinesAndChecksums, LineReader.bytesRemaining())) return EC; uint32_t GlobalRefsSize; @@ -70,20 +73,17 @@ Error ModStream::reload() { } iterator_range<codeview::CVSymbolArray::Iterator> -ModStream::symbols(bool *HadError) const { - // It's OK if the stream is empty. 
- if (SymbolsSubstream.getUnderlyingStream().getLength() == 0) - return make_range(SymbolsSubstream.end(), SymbolsSubstream.end()); +ModuleDebugStreamRef::symbols(bool *HadError) const { return make_range(SymbolsSubstream.begin(HadError), SymbolsSubstream.end()); } -iterator_range<codeview::ModuleSubstreamArray::Iterator> -ModStream::lines(bool *HadError) const { - return make_range(LineInfo.begin(HadError), LineInfo.end()); +llvm::iterator_range<ModuleDebugStreamRef::LinesAndChecksumsIterator> +ModuleDebugStreamRef::linesAndChecksums() const { + return make_range(LinesAndChecksums.begin(), LinesAndChecksums.end()); } -bool ModStream::hasLineInfo() const { - return C13LinesSubstream.getLength() > 0 || LinesSubstream.getLength() > 0; +bool ModuleDebugStreamRef::hasLineInfo() const { + return C13LinesSubstream.getLength() > 0; } -Error ModStream::commit() { return Error::success(); } +Error ModuleDebugStreamRef::commit() { return Error::success(); } diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp index 943e7fa13ab7..859295d2c7d3 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -15,9 +15,9 @@ #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/StringTable.h" #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/Support/BinaryStream.h" @@ -337,8 +337,8 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { return *Symbols; } -Expected<StringTable &> PDBFile::getStringTable() { - if (!Strings || !StringTableStream) { +Expected<PDBStringTable &> PDBFile::getStringTable() { + if (!Strings) { auto IS = getPDBInfoStream(); if (!IS) return IS.takeError(); @@ -350,12 +350,13 @@ Expected<StringTable &> PDBFile::getStringTable() { if (!NS) return NS.takeError(); + auto N = llvm::make_unique<PDBStringTable>(); BinaryStreamReader Reader(**NS); - auto N = llvm::make_unique<StringTable>(); - if (auto EC = N->load(Reader)) + if (auto EC = N->reload(Reader)) return std::move(EC); - Strings = std::move(N); + assert(Reader.bytesRemaining() == 0); StringTableStream = std::move(*NS); + Strings = std::move(N); } return *Strings; } @@ -389,7 +390,7 @@ bool PDBFile::hasPDBSymbolStream() { bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } -bool PDBFile::hasStringTable() { +bool PDBFile::hasPDBStringTable() { auto IS = getPDBInfoStream(); if (!IS) return false; diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index b3c84903bc7e..4dd965c69071 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -17,8 +17,8 @@ #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include 
"llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" #include "llvm/Support/BinaryStream.h" @@ -67,7 +67,9 @@ TpiStreamBuilder &PDBFileBuilder::getIpiBuilder() { return *Ipi; } -StringTableBuilder &PDBFileBuilder::getStringTableBuilder() { return Strings; } +PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() { + return Strings; +} Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { auto ExpectedStream = Msf->addStream(Size); @@ -78,9 +80,9 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { } Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() { - uint32_t StringTableSize = Strings.finalize(); + uint32_t StringsLen = Strings.calculateSerializedSize(); - if (auto EC = addNamedStream("/names", StringTableSize)) + if (auto EC = addNamedStream("/names", StringsLen)) return std::move(EC); if (auto EC = addNamedStream("/LinkInfo", 0)) return std::move(EC); @@ -107,6 +109,13 @@ Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() { return Msf->build(); } +Expected<uint32_t> PDBFileBuilder::getNamedStreamIndex(StringRef Name) const { + uint32_t SN = 0; + if (!NamedStreams.get(Name, SN)) + return llvm::make_error<pdb::RawError>(raw_error_code::no_stream); + return SN; +} + Error PDBFileBuilder::commit(StringRef Filename) { auto ExpectedLayout = finalizeMsfLayout(); if (!ExpectedLayout) @@ -144,12 +153,12 @@ Error PDBFileBuilder::commit(StringRef Filename) { return EC; } - uint32_t StringTableStreamNo = 0; - if (!NamedStreams.get("/names", StringTableStreamNo)) - return llvm::make_error<pdb::RawError>(raw_error_code::no_stream); + auto ExpectedSN = getNamedStreamIndex("/names"); + if (!ExpectedSN) + return ExpectedSN.takeError(); auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, - StringTableStreamNo); + *ExpectedSN); BinaryStreamWriter NSWriter(*NS); if (auto EC = Strings.commit(NSWriter)) return EC; diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp new file mode 100644 index 000000000000..e84573fe07b8 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -0,0 +1,134 @@ +//===- PDBStringTable.cpp - PDB String Table ---------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::pdb; + +uint32_t PDBStringTable::getByteSize() const { return ByteSize; } +uint32_t PDBStringTable::getNameCount() const { return NameCount; } +uint32_t PDBStringTable::getHashVersion() const { return Header->HashVersion; } +uint32_t PDBStringTable::getSignature() const { return Header->Signature; } + +Error PDBStringTable::readHeader(BinaryStreamReader &Reader) { + if (auto EC = Reader.readObject(Header)) + return EC; + + if (Header->Signature != PDBStringTableSignature) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid hash table signature"); + if (Header->HashVersion != 1 && Header->HashVersion != 2) + return make_error<RawError>(raw_error_code::corrupt_file, + "Unsupported hash version"); + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::readStrings(BinaryStreamReader &Reader) { + BinaryStreamRef Stream; + if (auto EC = Reader.readStreamRef(Stream)) + return EC; + + if (auto EC = Strings.initialize(Stream)) { + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Invalid hash table byte length")); + } + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::readHashTable(BinaryStreamReader &Reader) { + const support::ulittle32_t *HashCount; + if (auto EC = Reader.readObject(HashCount)) + return EC; + + if (auto EC = Reader.readArray(IDs, *HashCount)) { + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read bucket array")); + } + + return Error::success(); +} + +Error PDBStringTable::readEpilogue(BinaryStreamReader &Reader) { + if (auto EC = Reader.readInteger(NameCount)) + return EC; + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::reload(BinaryStreamReader &Reader) { + + BinaryStreamReader SectionReader; + + std::tie(SectionReader, Reader) = Reader.split(sizeof(PDBStringTableHeader)); + if (auto EC = readHeader(SectionReader)) + return EC; + + std::tie(SectionReader, Reader) = Reader.split(Header->ByteSize); + if (auto EC = readStrings(SectionReader)) + return EC; + + // We don't know how long the hash table is until we parse it, so let the + // function responsible for doing that figure it out. + if (auto EC = readHashTable(Reader)) + return EC; + + std::tie(SectionReader, Reader) = Reader.split(sizeof(uint32_t)); + if (auto EC = readEpilogue(SectionReader)) + return EC; + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Expected<StringRef> PDBStringTable::getStringForID(uint32_t ID) const { + return Strings.getString(ID); +} + +Expected<uint32_t> PDBStringTable::getIDForString(StringRef Str) const { + uint32_t Hash = + (Header->HashVersion == 1) ? 
hashStringV1(Str) : hashStringV2(Str); + size_t Count = IDs.size(); + uint32_t Start = Hash % Count; + for (size_t I = 0; I < Count; ++I) { + // The hash is just a starting point for the search, but if it + // doesn't work we should find the string no matter what, because + // we iterate the entire array. + uint32_t Index = (Start + I) % Count; + + uint32_t ID = IDs[Index]; + auto ExpectedStr = getStringForID(ID); + if (!ExpectedStr) + return ExpectedStr.takeError(); + + if (*ExpectedStr == Str) + return ID; + } + return make_error<RawError>(raw_error_code::no_entry); +} + +FixedStreamArray<support::ulittle32_t> PDBStringTable::name_ids() const { + return IDs; +} diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp new file mode 100644 index 000000000000..a472181a4895 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp @@ -0,0 +1,133 @@ +//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::support; +using namespace llvm::support::endian; +using namespace llvm::pdb; + +uint32_t PDBStringTableBuilder::insert(StringRef S) { + return Strings.insert(S); +} + +static uint32_t computeBucketCount(uint32_t NumStrings) { + // The /names stream is basically an on-disk open-addressing hash table. + // Hash collisions are resolved by linear probing. We cannot make + // utilization 100% because it will make the linear probing extremely + // slow. But lower utilization wastes disk space. As a reasonable + // load factor, we choose 80%. We need +1 because slot 0 is reserved. + return (NumStrings + 1) * 1.25; +} + +uint32_t PDBStringTableBuilder::calculateHashTableSize() const { + uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. + Size += sizeof(uint32_t) * computeBucketCount(Strings.size()); + + return Size; +} + +uint32_t PDBStringTableBuilder::calculateSerializedSize() const { + uint32_t Size = 0; + Size += sizeof(PDBStringTableHeader); + Size += Strings.calculateSerializedSize(); + Size += calculateHashTableSize(); + Size += sizeof(uint32_t); // The /names stream ends with the string count. 
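// To recap the on-disk layout that reload() above and commit() below walk
// with reader/writer split(): a PDBStringTableHeader, then Header->ByteSize
// bytes of null-terminated strings back to back, then the hash table (a
// ulittle32_t bucket count followed by that many ulittle32_t buckets), and
// finally a ulittle32_t count of names as the epilogue.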
+ return Size; +} + +Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { + // Write a header + PDBStringTableHeader H; + H.Signature = PDBStringTableSignature; + H.HashVersion = 1; + H.ByteSize = Strings.calculateSerializedSize(); + if (auto EC = Writer.writeObject(H)) + return EC; + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { + if (auto EC = Strings.commit(Writer)) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { + // Write a hash table. + uint32_t BucketCount = computeBucketCount(Strings.size()); + if (auto EC = Writer.writeInteger(BucketCount)) + return EC; + std::vector<ulittle32_t> Buckets(BucketCount); + + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Pair.getValue(); + uint32_t Hash = hashStringV1(S); + + for (uint32_t I = 0; I != BucketCount; ++I) { + uint32_t Slot = (Hash + I) % BucketCount; + if (Slot == 0) + continue; // Skip reserved slot + if (Buckets[Slot] != 0) + continue; + Buckets[Slot] = Offset; + break; + } + } + + if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { + if (auto EC = Writer.writeInteger<uint32_t>(Strings.size())) + return EC; + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { + BinaryStreamWriter SectionWriter; + + std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader)); + if (auto EC = writeHeader(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = + Writer.split(Strings.calculateSerializedSize()); + if (auto EC = writeStrings(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize()); + if (auto EC = writeHashTable(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t)); + if (auto EC = writeEpilogue(SectionWriter)) + return EC; + + return Error::success(); +} diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/StringTable.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/StringTable.cpp deleted file mode 100644 index 7e28389b8383..000000000000 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/StringTable.cpp +++ /dev/null @@ -1,109 +0,0 @@ -//===- StringTable.cpp - PDB String Table -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/PDB/Native/StringTable.h" - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/Endian.h" - -using namespace llvm; -using namespace llvm::support; -using namespace llvm::pdb; - -StringTable::StringTable() {} - -Error StringTable::load(BinaryStreamReader &Stream) { - ByteSize = Stream.getLength(); - - const StringTableHeader *H; - if (auto EC = Stream.readObject(H)) - return EC; - - if (H->Signature != StringTableSignature) - return make_error<RawError>(raw_error_code::corrupt_file, - "Invalid hash table signature"); - if (H->HashVersion != 1 && H->HashVersion != 2) - return make_error<RawError>(raw_error_code::corrupt_file, - "Unsupported hash version"); - - Signature = H->Signature; - HashVersion = H->HashVersion; - if (auto EC = Stream.readStreamRef(NamesBuffer, H->ByteSize)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Invalid hash table byte length")); - - const support::ulittle32_t *HashCount; - if (auto EC = Stream.readObject(HashCount)) - return EC; - - if (auto EC = Stream.readArray(IDs, *HashCount)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Could not read bucket array")); - - if (Stream.bytesRemaining() < sizeof(support::ulittle32_t)) - return make_error<RawError>(raw_error_code::corrupt_file, - "Missing name count"); - - if (auto EC = Stream.readInteger(NameCount)) - return EC; - - if (Stream.bytesRemaining() > 0) - return make_error<RawError>(raw_error_code::stream_too_long, - "Unexpected bytes found in string table"); - - return Error::success(); -} - -uint32_t StringTable::getByteSize() const { - return ByteSize; -} - -StringRef StringTable::getStringForID(uint32_t ID) const { - if (ID == IDs[0]) - return StringRef(); - - // NamesBuffer is a buffer of null terminated strings back to back. ID is - // the starting offset of the string we're looking for. So just seek into - // the desired offset and a read a null terminated stream from that offset. - StringRef Result; - BinaryStreamReader NameReader(NamesBuffer); - NameReader.setOffset(ID); - if (auto EC = NameReader.readCString(Result)) - consumeError(std::move(EC)); - return Result; -} - -uint32_t StringTable::getIDForString(StringRef Str) const { - uint32_t Hash = (HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str); - size_t Count = IDs.size(); - uint32_t Start = Hash % Count; - for (size_t I = 0; I < Count; ++I) { - // The hash is just a starting point for the search, but if it - // doesn't work we should find the string no matter what, because - // we iterate the entire array. - uint32_t Index = (Start + I) % Count; - - uint32_t ID = IDs[Index]; - StringRef S = getStringForID(ID); - if (S == Str) - return ID; - } - // IDs[0] contains the ID of the "invalid" entry. 
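// (Note the error-handling difference: this removed version falls back to
// the "invalid" entry in IDs[0] when the probe fails, whereas the new
// PDBStringTable::getIDForString above reports failure as
// make_error<RawError>(raw_error_code::no_entry).)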
- return IDs[0]; -} - -FixedStreamArray<support::ulittle32_t> StringTable::name_ids() const { - return IDs; -} diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp deleted file mode 100644 index e0f8370ab608..000000000000 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp +++ /dev/null @@ -1,102 +0,0 @@ -//===- StringTableBuilder.cpp - PDB String Table ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Endian.h" - -using namespace llvm; -using namespace llvm::support; -using namespace llvm::support::endian; -using namespace llvm::pdb; - -uint32_t StringTableBuilder::insert(StringRef S) { - auto P = Strings.insert({S, StringSize}); - - // If a given string didn't exist in the string table, we want to increment - // the string table size. - if (P.second) - StringSize += S.size() + 1; // +1 for '\0' - return P.first->second; -} - -static uint32_t computeBucketCount(uint32_t NumStrings) { - // The /names stream is basically an on-disk open-addressing hash table. - // Hash collisions are resolved by linear probing. We cannot make - // utilization 100% because it will make the linear probing extremely - // slow. But lower utilization wastes disk space. As a reasonable - // load factor, we choose 80%. We need +1 because slot 0 is reserved. - return (NumStrings + 1) * 1.25; -} - -uint32_t StringTableBuilder::finalize() { - uint32_t Size = 0; - Size += sizeof(StringTableHeader); - Size += StringSize; - Size += sizeof(uint32_t); // Hash table begins with 4-byte size field. - - uint32_t BucketCount = computeBucketCount(Strings.size()); - Size += BucketCount * sizeof(uint32_t); - - Size += - sizeof(uint32_t); // The /names stream ends with the number of strings. - return Size; -} - -Error StringTableBuilder::commit(BinaryStreamWriter &Writer) const { - // Write a header - StringTableHeader H; - H.Signature = StringTableSignature; - H.HashVersion = 1; - H.ByteSize = StringSize; - if (auto EC = Writer.writeObject(H)) - return EC; - - // Write a string table. - uint32_t StringStart = Writer.getOffset(); - for (auto Pair : Strings) { - StringRef S = Pair.first; - uint32_t Offset = Pair.second; - Writer.setOffset(StringStart + Offset); - if (auto EC = Writer.writeCString(S)) - return EC; - } - Writer.setOffset(StringStart + StringSize); - - // Write a hash table. 
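// (The probing scheme is unchanged between the removed and the new builder:
// start at hashStringV1(S) % BucketCount and scan forward, skipping slot 0
// -- reserved for the invalid/empty entry -- and already-occupied buckets.
// The reader side walks the same array linearly and compares strings, so
// any inserted string is found before the scan wraps around.)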
- uint32_t BucketCount = computeBucketCount(Strings.size()); - if (auto EC = Writer.writeInteger(BucketCount)) - return EC; - std::vector<ulittle32_t> Buckets(BucketCount); - - for (auto Pair : Strings) { - StringRef S = Pair.first; - uint32_t Offset = Pair.second; - uint32_t Hash = hashStringV1(S); - - for (uint32_t I = 0; I != BucketCount; ++I) { - uint32_t Slot = (Hash + I) % BucketCount; - if (Slot == 0) - continue; // Skip reserved slot - if (Buckets[Slot] != 0) - continue; - Buckets[Slot] = Offset; - break; - } - } - - if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) - return EC; - if (auto EC = Writer.writeInteger(static_cast<uint32_t>(Strings.size()))) - return EC; - return Error::success(); -} diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp index 7e56859c40e6..a41a065a983c 100644 --- a/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -467,8 +467,9 @@ extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, size_t *length, int *status); #endif -std::string LLVMSymbolizer::DemangleName(const std::string &Name, - const SymbolizableModule *ModInfo) { +std::string +LLVMSymbolizer::DemangleName(const std::string &Name, + const SymbolizableModule *DbiModuleDescriptor) { #if !defined(_MSC_VER) // We can spoil names of symbols with C linkage, so use an heuristic // approach to check if the name should be demangled. @@ -496,7 +497,7 @@ std::string LLVMSymbolizer::DemangleName(const std::string &Name, return (result == 0) ? Name : std::string(DemangledName); } #endif - if (ModInfo && ModInfo->isWin32Module()) + if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) return std::string(demanglePE32ExternCFunc(Name)); return Name; } diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp index 10b4e98b6079..96844439e721 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -1565,7 +1565,7 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy, Tmp = Tmp.zext(SrcBitSize); Tmp = TempSrc.AggregateVal[SrcElt++].IntVal; Tmp = Tmp.zext(DstBitSize); - Tmp = Tmp.shl(ShiftAmt); + Tmp <<= ShiftAmt; ShiftAmt += isLittleEndian ? 
SrcBitSize : -SrcBitSize; Elt.IntVal |= Tmp; } diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index f780137d0874..50f63fb8dd39 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -819,6 +819,34 @@ void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, } } +void RuntimeDyldELF::resolveBPFRelocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, + uint32_t Type, int64_t Addend) { + bool isBE = Arch == Triple::bpfeb; + + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; + case ELF::R_BPF_NONE: + break; + case ELF::R_BPF_64_64: { + write(isBE, Section.getAddressWithOffset(Offset), Value + Addend); + DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) << " at " + << format("%p\n", Section.getAddressWithOffset(Offset))); + break; + } + case ELF::R_BPF_64_32: { + Value += Addend; + assert(Value <= UINT32_MAX); + write(isBE, Section.getAddressWithOffset(Offset), static_cast<uint32_t>(Value)); + DEBUG(dbgs() << "Writing " << format("%p", Value) << " at " + << format("%p\n", Section.getAddressWithOffset(Offset))); + break; + } + } +} + // The target location for the relocation is described by RE.SectionID and // RE.Offset. RE.SectionID can be used to find the SectionEntry. Each // SectionEntry has three members describing its location. @@ -879,6 +907,10 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, case Triple::systemz: resolveSystemZRelocation(Section, Offset, Value, Type, Addend); break; + case Triple::bpfel: + case Triple::bpfeb: + resolveBPFRelocation(Section, Offset, Value, Type, Addend); + break; default: llvm_unreachable("Unsupported CPU type!"); } diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 498979705b77..84dd810101f3 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -58,6 +58,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl { void resolveSystemZRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); + void resolveBPFRelocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + unsigned getMaxStubSize() override { if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) return 20; // movz; movk; movk; movk; br diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp index b7de07170de9..4c6e3e3788bd 100644 --- a/contrib/llvm/lib/IR/AsmWriter.cpp +++ b/contrib/llvm/lib/IR/AsmWriter.cpp @@ -332,6 +332,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::HHVM: Out << "hhvmcc"; break; case CallingConv::HHVM_C: Out << "hhvm_ccc"; break; case CallingConv::AMDGPU_VS: Out << "amdgpu_vs"; break; + case CallingConv::AMDGPU_HS: Out << "amdgpu_hs"; break; case CallingConv::AMDGPU_GS: Out << "amdgpu_gs"; break; case CallingConv::AMDGPU_PS: Out << "amdgpu_ps"; break; case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break; @@ -1719,6 +1720,7 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N, Printer.printMetadata("templateParams", N->getRawTemplateParams()); Printer.printMetadata("declaration", N->getRawDeclaration()); 
Printer.printMetadata("variables", N->getRawVariables()); + Printer.printMetadata("thrownTypes", N->getRawThrownTypes()); Out << ")"; } @@ -1755,8 +1757,6 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getRawFile()); - Printer.printInt("line", N->getLine()); Printer.printBool("exportSymbols", N->getExportSymbols(), false); Out << ")"; } @@ -2084,8 +2084,7 @@ public: void printModule(const Module *M); void writeOperand(const Value *Op, bool PrintType); - void writeParamOperand(const Value *Operand, AttributeList Attrs, - unsigned Idx); + void writeParamOperand(const Value *Operand, AttributeSet Attrs); void writeOperandBundles(ImmutableCallSite CS); void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope); void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, @@ -2101,7 +2100,7 @@ public: void printIndirectSymbol(const GlobalIndirectSymbol *GIS); void printComdat(const Comdat *C); void printFunction(const Function *F); - void printArgument(const Argument *FA, AttributeList Attrs, unsigned Idx); + void printArgument(const Argument *FA, AttributeSet Attrs); void printBasicBlock(const BasicBlock *BB); void printInstructionLine(const Instruction &I); void printInstruction(const Instruction &I); @@ -2180,7 +2179,7 @@ void AssemblyWriter::writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, } void AssemblyWriter::writeParamOperand(const Value *Operand, - AttributeList Attrs, unsigned Idx) { + AttributeSet Attrs) { if (!Operand) { Out << "<null operand!>"; return; @@ -2189,8 +2188,8 @@ void AssemblyWriter::writeParamOperand(const Value *Operand, // Print the type TypePrinter.print(Operand->getType(), Out); // Print parameter attributes list - if (Attrs.hasAttributes(Idx)) - Out << ' ' << Attrs.getAsString(Idx); + if (Attrs.hasAttributes()) + Out << ' ' << Attrs.getAsString(); Out << ' '; // Print the operand WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule); @@ -2653,17 +2652,17 @@ void AssemblyWriter::printFunction(const Function *F) { // Output type... TypePrinter.print(FT->getParamType(I), Out); - if (Attrs.hasAttributes(I + 1)) - Out << ' ' << Attrs.getAsString(I + 1); + AttributeSet ArgAttrs = Attrs.getParamAttributes(I); + if (ArgAttrs.hasAttributes()) + Out << ' ' << ArgAttrs.getAsString(); } } else { // The arguments are meaningful here, print them in detail. - unsigned Idx = 1; for (const Argument &Arg : F->args()) { // Insert commas as we go... the first arg doesn't get a comma - if (Idx != 1) + if (Arg.getArgNo() != 0) Out << ", "; - printArgument(&Arg, Attrs, Idx++); + printArgument(&Arg, Attrs.getParamAttributes(Arg.getArgNo())); } } @@ -2725,14 +2724,13 @@ void AssemblyWriter::printFunction(const Function *F) { /// printArgument - This member is called for every argument that is passed into /// the function. Simply print it out /// -void AssemblyWriter::printArgument(const Argument *Arg, AttributeList Attrs, - unsigned Idx) { +void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) { // Output type... TypePrinter.print(Arg->getType(), Out); // Output parameter attributes list - if (Attrs.hasAttributes(Idx)) - Out << ' ' << Attrs.getAsString(Idx); + if (Attrs.hasAttributes()) + Out << ' ' << Attrs.getAsString(); // Output name, if available... 
if (Arg->hasName()) { @@ -3026,7 +3024,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) { if (op > 0) Out << ", "; - writeParamOperand(CI->getArgOperand(op), PAL, op + 1); + writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op)); } // Emit an ellipsis if this is a musttail call in a vararg function. This @@ -3069,7 +3067,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) { if (op) Out << ", "; - writeParamOperand(II->getArgOperand(op), PAL, op + 1); + writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op)); } Out << ')'; diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp index e30414537a6c..3b1140ab542c 100644 --- a/contrib/llvm/lib/IR/Attributes.cpp +++ b/contrib/llvm/lib/IR/Attributes.cpp @@ -315,6 +315,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "returns_twice"; if (hasAttribute(Attribute::SExt)) return "signext"; + if (hasAttribute(Attribute::Speculatable)) + return "speculatable"; if (hasAttribute(Attribute::StackProtect)) return "ssp"; if (hasAttribute(Attribute::StackProtectReq)) @@ -934,7 +936,9 @@ AttributeList AttributeList::get(LLVMContext &C, AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const { if (hasAttribute(Index, Kind)) return *this; - return addAttributes(C, Index, AttributeList::get(C, Index, Kind)); + AttrBuilder B; + B.addAttribute(Kind); + return addAttributes(C, Index, B); } AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, @@ -942,7 +946,7 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, StringRef Value) const { AttrBuilder B; B.addAttribute(Kind, Value); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } AttributeList AttributeList::addAttribute(LLVMContext &C, @@ -976,14 +980,6 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, } AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, - AttributeList Attrs) const { - if (!pImpl) return Attrs; - if (!Attrs.pImpl) return *this; - - return addAttributes(C, Index, Attrs.getAttributes(Index)); -} - -AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, const AttrBuilder &B) const { if (!B.hasAttributes()) return *this; @@ -1032,18 +1028,17 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const { if (!hasAttribute(Index, Kind)) return *this; - return removeAttributes(C, Index, AttributeList::get(C, Index, Kind)); + AttrBuilder B; + B.addAttribute(Kind); + return removeAttributes(C, Index, B); } AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, StringRef Kind) const { if (!hasAttribute(Index, Kind)) return *this; - return removeAttributes(C, Index, AttributeList::get(C, Index, Kind)); -} - -AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, - AttributeList Attrs) const { - return removeAttributes(C, Index, AttrBuilder(Attrs.getAttributes(Index))); + AttrBuilder B; + B.addAttribute(Kind); + return removeAttributes(C, Index, B); } AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, @@ -1101,7 +1096,7 @@ AttributeList 
AttributeList::addDereferenceableAttr(LLVMContext &C, uint64_t Bytes) const { AttrBuilder B; B.addDereferenceableAttr(Bytes); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } AttributeList @@ -1109,7 +1104,7 @@ AttributeList::addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index, uint64_t Bytes) const { AttrBuilder B; B.addDereferenceableOrNullAttr(Bytes); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } AttributeList @@ -1118,7 +1113,7 @@ AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index, const Optional<unsigned> &NumElemsArg) { AttrBuilder B; B.addAllocSizeAttr(ElemSizeArg, NumElemsArg); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } //===----------------------------------------------------------------------===// @@ -1128,7 +1123,7 @@ AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index, LLVMContext &AttributeList::getContext() const { return pImpl->getContext(); } AttributeSet AttributeList::getParamAttributes(unsigned ArgNo) const { - return getAttributes(ArgNo + 1); + return getAttributes(ArgNo + FirstArgIndex); } AttributeSet AttributeList::getRetAttributes() const { @@ -1189,8 +1184,12 @@ Attribute AttributeList::getAttribute(unsigned Index, StringRef Kind) const { return getAttributes(Index).getAttribute(Kind); } -unsigned AttributeList::getParamAlignment(unsigned Index) const { - return getAttributes(Index).getAlignment(); +unsigned AttributeList::getRetAlignment() const { + return getAttributes(ReturnIndex).getAlignment(); +} + +unsigned AttributeList::getParamAlignment(unsigned ArgNo) const { + return getAttributes(ArgNo + FirstArgIndex).getAlignment(); } unsigned AttributeList::getStackAlignment(unsigned Index) const { @@ -1363,15 +1362,7 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { } AttrBuilder &AttrBuilder::removeAttributes(AttributeList A, uint64_t Index) { - for (Attribute Attr : A.getAttributes(Index)) { - if (Attr.isEnumAttribute() || Attr.isIntAttribute()) { - removeAttribute(Attr.getKindAsEnum()); - } else { - assert(Attr.isStringAttribute() && "Invalid attribute type!"); - removeAttribute(Attr.getKindAsString()); - } - } - + remove(A.getAttributes(Index)); return *this; } @@ -1513,25 +1504,16 @@ bool AttrBuilder::hasAttributes() const { return !Attrs.none() || !TargetDepAttrs.empty(); } -bool AttrBuilder::hasAttributes(AttributeList A, uint64_t Index) const { - unsigned Slot = ~0U; - for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) - if (A.getSlotIndex(I) == Index) { - Slot = I; - break; - } +bool AttrBuilder::hasAttributes(AttributeList AL, uint64_t Index) const { + AttributeSet AS = AL.getAttributes(Index); - assert(Slot != ~0U && "Couldn't find the index!"); - - for (AttributeList::iterator I = A.begin(Slot), E = A.end(Slot); I != E; - ++I) { - Attribute Attr = *I; + for (Attribute Attr : AS) { if (Attr.isEnumAttribute() || Attr.isIntAttribute()) { - if (Attrs[I->getKindAsEnum()]) + if (contains(Attr.getKindAsEnum())) return true; } else { assert(Attr.isStringAttribute() && "Invalid attribute kind!"); - return TargetDepAttrs.find(Attr.getKindAsString())!=TargetDepAttrs.end(); + return contains(Attr.getKindAsString()); } } @@ -1621,12 +1603,10 @@ static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) { // If upgrading the SSP attribute, clear out the old SSP Attributes first. 
// Having multiple SSP attributes doesn't actually hurt, but it adds useless // clutter to the IR. - AttrBuilder B; - B.addAttribute(Attribute::StackProtect) - .addAttribute(Attribute::StackProtectStrong) - .addAttribute(Attribute::StackProtectReq); - AttributeList OldSSPAttr = - AttributeList::get(Caller.getContext(), AttributeList::FunctionIndex, B); + AttrBuilder OldSSPAttr; + OldSSPAttr.addAttribute(Attribute::StackProtect) + .addAttribute(Attribute::StackProtectStrong) + .addAttribute(Attribute::StackProtectReq); if (Callee.hasFnAttribute(Attribute::StackProtectReq)) { Caller.removeAttributes(AttributeList::FunctionIndex, OldSSPAttr); diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp index 2897434a2b8d..8bcba7672315 100644 --- a/contrib/llvm/lib/IR/AutoUpgrade.cpp +++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp @@ -467,6 +467,27 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return true; } } + // Renaming gather/scatter intrinsics with no address space overloading + // to the new overload which includes an address space + if (Name.startswith("masked.gather.")) { + Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()}; + if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) { + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::masked_gather, Tys); + return true; + } + } + if (Name.startswith("masked.scatter.")) { + auto Args = F->getFunctionType()->params(); + Type *Tys[] = {Args[0], Args[1]}; + if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) { + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::masked_scatter, Tys); + return true; + } + } break; } case 'n': { @@ -2072,7 +2093,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::masked_load: - case Intrinsic::masked_store: { + case Intrinsic::masked_store: + case Intrinsic::masked_gather: + case Intrinsic::masked_scatter: { SmallVector<Value *, 4> Args(CI->arg_operands().begin(), CI->arg_operands().end()); NewCall = Builder.CreateCall(NewFn, Args); diff --git a/contrib/llvm/lib/IR/ConstantRange.cpp b/contrib/llvm/lib/IR/ConstantRange.cpp index 0cc38b025209..5425676e4edc 100644 --- a/contrib/llvm/lib/IR/ConstantRange.cpp +++ b/contrib/llvm/lib/IR/ConstantRange.cpp @@ -29,12 +29,9 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) { - if (Full) - Lower = Upper = APInt::getMaxValue(BitWidth); - else - Lower = Upper = APInt::getMinValue(BitWidth); -} +ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) + : Lower(Full ? 
APInt::getMaxValue(BitWidth) : APInt::getMinValue(BitWidth)), + Upper(Lower) {} ConstantRange::ConstantRange(APInt V) : Lower(std::move(V)), Upper(Lower + 1) {} @@ -66,49 +63,49 @@ ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred, APInt UMax(CR.getUnsignedMax()); if (UMax.isMinValue()) return ConstantRange(W, /* empty */ false); - return ConstantRange(APInt::getMinValue(W), UMax); + return ConstantRange(APInt::getMinValue(W), std::move(UMax)); } case CmpInst::ICMP_SLT: { APInt SMax(CR.getSignedMax()); if (SMax.isMinSignedValue()) return ConstantRange(W, /* empty */ false); - return ConstantRange(APInt::getSignedMinValue(W), SMax); + return ConstantRange(APInt::getSignedMinValue(W), std::move(SMax)); } case CmpInst::ICMP_ULE: { APInt UMax(CR.getUnsignedMax()); if (UMax.isMaxValue()) return ConstantRange(W); - return ConstantRange(APInt::getMinValue(W), UMax + 1); + return ConstantRange(APInt::getMinValue(W), std::move(UMax) + 1); } case CmpInst::ICMP_SLE: { APInt SMax(CR.getSignedMax()); if (SMax.isMaxSignedValue()) return ConstantRange(W); - return ConstantRange(APInt::getSignedMinValue(W), SMax + 1); + return ConstantRange(APInt::getSignedMinValue(W), std::move(SMax) + 1); } case CmpInst::ICMP_UGT: { APInt UMin(CR.getUnsignedMin()); if (UMin.isMaxValue()) return ConstantRange(W, /* empty */ false); - return ConstantRange(UMin + 1, APInt::getNullValue(W)); + return ConstantRange(std::move(UMin) + 1, APInt::getNullValue(W)); } case CmpInst::ICMP_SGT: { APInt SMin(CR.getSignedMin()); if (SMin.isMaxSignedValue()) return ConstantRange(W, /* empty */ false); - return ConstantRange(SMin + 1, APInt::getSignedMinValue(W)); + return ConstantRange(std::move(SMin) + 1, APInt::getSignedMinValue(W)); } case CmpInst::ICMP_UGE: { APInt UMin(CR.getUnsignedMin()); if (UMin.isMinValue()) return ConstantRange(W); - return ConstantRange(UMin, APInt::getNullValue(W)); + return ConstantRange(std::move(UMin), APInt::getNullValue(W)); } case CmpInst::ICMP_SGE: { APInt SMin(CR.getSignedMin()); if (SMin.isMinSignedValue()) return ConstantRange(W); - return ConstantRange(SMin, APInt::getSignedMinValue(W)); + return ConstantRange(std::move(SMin), APInt::getSignedMinValue(W)); } } } @@ -198,7 +195,7 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, return ConstantRange(BitWidth, false); if (auto *C = Other.getSingleElement()) - if (C->isMinValue()) + if (C->isNullValue()) // Full set: nothing signed / unsigned wraps when added to 0. return ConstantRange(BitWidth); @@ -210,8 +207,8 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, -Other.getUnsignedMax())); if (NoWrapKind & OBO::NoSignedWrap) { - APInt SignedMin = Other.getSignedMin(); - APInt SignedMax = Other.getSignedMax(); + const APInt &SignedMin = Other.getSignedMin(); + const APInt &SignedMax = Other.getSignedMax(); if (SignedMax.isStrictlyPositive()) Result = SubsetIntersect( @@ -246,11 +243,8 @@ bool ConstantRange::isSignWrappedSet() const { } APInt ConstantRange::getSetSize() const { - if (isFullSet()) { - APInt Size(getBitWidth()+1, 0); - Size.setBit(getBitWidth()); - return Size; - } + if (isFullSet()) + return APInt::getOneBitSet(getBitWidth()+1, getBitWidth()); // This is also correct for wrapped sets. 
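// For a wrapped set (Lower ugt Upper), the W-bit subtraction computes
// (Upper - Lower) mod 2^W = 2^W - Lower + Upper, which is exactly the
// number of elements in [Lower, 2^W) plus [0, Upper); the zext to W+1 bits
// only serves to keep the full-set size 2^W representable in the same type.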
return (Upper - Lower).zext(getBitWidth()+1); @@ -279,7 +273,6 @@ APInt ConstantRange::getUnsignedMin() const { } APInt ConstantRange::getSignedMax() const { - APInt SignedMax(APInt::getSignedMaxValue(getBitWidth())); if (!isWrappedSet()) { APInt UpperMinusOne = getUpper() - 1; if (getLower().sle(UpperMinusOne)) @@ -435,16 +428,13 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const { return ConstantRange(CR.Lower, Upper); } - APInt L = Lower, U = Upper; - if (CR.Lower.ult(L)) - L = CR.Lower; - if ((CR.Upper - 1).ugt(U - 1)) - U = CR.Upper; + APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower; + APInt U = (CR.Upper - 1).ugt(Upper - 1) ? CR.Upper : Upper; if (L == 0 && U == 0) return ConstantRange(getBitWidth()); - return ConstantRange(L, U); + return ConstantRange(std::move(L), std::move(U)); } if (!CR.isWrappedSet()) { @@ -485,13 +475,10 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const { if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper)) return ConstantRange(getBitWidth()); - APInt L = Lower, U = Upper; - if (CR.Upper.ugt(U)) - U = CR.Upper; - if (CR.Lower.ult(L)) - L = CR.Lower; + APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower; + APInt U = CR.Upper.ugt(Upper) ? CR.Upper : Upper; - return ConstantRange(L, U); + return ConstantRange(std::move(L), std::move(U)); } ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp, @@ -518,14 +505,14 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp, auto BW = getBitWidth(); APInt Min = APInt::getMinValue(BW).zextOrSelf(ResultBitWidth); APInt Max = APInt::getMaxValue(BW).zextOrSelf(ResultBitWidth); - return ConstantRange(Min, Max); + return ConstantRange(std::move(Min), std::move(Max)); } case Instruction::SIToFP: { // TODO: use input range if available auto BW = getBitWidth(); APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(ResultBitWidth); APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(ResultBitWidth); - return ConstantRange(SMin, SMax); + return ConstantRange(std::move(SMin), std::move(SMax)); } case Instruction::FPTrunc: case Instruction::FPExt: @@ -547,7 +534,8 @@ ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const { APInt LowerExt(DstTySize, 0); if (!Upper) // special case: [X, 0) -- not really wrapping around LowerExt = Lower.zext(DstTySize); - return ConstantRange(LowerExt, APInt::getOneBitSet(DstTySize, SrcTySize)); + return ConstantRange(std::move(LowerExt), + APInt::getOneBitSet(DstTySize, SrcTySize)); } return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize)); @@ -578,9 +566,8 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { if (isFullSet()) return ConstantRange(DstTySize, /*isFullSet=*/true); - APInt MaxValue = APInt::getMaxValue(DstTySize).zext(getBitWidth()); - APInt MaxBitValue(getBitWidth(), 0); - MaxBitValue.setBit(DstTySize); + APInt MaxValue = APInt::getLowBitsSet(getBitWidth(), DstTySize); + APInt MaxBitValue = APInt::getOneBitSet(getBitWidth(), DstTySize); APInt LowerDiv(Lower), UpperDiv(Upper); ConstantRange Union(DstTySize, /*isFullSet=*/false); @@ -594,7 +581,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { return ConstantRange(DstTySize, /*isFullSet=*/true); Union = ConstantRange(APInt::getMaxValue(DstTySize),Upper.trunc(DstTySize)); - UpperDiv = APInt::getMaxValue(getBitWidth()); + UpperDiv.setAllBits(); // Union covers the MaxValue case, so return if the remaining range is just // MaxValue. 
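// (A worked example of the reduction in the next hunk: truncating the i16
// range [0x0100, 0x0105) to i8, LowerDiv = 0x0100 is uge MaxValue (0x00FF),
// so udivrem by MaxBitValue (0x0100) rewrites LowerDiv to 0x0000 and
// UpperDiv to 0x0005; UpperDiv is then ule MaxValue, yielding the expected
// result [0x00, 0x05).)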
@@ -606,7 +593,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { if (LowerDiv.uge(MaxValue)) { APInt Div(getBitWidth(), 0); APInt::udivrem(LowerDiv, MaxBitValue, Div, LowerDiv); - UpperDiv = UpperDiv - MaxBitValue * Div; + UpperDiv -= MaxBitValue * Div; } if (UpperDiv.ule(MaxValue)) @@ -614,10 +601,10 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { UpperDiv.trunc(DstTySize)).unionWith(Union); // The truncated value wraps around. Check if we can do better than fullset. - APInt UpperModulo = UpperDiv - MaxBitValue; - if (UpperModulo.ult(LowerDiv)) + UpperDiv -= MaxBitValue; + if (UpperDiv.ult(LowerDiv)) return ConstantRange(LowerDiv.trunc(DstTySize), - UpperModulo.trunc(DstTySize)).unionWith(Union); + UpperDiv.trunc(DstTySize)).unionWith(Union); return ConstantRange(DstTySize, /*isFullSet=*/true); } @@ -688,7 +675,7 @@ ConstantRange::add(const ConstantRange &Other) const { if (NewLower == NewUpper) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - ConstantRange X = ConstantRange(NewLower, NewUpper); + ConstantRange X = ConstantRange(std::move(NewLower), std::move(NewUpper)); if (X.isSizeStrictlySmallerThanOf(*this) || X.isSizeStrictlySmallerThanOf(Other)) // We've wrapped, therefore, full set. @@ -721,7 +708,7 @@ ConstantRange::sub(const ConstantRange &Other) const { if (NewLower == NewUpper) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - ConstantRange X = ConstantRange(NewLower, NewUpper); + ConstantRange X = ConstantRange(std::move(NewLower), std::move(NewUpper)); if (X.isSizeStrictlySmallerThanOf(*this) || X.isSizeStrictlySmallerThanOf(Other)) // We've wrapped, therefore, full set. @@ -792,7 +779,7 @@ ConstantRange::smax(const ConstantRange &Other) const { APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1; if (NewU == NewL) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(NewL, NewU); + return ConstantRange(std::move(NewL), std::move(NewU)); } ConstantRange @@ -805,7 +792,7 @@ ConstantRange::umax(const ConstantRange &Other) const { APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1; if (NewU == NewL) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(NewL, NewU); + return ConstantRange(std::move(NewL), std::move(NewU)); } ConstantRange @@ -818,7 +805,7 @@ ConstantRange::smin(const ConstantRange &Other) const { APInt NewU = APIntOps::smin(getSignedMax(), Other.getSignedMax()) + 1; if (NewU == NewL) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(NewL, NewU); + return ConstantRange(std::move(NewL), std::move(NewU)); } ConstantRange @@ -831,7 +818,7 @@ ConstantRange::umin(const ConstantRange &Other) const { APInt NewU = APIntOps::umin(getUnsignedMax(), Other.getUnsignedMax()) + 1; if (NewU == NewL) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(NewL, NewU); + return ConstantRange(std::move(NewL), std::move(NewU)); } ConstantRange @@ -850,7 +837,7 @@ ConstantRange::udiv(const ConstantRange &RHS) const { if (RHS.getUpper() == 1) RHS_umin = RHS.getLower(); else - RHS_umin = APInt(getBitWidth(), 1); + RHS_umin = 1; } APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1; @@ -860,7 +847,7 @@ ConstantRange::udiv(const ConstantRange &RHS) const { if (Lower == Upper) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(Lower, Upper); + return ConstantRange(std::move(Lower), std::move(Upper)); } ConstantRange @@ -873,7 +860,7 @@ 
ConstantRange::binaryAnd(const ConstantRange &Other) const { APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()); if (umin.isAllOnesValue()) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(APInt::getNullValue(getBitWidth()), umin + 1); + return ConstantRange(APInt::getNullValue(getBitWidth()), std::move(umin) + 1); } ConstantRange @@ -884,9 +871,9 @@ ConstantRange::binaryOr(const ConstantRange &Other) const { // TODO: replace this with something less conservative APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()); - if (umax.isMinValue()) + if (umax.isNullValue()) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(umax, APInt::getNullValue(getBitWidth())); + return ConstantRange(std::move(umax), APInt::getNullValue(getBitWidth())); } ConstantRange @@ -900,7 +887,7 @@ ConstantRange::shl(const ConstantRange &Other) const { // there's no overflow! APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros()); if (Zeros.ugt(Other.getUnsignedMax())) - return ConstantRange(min, max + 1); + return ConstantRange(std::move(min), std::move(max) + 1); // FIXME: implement the other tricky cases return ConstantRange(getBitWidth(), /*isFullSet=*/true); @@ -916,7 +903,7 @@ ConstantRange::lshr(const ConstantRange &Other) const { if (min == max + 1) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(min, max + 1); + return ConstantRange(std::move(min), std::move(max) + 1); } ConstantRange ConstantRange::inverse() const { diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp index 9407c805b92a..7e6f9a7804b9 100644 --- a/contrib/llvm/lib/IR/DIBuilder.cpp +++ b/contrib/llvm/lib/IR/DIBuilder.cpp @@ -676,13 +676,14 @@ DISubprogram *DIBuilder::createFunction( DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags, - bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl) { + bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl, + DITypeArray ThrownTypes) { auto *Node = getSubprogram( /* IsDistinct = */ isDefinition, VMContext, getNonCompileUnitScope(Context), Name, LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, 0, Flags, isOptimized, isDefinition ? CUNode : nullptr, TParams, Decl, - MDTuple::getTemporary(VMContext, None).release()); + MDTuple::getTemporary(VMContext, None).release(), ThrownTypes); if (isDefinition) AllSubprograms.push_back(Node); @@ -694,23 +695,22 @@ DISubprogram *DIBuilder::createTempFunctionFwdDecl( DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags, - bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl) { + bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl, + DITypeArray ThrownTypes) { return DISubprogram::getTemporary( VMContext, getNonCompileUnitScope(Context), Name, LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, 0, Flags, isOptimized, isDefinition ? 
CUNode : nullptr, - TParams, Decl, nullptr) + TParams, Decl, nullptr, ThrownTypes) .release(); } -DISubprogram *DIBuilder::createMethod(DIScope *Context, StringRef Name, - StringRef LinkageName, DIFile *F, - unsigned LineNo, DISubroutineType *Ty, - bool isLocalToUnit, bool isDefinition, - unsigned VK, unsigned VIndex, - int ThisAdjustment, DIType *VTableHolder, - DINode::DIFlags Flags, bool isOptimized, - DITemplateParameterArray TParams) { +DISubprogram *DIBuilder::createMethod( + DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F, + unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, + bool isDefinition, unsigned VK, unsigned VIndex, int ThisAdjustment, + DIType *VTableHolder, DINode::DIFlags Flags, bool isOptimized, + DITemplateParameterArray TParams, DITypeArray ThrownTypes) { assert(getNonCompileUnitScope(Context) && "Methods should have both a Context and a context that isn't " "the compile unit."); @@ -719,7 +719,7 @@ DISubprogram *DIBuilder::createMethod(DIScope *Context, StringRef Name, /* IsDistinct = */ isDefinition, VMContext, cast<DIScope>(Context), Name, LinkageName, F, LineNo, Ty, isLocalToUnit, isDefinition, LineNo, VTableHolder, VK, VIndex, ThisAdjustment, Flags, isOptimized, - isDefinition ? CUNode : nullptr, TParams, nullptr, nullptr); + isDefinition ? CUNode : nullptr, TParams, nullptr, nullptr, ThrownTypes); if (isDefinition) AllSubprograms.push_back(SP); @@ -728,10 +728,15 @@ DISubprogram *DIBuilder::createMethod(DIScope *Context, StringRef Name, } DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name, - DIFile *File, unsigned LineNo, bool ExportSymbols) { - return DINamespace::get(VMContext, getNonCompileUnitScope(Scope), File, Name, - LineNo, ExportSymbols); + + // It is okay to *not* make anonymous top-level namespaces distinct, because + // all nodes that have an anonymous namespace as their parent scope are + // guaranteed to be unique and/or are linked to their containing + // DICompileUnit. This decision is an explicit tradeoff of link time versus + // memory usage versus code simplicity and may get revisited in the future. 
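// After this change a namespace descriptor carries only its scope, name,
// and exportSymbols flag; in textual IR it prints along the lines of
//   !5 = !DINamespace(name: "std", scope: !1)
// (node numbers illustrative), matching the writeDINamespace change above
// that dropped the file and line fields.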
+ return DINamespace::get(VMContext, getNonCompileUnitScope(Scope), Name, + ExportSymbols); } DIModule *DIBuilder::createModule(DIScope *Scope, StringRef Name, diff --git a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp index d14c6018d409..cdbe237766a3 100644 --- a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp @@ -15,6 +15,7 @@ #include "LLVMContextImpl.h" #include "MetadataImpl.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" using namespace llvm; @@ -214,6 +215,10 @@ void GenericDINode::recalculateHash() { #define DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(CLASS, OPS) \ return storeImpl(new (array_lengthof(OPS)) CLASS(Context, Storage, OPS), \ Storage, Context.pImpl->CLASS##s) +#define DEFINE_GETIMPL_STORE_N(CLASS, ARGS, OPS, NUM_OPS) \ + return storeImpl(new (NUM_OPS) \ + CLASS(Context, Storage, UNWRAP_ARGS(ARGS), OPS), \ + Storage, Context.pImpl->CLASS##s) DISubrange *DISubrange::getImpl(LLVMContext &Context, int64_t Count, int64_t Lo, StorageType Storage, bool ShouldCreate) { @@ -441,21 +446,30 @@ DISubprogram *DISubprogram::getImpl( Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit, Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables, - StorageType Storage, bool ShouldCreate) { + Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); assert(isCanonical(LinkageName) && "Expected canonical MDString"); DEFINE_GETIMPL_LOOKUP( - DISubprogram, - (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, - ScopeLine, ContainingType, Virtuality, VirtualIndex, ThisAdjustment, - Flags, IsOptimized, Unit, TemplateParams, Declaration, Variables)); - Metadata *Ops[] = {File, Scope, Name, Name, - LinkageName, Type, ContainingType, Unit, - TemplateParams, Declaration, Variables}; - DEFINE_GETIMPL_STORE(DISubprogram, (Line, ScopeLine, Virtuality, VirtualIndex, - ThisAdjustment, Flags, IsLocalToUnit, - IsDefinition, IsOptimized), - Ops); + DISubprogram, (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, + IsDefinition, ScopeLine, ContainingType, Virtuality, + VirtualIndex, ThisAdjustment, Flags, IsOptimized, Unit, + TemplateParams, Declaration, Variables, ThrownTypes)); + SmallVector<Metadata *, 11> Ops = { + File, Scope, Name, LinkageName, Type, Unit, + Declaration, Variables, ContainingType, TemplateParams, ThrownTypes}; + if (!ThrownTypes) { + Ops.pop_back(); + if (!TemplateParams) { + Ops.pop_back(); + if (!ContainingType) + Ops.pop_back(); + } + } + DEFINE_GETIMPL_STORE_N(DISubprogram, + (Line, ScopeLine, Virtuality, VirtualIndex, + ThisAdjustment, Flags, IsLocalToUnit, IsDefinition, + IsOptimized), + Ops, Ops.size()); } bool DISubprogram::describes(const Function *F) const { @@ -493,13 +507,13 @@ DILexicalBlockFile *DILexicalBlockFile::getImpl(LLVMContext &Context, } DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope, - Metadata *File, MDString *Name, unsigned Line, - bool ExportSymbols, StorageType Storage, - bool ShouldCreate) { + MDString *Name, bool ExportSymbols, + StorageType Storage, bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); - DEFINE_GETIMPL_LOOKUP(DINamespace, (Scope, File, Name, Line, ExportSymbols)); - Metadata *Ops[] = {File, Scope, Name}; - DEFINE_GETIMPL_STORE(DINamespace, (Line, ExportSymbols), 
Ops); + DEFINE_GETIMPL_LOOKUP(DINamespace, (Scope, Name, ExportSymbols)); + // The nullptr is for DIScope's File operand. This should be refactored. + Metadata *Ops[] = {nullptr, Scope, Name}; + DEFINE_GETIMPL_STORE(DINamespace, (ExportSymbols), Ops); } DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *Scope, @@ -647,6 +661,43 @@ DIExpression::getFragmentInfo(expr_op_iterator Start, expr_op_iterator End) { return None; } +void DIExpression::appendOffset(SmallVectorImpl<uint64_t> &Ops, + int64_t Offset) { + if (Offset > 0) { + Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(Offset); + } else if (Offset < 0) { + Ops.push_back(dwarf::DW_OP_minus); + Ops.push_back(-Offset); + } +} + +DIExpression *DIExpression::prepend(const DIExpression *Expr, bool Deref, + int64_t Offset, bool StackValue) { + SmallVector<uint64_t, 8> Ops; + appendOffset(Ops, Offset); + if (Deref) + Ops.push_back(dwarf::DW_OP_deref); + if (Expr) + for (auto Op : Expr->expr_ops()) { + // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment. + if (StackValue) { + if (Op.getOp() == dwarf::DW_OP_stack_value) + StackValue = false; + else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) { + Ops.push_back(dwarf::DW_OP_stack_value); + StackValue = false; + } + } + Ops.push_back(Op.getOp()); + for (unsigned I = 0; I < Op.getNumArgs(); ++I) + Ops.push_back(Op.getArg(I)); + } + if (StackValue) + Ops.push_back(dwarf::DW_OP_stack_value); + return DIExpression::get(Expr->getContext(), Ops); +} + bool DIExpression::isConstant() const { // Recognize DW_OP_constu C DW_OP_stack_value (DW_OP_LLVM_fragment Len Ofs)?. if (getNumElements() != 3 && getNumElements() != 6) diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp index e1f5fdea44e4..58c060550322 100644 --- a/contrib/llvm/lib/IR/Function.cpp +++ b/contrib/llvm/lib/IR/Function.cpp @@ -84,20 +84,21 @@ bool Argument::hasByValOrInAllocaAttr() const { unsigned Argument::getParamAlignment() const { assert(getType()->isPointerTy() && "Only pointers have alignments"); - return getParent()->getParamAlignment(getArgNo()+1); - + return getParent()->getParamAlignment(getArgNo()); } uint64_t Argument::getDereferenceableBytes() const { assert(getType()->isPointerTy() && "Only pointers have dereferenceable bytes"); - return getParent()->getDereferenceableBytes(getArgNo()+1); + return getParent()->getDereferenceableBytes(getArgNo() + + AttributeList::FirstArgIndex); } uint64_t Argument::getDereferenceableOrNullBytes() const { assert(getType()->isPointerTy() && "Only pointers have dereferenceable bytes"); - return getParent()->getDereferenceableOrNullBytes(getArgNo()+1); + return getParent()->getDereferenceableOrNullBytes( + getArgNo() + AttributeList::FirstArgIndex); } bool Argument::hasNestAttr() const { @@ -140,29 +141,21 @@ bool Argument::onlyReadsMemory() const { void Argument::addAttrs(AttrBuilder &B) { AttributeList AL = getParent()->getAttributes(); - AL = AL.addAttributes(Parent->getContext(), getArgNo() + 1, B); + AL = AL.addAttributes(Parent->getContext(), + getArgNo() + AttributeList::FirstArgIndex, B); getParent()->setAttributes(AL); } void Argument::addAttr(Attribute::AttrKind Kind) { - getParent()->addAttribute(getArgNo() + 1, Kind); + getParent()->addAttribute(getArgNo() + AttributeList::FirstArgIndex, Kind); } void Argument::addAttr(Attribute Attr) { - getParent()->addAttribute(getArgNo() + 1, Attr); -} - -void Argument::removeAttr(AttributeList AS) { - assert(AS.getNumSlots() <= 1 && - "Trying to remove more than one attribute set from an argument!");
- AttrBuilder B(AS, AS.getSlotIndex(0)); - getParent()->removeAttributes( - getArgNo() + 1, - AttributeList::get(Parent->getContext(), getArgNo() + 1, B)); + getParent()->addAttribute(getArgNo() + AttributeList::FirstArgIndex, Attr); } void Argument::removeAttr(Attribute::AttrKind Kind) { - getParent()->removeAttribute(getArgNo() + 1, Kind); + getParent()->removeAttribute(getArgNo() + AttributeList::FirstArgIndex, Kind); } bool Argument::hasAttribute(Attribute::AttrKind Kind) const { @@ -338,7 +331,7 @@ void Function::addAttribute(unsigned i, Attribute Attr) { setAttributes(PAL); } -void Function::addAttributes(unsigned i, AttributeList Attrs) { +void Function::addAttributes(unsigned i, const AttrBuilder &Attrs) { AttributeList PAL = getAttributes(); PAL = PAL.addAttributes(getContext(), i, Attrs); setAttributes(PAL); @@ -356,7 +349,7 @@ void Function::removeAttribute(unsigned i, StringRef Kind) { setAttributes(PAL); } -void Function::removeAttributes(unsigned i, AttributeList Attrs) { +void Function::removeAttributes(unsigned i, const AttrBuilder &Attrs) { AttributeList PAL = getAttributes(); PAL = PAL.removeAttributes(getContext(), i, Attrs); setAttributes(PAL); @@ -584,13 +577,12 @@ enum IIT_Info { IIT_SAME_VEC_WIDTH_ARG = 31, IIT_PTR_TO_ARG = 32, IIT_PTR_TO_ELT = 33, - IIT_VEC_OF_PTRS_TO_ELT = 34, + IIT_VEC_OF_ANYPTRS_TO_ELT = 34, IIT_I128 = 35, IIT_V512 = 36, IIT_V1024 = 37 }; - static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos, SmallVectorImpl<Intrinsic::IITDescriptor> &OutputTable) { IIT_Info Info = IIT_Info(Infos[NextElt++]); @@ -726,10 +718,11 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos, OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo)); return; } - case IIT_VEC_OF_PTRS_TO_ELT: { - unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); - OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfPtrsToElt, - ArgInfo)); + case IIT_VEC_OF_ANYPTRS_TO_ELT: { + unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + unsigned short RefNo = (NextElt == Infos.size() ? 
0 : Infos[NextElt++]); + OutputTable.push_back( + IITDescriptor::get(IITDescriptor::VecOfAnyPtrsToElt, ArgNo, RefNo)); return; } case IIT_EMPTYSTRUCT: @@ -818,7 +811,6 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos, Elts[i] = DecodeFixedType(Infos, Tys, Context); return StructType::get(Context, makeArrayRef(Elts,D.Struct_NumElements)); } - case IITDescriptor::Argument: return Tys[D.getArgumentNumber()]; case IITDescriptor::ExtendArgument: { @@ -860,15 +852,9 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos, Type *EltTy = VTy->getVectorElementType(); return PointerType::getUnqual(EltTy); } - case IITDescriptor::VecOfPtrsToElt: { - Type *Ty = Tys[D.getArgumentNumber()]; - VectorType *VTy = dyn_cast<VectorType>(Ty); - if (!VTy) - llvm_unreachable("Expected an argument of Vector Type"); - Type *EltTy = VTy->getVectorElementType(); - return VectorType::get(PointerType::getUnqual(EltTy), - VTy->getNumElements()); - } + case IITDescriptor::VecOfAnyPtrsToElt: + // Return the overloaded type (which determines the pointers address space) + return Tys[D.getOverloadArgNumber()]; } llvm_unreachable("unhandled"); } @@ -1064,11 +1050,22 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor> return (!ThisArgType || !ReferenceType || ThisArgType->getElementType() != ReferenceType->getElementType()); } - case IITDescriptor::VecOfPtrsToElt: { - if (D.getArgumentNumber() >= ArgTys.size()) + case IITDescriptor::VecOfAnyPtrsToElt: { + unsigned RefArgNumber = D.getRefArgNumber(); + + // This may only be used when referring to a previous argument. + if (RefArgNumber >= ArgTys.size()) return true; - VectorType * ReferenceType = - dyn_cast<VectorType> (ArgTys[D.getArgumentNumber()]); + + // Record the overloaded type + assert(D.getOverloadArgNumber() == ArgTys.size() && + "Table consistency error"); + ArgTys.push_back(Ty); + + // Verify the overloaded type "matches" the Ref type. + // i.e. Ty is a vector with the same width as Ref. + // Composed of pointers to the same element type as Ref. + VectorType *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]); VectorType *ThisArgVecTy = dyn_cast<VectorType>(Ty); if (!ThisArgVecTy || !ReferenceType || (ReferenceType->getVectorNumElements() != diff --git a/contrib/llvm/lib/IR/IRBuilder.cpp b/contrib/llvm/lib/IR/IRBuilder.cpp index fd5ae71a2f3c..e265a823687f 100644 --- a/contrib/llvm/lib/IR/IRBuilder.cpp +++ b/contrib/llvm/lib/IR/IRBuilder.cpp @@ -293,11 +293,13 @@ CallInst *IRBuilderBase::CreateMaskedGather(Value *Ptrs, unsigned Align, Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context), NumElts)); + Type *OverloadedTypes[] = {DataTy, PtrsTy}; Value * Ops[] = {Ptrs, getInt32(Align), Mask, UndefValue::get(DataTy)}; // We specify only one type when we create this intrinsic. Types of other // arguments are derived from this type. - return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, { DataTy }, Name); + return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, OverloadedTypes, + Name); } /// \brief Create a call to a Masked Scatter intrinsic. @@ -323,11 +325,13 @@ CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs, if (!Mask) Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context), NumElts)); + + Type *OverloadedTypes[] = {DataTy, PtrsTy}; Value * Ops[] = {Data, Ptrs, getInt32(Align), Mask}; // We specify only one type when we create this intrinsic. Types of other // arguments are derived from this type. 
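[Note: the gather and scatter builders above now pass two overloaded types because the intrinsic name is mangled from every overloaded type; encoding the pointer-vector type is what lets one intrinsic address any address space. A toy sketch of that naming idea, with illustrative type strings rather than LLVM's real mangler:

    #include <iostream>
    #include <string>

    // Toy mangler: the intrinsic name embeds every overloaded type, so
    // adding the pointer-vector type distinguishes address spaces.
    std::string mangleGather(const std::string &DataTy,
                             const std::string &PtrsTy) {
      return "llvm.masked.gather." + DataTy + "." + PtrsTy;
    }

    int main() {
      // v4p1f32: four pointers to float in address space 1 (illustrative).
      std::cout << mangleGather("v4f32", "v4p1f32") << '\n';
      // prints: llvm.masked.gather.v4f32.v4p1f32
    }
]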
- return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, { DataTy }); + return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, OverloadedTypes); } template <typename T0, typename T1, typename T2, typename T3> diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp index 76582e334d1f..a60cc375d568 100644 --- a/contrib/llvm/lib/IR/Instructions.cpp +++ b/contrib/llvm/lib/IR/Instructions.cpp @@ -335,12 +335,12 @@ Value *CallInst::getReturnedArgOperand() const { unsigned Index; if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); + return getArgOperand(Index - AttributeList::FirstArgIndex); if (const Function *F = getCalledFunction()) if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); - + return getArgOperand(Index - AttributeList::FirstArgIndex); + return nullptr; } @@ -356,6 +356,10 @@ void CallInst::addAttribute(unsigned i, Attribute Attr) { setAttributes(PAL); } +void CallInst::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + addAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); +} + void CallInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) { AttributeList PAL = getAttributes(); PAL = PAL.removeAttribute(getContext(), i, Kind); @@ -368,6 +372,10 @@ void CallInst::removeAttribute(unsigned i, StringRef Kind) { setAttributes(PAL); } +void CallInst::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + removeAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); +} + void CallInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { AttributeList PAL = getAttributes(); PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes); @@ -501,7 +509,8 @@ static Instruction *createMalloc(Instruction *InsertBefore, MCall->setTailCall(); if (Function *F = dyn_cast<Function>(MallocFunc)) { MCall->setCallingConv(F->getCallingConv()); - if (!F->doesNotAlias(0)) F->setDoesNotAlias(0); + if (!F->returnDoesNotAlias()) + F->setReturnDoesNotAlias(); } assert(!MCall->getType()->isVoidTy() && "Malloc has void return type"); @@ -694,12 +703,12 @@ Value *InvokeInst::getReturnedArgOperand() const { unsigned Index; if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); + return getArgOperand(Index - AttributeList::FirstArgIndex); if (const Function *F = getCalledFunction()) if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); - + return getArgOperand(Index - AttributeList::FirstArgIndex); + return nullptr; } @@ -755,6 +764,10 @@ void InvokeInst::addAttribute(unsigned i, Attribute Attr) { setAttributes(PAL); } +void InvokeInst::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + addAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); +} + void InvokeInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) { AttributeList PAL = getAttributes(); PAL = PAL.removeAttribute(getContext(), i, Kind); @@ -767,6 +780,10 @@ void InvokeInst::removeAttribute(unsigned i, StringRef Kind) { setAttributes(PAL); } +void InvokeInst::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + removeAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); +} + void InvokeInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { AttributeList PAL = getAttributes(); PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes); diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h index 0ee0b9c0da25..9db30da89ed0 
100644 --- a/contrib/llvm/lib/IR/LLVMContextImpl.h +++ b/contrib/llvm/lib/IR/LLVMContextImpl.h @@ -52,12 +52,12 @@ class Value; struct DenseMapAPIntKeyInfo { static inline APInt getEmptyKey() { APInt V(nullptr, 0); - V.VAL = 0; + V.U.VAL = 0; return V; } static inline APInt getTombstoneKey() { APInt V(nullptr, 0); - V.VAL = 1; + V.U.VAL = 1; return V; } static unsigned getHashValue(const APInt &Key) { @@ -552,6 +552,7 @@ template <> struct MDNodeKeyImpl<DISubprogram> { Metadata *TemplateParams; Metadata *Declaration; Metadata *Variables; + Metadata *ThrownTypes; MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, @@ -559,7 +560,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> { Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, int ThisAdjustment, unsigned Flags, bool IsOptimized, Metadata *Unit, Metadata *TemplateParams, - Metadata *Declaration, Metadata *Variables) + Metadata *Declaration, Metadata *Variables, + Metadata *ThrownTypes) : Scope(Scope), Name(Name), LinkageName(LinkageName), File(File), Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition), ScopeLine(ScopeLine), @@ -567,7 +569,7 @@ template <> struct MDNodeKeyImpl<DISubprogram> { VirtualIndex(VirtualIndex), ThisAdjustment(ThisAdjustment), Flags(Flags), IsOptimized(IsOptimized), Unit(Unit), TemplateParams(TemplateParams), Declaration(Declaration), - Variables(Variables) {} + Variables(Variables), ThrownTypes(ThrownTypes) {} MDNodeKeyImpl(const DISubprogram *N) : Scope(N->getRawScope()), Name(N->getRawName()), LinkageName(N->getRawLinkageName()), File(N->getRawFile()), @@ -578,7 +580,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> { ThisAdjustment(N->getThisAdjustment()), Flags(N->getFlags()), IsOptimized(N->isOptimized()), Unit(N->getRawUnit()), TemplateParams(N->getRawTemplateParams()), - Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()) {} + Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()), + ThrownTypes(N->getRawThrownTypes()) {} bool isKeyOf(const DISubprogram *RHS) const { return Scope == RHS->getRawScope() && Name == RHS->getRawName() && @@ -595,7 +598,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> { Unit == RHS->getUnit() && TemplateParams == RHS->getRawTemplateParams() && Declaration == RHS->getRawDeclaration() && - Variables == RHS->getRawVariables(); + Variables == RHS->getRawVariables() && + ThrownTypes == RHS->getRawThrownTypes(); } unsigned getHashValue() const { // If this is a declaration inside an ODR type, only hash the type and the @@ -695,26 +699,21 @@ template <> struct MDNodeKeyImpl<DILexicalBlockFile> { template <> struct MDNodeKeyImpl<DINamespace> { Metadata *Scope; - Metadata *File; MDString *Name; - unsigned Line; bool ExportSymbols; - MDNodeKeyImpl(Metadata *Scope, Metadata *File, MDString *Name, unsigned Line, - bool ExportSymbols) - : Scope(Scope), File(File), Name(Name), Line(Line), - ExportSymbols(ExportSymbols) {} + MDNodeKeyImpl(Metadata *Scope, MDString *Name, bool ExportSymbols) + : Scope(Scope), Name(Name), ExportSymbols(ExportSymbols) {} MDNodeKeyImpl(const DINamespace *N) - : Scope(N->getRawScope()), File(N->getRawFile()), Name(N->getRawName()), - Line(N->getLine()), ExportSymbols(N->getExportSymbols()) {} + : Scope(N->getRawScope()), Name(N->getRawName()), + ExportSymbols(N->getExportSymbols()) {} bool isKeyOf(const DINamespace *RHS) const { - return Scope == RHS->getRawScope() && File == RHS->getRawFile() && - Name == 
RHS->getRawName() && Line == RHS->getLine() && + return Scope == RHS->getRawScope() && Name == RHS->getRawName() && ExportSymbols == RHS->getExportSymbols(); } unsigned getHashValue() const { - return hash_combine(Scope, File, Name, Line); + return hash_combine(Scope, Name); } }; diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp index 7228de3d2370..2411dc5ce7dc 100644 --- a/contrib/llvm/lib/IR/Metadata.cpp +++ b/contrib/llvm/lib/IR/Metadata.cpp @@ -967,7 +967,7 @@ static void addRange(SmallVectorImpl<ConstantInt *> &EndPoints, MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) { // Given two ranges, we want to compute the union of the ranges. This - // is slightly complitade by having to combine the intervals and merge + // is slightly complicated by having to combine the intervals and merge // the ones that overlap. if (!A || !B) @@ -976,7 +976,7 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) { if (A == B) return A; - // First, walk both lists in older of the lower boundary of each interval. + // First, walk both lists in order of the lower boundary of each interval. // At each step, try to merge the new interval to the last one we adedd. SmallVector<ConstantInt *, 4> EndPoints; int AI = 0; diff --git a/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp b/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp index 9072f4bc7b12..01e1b8168afa 100644 --- a/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -16,54 +16,6 @@ #include "llvm/ADT/StringMap.h" using namespace llvm; -// Create the combined module index/summary from multiple -// per-module instances. -void ModuleSummaryIndex::mergeFrom(std::unique_ptr<ModuleSummaryIndex> Other, - uint64_t NextModuleId) { - if (Other->modulePaths().empty()) - return; - - assert(Other->modulePaths().size() == 1 && - "Can only merge from an single-module index at that time"); - - StringRef OtherModPath = Other->modulePaths().begin()->first(); - StringRef ModPath = addModulePath(OtherModPath, NextModuleId, - Other->getModuleHash(OtherModPath)) - ->first(); - - for (auto &OtherGlobalValSummaryLists : *Other) { - GlobalValue::GUID ValueGUID = OtherGlobalValSummaryLists.first; - GlobalValueSummaryList &List = OtherGlobalValSummaryLists.second; - - // Assert that the value summary list only has one entry, since we shouldn't - // have duplicate names within a single per-module index. - assert(List.size() == 1); - std::unique_ptr<GlobalValueSummary> Summary = std::move(List.front()); - - // Note the module path string ref was copied above and is still owned by - // the original per-module index. Reset it to the new module path - // string reference owned by the combined index. - Summary->setModulePath(ModPath); - - // Add new value summary to existing list. There may be duplicates when - // combining GlobalValueMap entries, due to COMDAT values. Any local - // values were given unique global IDs. - addGlobalValueSummary(ValueGUID, std::move(Summary)); - } -} - -void ModuleSummaryIndex::removeEmptySummaryEntries() { - for (auto MI = begin(), MIE = end(); MI != MIE;) { - // Only expect this to be called on a per-module index, which has a single - // entry per value entry list. - assert(MI->second.size() == 1); - if (!MI->second[0]) - MI = GlobalValueMap.erase(MI); - else - ++MI; - } -} - // Collect for the given module the list of function it defines // (GUID -> Summary). 
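[Note: the comments fixed above describe a classic interval-union walk; here is a self-contained sketch of that algorithm, with plain integer endpoints standing in for ConstantInt operands. It ignores the wrapped (overflowing) ranges the real getMostGenericRange must also handle:

    #include <algorithm>
    #include <utility>
    #include <vector>

    using Range = std::pair<int, int>; // [lo, hi), non-wrapping for brevity

    std::vector<Range> unionRanges(std::vector<Range> R) {
      std::sort(R.begin(), R.end()); // order by lower boundary
      std::vector<Range> Out;
      for (const Range &Iv : R) {
        // Merge into the last interval we added when they overlap or touch.
        if (!Out.empty() && Iv.first <= Out.back().second)
          Out.back().second = std::max(Out.back().second, Iv.second);
        else
          Out.push_back(Iv);
      }
      return Out;
    }
]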
void ModuleSummaryIndex::collectDefinedFunctionsForModule( diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp index d83bdf2acd43..02b40c93b5d8 100644 --- a/contrib/llvm/lib/IR/Value.cpp +++ b/contrib/llvm/lib/IR/Value.cpp @@ -578,9 +578,9 @@ unsigned Value::getPointerDereferenceableBytes(const DataLayout &DL, CanBeNull = true; } } else if (auto CS = ImmutableCallSite(this)) { - DerefBytes = CS.getDereferenceableBytes(0); + DerefBytes = CS.getDereferenceableBytes(AttributeList::ReturnIndex); if (DerefBytes == 0) { - DerefBytes = CS.getDereferenceableOrNullBytes(0); + DerefBytes = CS.getDereferenceableOrNullBytes(AttributeList::ReturnIndex); CanBeNull = true; } } else if (const LoadInst *LI = dyn_cast<LoadInst>(this)) { @@ -649,7 +649,7 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const { Align = DL.getPrefTypeAlignment(AllocatedType); } } else if (auto CS = ImmutableCallSite(this)) - Align = CS.getAttributes().getParamAlignment(AttributeList::ReturnIndex); + Align = CS.getAttributes().getRetAlignment(); else if (const LoadInst *LI = dyn_cast<LoadInst>(this)) if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) { ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); @@ -711,7 +711,7 @@ void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) { setPrevPtr(List); if (Next) { Next->setPrevPtr(&Next); - assert(V == Next->V && "Added to wrong list?"); + assert(getValPtr() == Next->getValPtr() && "Added to wrong list?"); } } @@ -726,14 +726,14 @@ void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) { } void ValueHandleBase::AddToUseList() { - assert(V && "Null pointer doesn't have a use list!"); + assert(getValPtr() && "Null pointer doesn't have a use list!"); - LLVMContextImpl *pImpl = V->getContext().pImpl; + LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl; - if (V->HasValueHandle) { + if (getValPtr()->HasValueHandle) { // If this value already has a ValueHandle, then it must be in the // ValueHandles map already. - ValueHandleBase *&Entry = pImpl->ValueHandles[V]; + ValueHandleBase *&Entry = pImpl->ValueHandles[getValPtr()]; assert(Entry && "Value doesn't have any handles?"); AddToExistingUseList(&Entry); return; @@ -747,10 +747,10 @@ void ValueHandleBase::AddToUseList() { DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles; const void *OldBucketPtr = Handles.getPointerIntoBucketsArray(); - ValueHandleBase *&Entry = Handles[V]; + ValueHandleBase *&Entry = Handles[getValPtr()]; assert(!Entry && "Value really did already have handles?"); AddToExistingUseList(&Entry); - V->HasValueHandle = true; + getValPtr()->HasValueHandle = true; // If reallocation didn't happen or if this was the first insertion, don't // walk the table. @@ -762,14 +762,14 @@ void ValueHandleBase::AddToUseList() { // Okay, reallocation did happen. Fix the Prev Pointers. for (DenseMap<Value*, ValueHandleBase*>::iterator I = Handles.begin(), E = Handles.end(); I != E; ++I) { - assert(I->second && I->first == I->second->V && + assert(I->second && I->first == I->second->getValPtr() && "List invariant broken!"); I->second->setPrevPtr(&I->second); } } void ValueHandleBase::RemoveFromUseList() { - assert(V && V->HasValueHandle && + assert(getValPtr() && getValPtr()->HasValueHandle && "Pointer doesn't have a use list!"); // Unlink this from its use list. @@ -786,11 +786,11 @@ void ValueHandleBase::RemoveFromUseList() { // If the Next pointer was null, then it is possible that this was the last // ValueHandle watching VP. 
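[Note: the index arithmetic being rewritten throughout this diff follows one scheme: return attributes live at index 0, the i-th argument's at i + FirstArgIndex, and function attributes at a sentinel index. The constants below are assumed to mirror llvm::AttributeList's values and are shown only to make the off-by-one visible:

    // Assumed values mirroring llvm::AttributeList; illustrative only.
    enum : unsigned {
      ReturnIndex = 0U,
      FirstArgIndex = 1U,
      FunctionIndex = ~0U, // sentinel for function-level attributes
    };

    // The 0-based argument number that callers now pass everywhere:
    inline unsigned attrIndexForArg(unsigned ArgNo) {
      return ArgNo + FirstArgIndex;
    }
]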
If so, delete its entry from the ValueHandles // map. - LLVMContextImpl *pImpl = V->getContext().pImpl; + LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl; DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles; if (Handles.isPointerIntoBucketsArray(PrevPtr)) { - Handles.erase(V); - V->HasValueHandle = false; + Handles.erase(getValPtr()); + getValPtr()->HasValueHandle = false; } } @@ -820,13 +820,10 @@ void ValueHandleBase::ValueIsDeleted(Value *V) { switch (Entry->getKind()) { case Assert: break; - case Tracking: - // Mark that this value has been deleted by setting it to an invalid Value - // pointer. - Entry->operator=(DenseMapInfo<Value *>::getTombstoneKey()); - break; case Weak: - // Weak just goes to null, which will unlink it from the list. + case WeakTracking: + // WeakTracking and Weak just go to null, which unlinks them + // from the list. Entry->operator=(nullptr); break; case Callback: @@ -874,16 +871,10 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) { switch (Entry->getKind()) { case Assert: - // Asserting handle does not follow RAUW implicitly. - break; - case Tracking: - // Tracking goes to new value like a WeakVH. Note that this may make it - // something incompatible with its templated type. We don't want to have a - // virtual (or inline) interface to handle this though, so instead we make - // the TrackingVH accessors guarantee that a client never sees this value. - - LLVM_FALLTHROUGH; case Weak: + // Asserting and Weak handles do not follow RAUW implicitly. + break; + case WeakTracking: // Weak goes to the new value, which will unlink it from Old's list. Entry->operator=(New); break; @@ -895,18 +886,17 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) { } #ifndef NDEBUG - // If any new tracking or weak value handles were added while processing the + // If any new weak value handles were added while processing the // list, then complain about it now. if (Old->HasValueHandle) for (Entry = pImpl->ValueHandles[Old]; Entry; Entry = Entry->Next) switch (Entry->getKind()) { - case Tracking: - case Weak: + case WeakTracking: dbgs() << "After RAUW from " << *Old->getType() << " %" << Old->getName() << " to " << *New->getType() << " %" << New->getName() << "\n"; - llvm_unreachable("A tracking or weak value handle still pointed to the" - " old value!\n"); + llvm_unreachable( + "A weak tracking value handle still pointed to the old value!\n"); default: break; } diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp index 4e04020f206e..65e124562493 100644 --- a/contrib/llvm/lib/IR/Verifier.cpp +++ b/contrib/llvm/lib/IR/Verifier.cpp @@ -1050,6 +1050,14 @@ void Verifier::visitDISubprogram(const DISubprogram &N) { // Subprogram declarations (part of the type hierarchy). AssertDI(!Unit, "subprogram declarations must not have a compile unit", &N); } + + if (auto *RawThrownTypes = N.getRawThrownTypes()) { + auto *ThrownTypes = dyn_cast<MDTuple>(RawThrownTypes); + AssertDI(ThrownTypes, "invalid thrown types list", &N, RawThrownTypes); + for (Metadata *Op : ThrownTypes->operands()) + AssertDI(Op && isa<DIType>(Op), "invalid thrown type", &N, ThrownTypes, + Op); + } } void Verifier::visitDILexicalBlockBase(const DILexicalBlockBase &N) { @@ -1195,9 +1203,9 @@ void Verifier::visitComdat(const Comdat &C) { void Verifier::visitModuleIdents(const Module &M) { const NamedMDNode *Idents = M.getNamedMetadata("llvm.ident"); - if (!Idents) + if (!Idents) return; - + // llvm.ident takes a list of metadata entry. 
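[Note: the deletion and RAUW switches above reduce to a small state machine; this toy model (not LLVM's ValueHandleBase) summarizes the semantics after the Weak/WeakTracking split:

    struct Value {};

    enum class HandleKind { Assert, Weak, WeakTracking, Callback };

    struct Handle {
      HandleKind Kind;
      Value *V;

      void valueIsDeleted() {
        if (Kind == HandleKind::Weak || Kind == HandleKind::WeakTracking)
          V = nullptr; // both weak kinds null out on deletion
      }

      void valueIsRAUWd(Value *New) {
        if (Kind == HandleKind::WeakTracking)
          V = New; // Assert and plain Weak deliberately do not follow RAUW
      }
    };
]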
Each entry has only one string. // Scan each llvm.ident entry and make sure that this requirement is met. for (const MDNode *N : Idents->operands()) { @@ -1207,7 +1215,7 @@ void Verifier::visitModuleIdents(const Module &M) { ("invalid value for llvm.ident metadata entry operand" "(the operand should be a string)"), N->getOperand(0)); - } + } } void Verifier::visitModuleFlags(const Module &M) { @@ -1344,6 +1352,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) { case Attribute::InaccessibleMemOnly: case Attribute::InaccessibleMemOrArgMemOnly: case Attribute::AllocSize: + case Attribute::Speculatable: return true; default: break; @@ -1829,7 +1838,7 @@ void Verifier::verifyStatepoint(ImmutableCallSite CS) { Assert(ExpectedNumArgs <= (int)CS.arg_size(), "gc.statepoint too few arguments according to length fields", &CI); - // Check that the only uses of this gc.statepoint are gc.result or + // Check that the only uses of this gc.statepoint are gc.result or // gc.relocate calls which are tied to this statepoint and thus part // of the same statepoint sequence for (const User *U : CI.users()) { @@ -1975,6 +1984,7 @@ void Verifier::visitFunction(const Function &F) { "Calling convention requires void return type", &F); LLVM_FALLTHROUGH; case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: @@ -2602,6 +2612,15 @@ void Verifier::verifyCallSite(CallSite CS) { Assert(verifyAttributeCount(Attrs, CS.arg_size()), "Attribute after last parameter!", I); + if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Speculatable)) { + // Don't allow speculatable on call sites, unless the underlying function + // declaration is also speculatable. + Function *Callee + = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + Assert(Callee && Callee->isSpeculatable(), + "speculatable attribute may not apply to call sites", I); + } + // Verify call attributes. verifyFunctionAttrs(FTy, Attrs, I); @@ -2754,7 +2773,7 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) { Copy.addAttribute(AK); } if (Attrs.hasParamAttribute(I, Attribute::Alignment)) - Copy.addAlignmentAttr(Attrs.getParamAlignment(I + 1)); + Copy.addAlignmentAttr(Attrs.getParamAlignment(I)); return Copy; } @@ -3900,7 +3919,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // If the intrinsic takes MDNode arguments, verify that they are either global // or are local to *this* function. 
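[Note: the new verifier check above amounts to a single predicate: a call site may carry the speculatable attribute only when the callee resolves to a function that is itself speculatable. Restated with toy types:

    struct Function {
      bool Speculatable = false;
      bool isSpeculatable() const { return Speculatable; }
    };

    // Returns true when the verifier should accept the call site.
    bool speculatableCallSiteOK(bool CallSiteHasAttr, const Function *Callee) {
      if (!CallSiteHasAttr)
        return true; // nothing to check
      return Callee != nullptr && Callee->isSpeculatable();
    }
]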
- for (Value *V : CS.args()) + for (Value *V : CS.args()) if (auto *MD = dyn_cast<MetadataAsValue>(V)) visitMetadataAsValue(*MD, CS.getCaller()); @@ -3973,9 +3992,9 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { auto IsValidAlignment = [&](uint64_t Alignment) { return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment); }; - - uint64_t DstAlignment = CS.getParamAlignment(1), - SrcAlignment = CS.getParamAlignment(2); + + uint64_t DstAlignment = CS.getParamAlignment(0), + SrcAlignment = CS.getParamAlignment(1); Assert(IsValidAlignment(DstAlignment), "incorrect alignment of the destination argument", @@ -4212,7 +4231,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { } case Intrinsic::masked_load: { Assert(CS.getType()->isVectorTy(), "masked_load: must return a vector", CS); - + Value *Ptr = CS.getArgOperand(0); //Value *Alignment = CS.getArgOperand(1); Value *Mask = CS.getArgOperand(2); @@ -4222,12 +4241,12 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // DataTy is the overloaded type Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType(); - Assert(DataTy == CS.getType(), + Assert(DataTy == CS.getType(), "masked_load: return must match pointer type", CS); Assert(PassThru->getType() == DataTy, "masked_load: pass through and data type must match", CS); Assert(Mask->getType()->getVectorNumElements() == - DataTy->getVectorNumElements(), + DataTy->getVectorNumElements(), "masked_load: vector mask must be same length as data", CS); break; } @@ -4241,10 +4260,10 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // DataTy is the overloaded type Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType(); - Assert(DataTy == Val->getType(), + Assert(DataTy == Val->getType(), "masked_store: storee must match pointer type", CS); Assert(Mask->getType()->getVectorNumElements() == - DataTy->getVectorNumElements(), + DataTy->getVectorNumElements(), "masked_store: vector mask must be same length as data", CS); break; } diff --git a/contrib/llvm/lib/LTO/LTO.cpp b/contrib/llvm/lib/LTO/LTO.cpp index 1bc0d7361d4c..0afa1ba6ecd6 100644 --- a/contrib/llvm/lib/LTO/LTO.cpp +++ b/contrib/llvm/lib/LTO/LTO.cpp @@ -25,7 +25,6 @@ #include "llvm/LTO/LTOBackend.h" #include "llvm/Linker/IRMover.h" #include "llvm/Object/IRObjectFile.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/Error.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" @@ -592,11 +591,9 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE) { - Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr = BM.getSummary(); - if (!SummaryOrErr) - return SummaryOrErr.takeError(); - ThinLTO.CombinedIndex.mergeFrom(std::move(*SummaryOrErr), - ThinLTO.ModuleMap.size()); + if (Error Err = + BM.readSummary(ThinLTO.CombinedIndex, ThinLTO.ModuleMap.size())) + return Err; for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); diff --git a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 0d845a26d0c2..440275c34258 100644 --- a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -33,7 +33,6 @@ #include "llvm/Linker/Linker.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/IRObjectFile.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CachePruning.h" 
#include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" @@ -566,25 +565,18 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const { * "thin-link". */ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() { - std::unique_ptr<ModuleSummaryIndex> CombinedIndex; + std::unique_ptr<ModuleSummaryIndex> CombinedIndex = + llvm::make_unique<ModuleSummaryIndex>(); uint64_t NextModuleId = 0; for (auto &ModuleBuffer : Modules) { - Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = - object::ModuleSummaryIndexObjectFile::create( - ModuleBuffer.getMemBuffer()); - if (!ObjOrErr) { + if (Error Err = readModuleSummaryIndex(ModuleBuffer.getMemBuffer(), + *CombinedIndex, NextModuleId++)) { // FIXME diagnose logAllUnhandledErrors( - ObjOrErr.takeError(), errs(), - "error: can't create ModuleSummaryIndexObjectFile for buffer: "); + std::move(Err), errs(), + "error: can't create module summary index for buffer: "); return nullptr; } - auto Index = (*ObjOrErr)->takeIndex(); - if (CombinedIndex) { - CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId); - } else { - CombinedIndex = std::move(Index); - } } return CombinedIndex; } diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp index ee9c25cda94f..e86db933af3c 100644 --- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp +++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp @@ -63,7 +63,7 @@ using namespace llvm; namespace { -typedef DenseMap<const MCSectionELF *, uint32_t> SectionIndexMapTy; +using SectionIndexMapTy = DenseMap<const MCSectionELF *, uint32_t>; class ELFObjectWriter; @@ -194,8 +194,8 @@ public: ELFSymbolData &MSD, const MCAsmLayout &Layout); // Start and end offset of each section - typedef std::map<const MCSectionELF *, std::pair<uint64_t, uint64_t>> - SectionOffsetsTy; + using SectionOffsetsTy = + std::map<const MCSectionELF *, std::pair<uint64_t, uint64_t>>; bool shouldRelocateWithSymbol(const MCAssembler &Asm, const MCSymbolRefExpr *RefA, @@ -208,7 +208,7 @@ public: uint64_t &FixedValue) override; // Map from a signature symbol to the group section index - typedef DenseMap<const MCSymbol *, unsigned> RevGroupMapTy; + using RevGroupMapTy = DenseMap<const MCSymbol *, unsigned>; /// Compute the symbol table data /// diff --git a/contrib/llvm/lib/MC/MCCodeView.cpp b/contrib/llvm/lib/MC/MCCodeView.cpp index 99a5c11a498e..2b97ecc0fd2c 100644 --- a/contrib/llvm/lib/MC/MCCodeView.cpp +++ b/contrib/llvm/lib/MC/MCCodeView.cpp @@ -145,7 +145,7 @@ void CodeViewContext::emitStringTable(MCObjectStreamer &OS) { MCSymbol *StringBegin = Ctx.createTempSymbol("strtab_begin", false), *StringEnd = Ctx.createTempSymbol("strtab_end", false); - OS.EmitIntValue(unsigned(ModuleSubstreamKind::StringTable), 4); + OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::StringTable), 4); OS.emitAbsoluteSymbolDiff(StringEnd, StringBegin, 4); OS.EmitLabel(StringBegin); @@ -172,7 +172,7 @@ void CodeViewContext::emitFileChecksums(MCObjectStreamer &OS) { MCSymbol *FileBegin = Ctx.createTempSymbol("filechecksums_begin", false), *FileEnd = Ctx.createTempSymbol("filechecksums_end", false); - OS.EmitIntValue(unsigned(ModuleSubstreamKind::FileChecksums), 4); + OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::FileChecksums), 4); OS.emitAbsoluteSymbolDiff(FileEnd, FileBegin, 4); OS.EmitLabel(FileBegin); @@ -197,7 +197,7 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS, MCSymbol *LineBegin = Ctx.createTempSymbol("linetable_begin", false), *LineEnd = 
Ctx.createTempSymbol("linetable_end", false); - OS.EmitIntValue(unsigned(ModuleSubstreamKind::Lines), 4); + OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::Lines), 4); OS.emitAbsoluteSymbolDiff(LineEnd, LineBegin, 4); OS.EmitLabel(LineBegin); OS.EmitCOFFSecRel32(FuncBegin, /*Offset=*/0); @@ -208,7 +208,7 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS, bool HaveColumns = any_of(Locs, [](const MCCVLineEntry &LineEntry) { return LineEntry.getColumn() != 0; }); - OS.EmitIntValue(HaveColumns ? int(LineFlags::HaveColumns) : 0, 2); + OS.EmitIntValue(HaveColumns ? int(LF_HaveColumns) : 0, 2); OS.emitAbsoluteSymbolDiff(FuncEnd, FuncBegin, 4); for (auto I = Locs.begin(), E = Locs.end(); I != E;) { diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp index 9f94264684f9..b685790910d0 100644 --- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp @@ -286,6 +286,10 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) { ((CMModel == CodeModel::Large) ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); break; + case Triple::bpfel: + case Triple::bpfeb: + FDECFIEncoding = dwarf::DW_EH_PE_sdata8; + break; default: FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; break; diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp index 2fa9c03b608e..f36a21bf1121 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp @@ -412,7 +412,7 @@ private: DK_CFI_REMEMBER_STATE, DK_CFI_RESTORE_STATE, DK_CFI_SAME_VALUE, DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_SIGNAL_FRAME, DK_CFI_UNDEFINED, DK_CFI_REGISTER, DK_CFI_WINDOW_SAVE, - DK_MACROS_ON, DK_MACROS_OFF, + DK_MACROS_ON, DK_MACROS_OFF, DK_ALTMACRO, DK_NOALTMACRO, DK_MACRO, DK_EXITM, DK_ENDM, DK_ENDMACRO, DK_PURGEM, DK_SLEB128, DK_ULEB128, DK_ERR, DK_ERROR, DK_WARNING, @@ -484,7 +484,8 @@ private: bool parseDirectiveEndMacro(StringRef Directive); bool parseDirectiveMacro(SMLoc DirectiveLoc); bool parseDirectiveMacrosOnOff(StringRef Directive); - + // alternate macro mode directives + bool parseDirectiveAltmacro(StringRef Directive); // ".bundle_align_mode" bool parseDirectiveBundleAlignMode(); // ".bundle_lock" @@ -1922,6 +1923,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveMacrosOnOff(IDVal); case DK_MACRO: return parseDirectiveMacro(IDLoc); + case DK_ALTMACRO: + case DK_NOALTMACRO: + return parseDirectiveAltmacro(IDVal); case DK_EXITM: return parseDirectiveExitMacro(IDVal); case DK_ENDM: @@ -2270,9 +2274,18 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, } else { bool VarargParameter = HasVararg && Index == (NParameters - 1); for (const AsmToken &Token : A[Index]) + // For altmacro mode, you can write '%expr'. + // The prefix '%' evaluates the expression 'expr' + // and uses the result as a string (e.g. replace %(1+2) with the string "3"). + // Here, we identify the integer token which is the result of the + // absolute expression evaluation and replace it with its string representation. + if ((Lexer.IsaAltMacroMode()) && + (*(Token.getString().begin()) == '%') && Token.is(AsmToken::Integer)) + // Emit an integer value to the buffer. + OS << Token.getIntVal(); // We expect no quotes around the string's contents when // parsing for varargs. 
- if (Token.getKind() != AsmToken::String || VarargParameter) + else if (Token.isNot(AsmToken::String) || VarargParameter) OS << Token.getString(); else OS << Token.getStringContents(); @@ -2443,13 +2456,29 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M, NamedParametersFound = true; } + bool Vararg = HasVararg && Parameter == (NParameters - 1); if (NamedParametersFound && FA.Name.empty()) return Error(IDLoc, "cannot mix positional and keyword arguments"); - bool Vararg = HasVararg && Parameter == (NParameters - 1); - if (parseMacroArgument(FA.Value, Vararg)) - return true; + if (Lexer.IsaAltMacroMode() && Lexer.is(AsmToken::Percent)) { + SMLoc StrLoc = Lexer.getLoc(); + SMLoc EndLoc; + const MCExpr *AbsoluteExp; + int64_t Value; + /// Eat '%' + Lex(); + if (parseExpression(AbsoluteExp, EndLoc)) + return false; + if (!AbsoluteExp->evaluateAsAbsolute(Value)) + return Error(StrLoc, "expected absolute expression"); + const char *StrChar = StrLoc.getPointer(); + const char *EndChar = EndLoc.getPointer(); + AsmToken newToken(AsmToken::Integer, StringRef(StrChar , EndChar - StrChar), Value); + FA.Value.push_back(newToken); + } + else if(parseMacroArgument(FA.Value, Vararg)) + return true; unsigned PI = Parameter; if (!FA.Name.empty()) { @@ -3841,6 +3870,19 @@ bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) { return false; } +/// parseDirectiveAltmacro +/// ::= .altmacro +/// ::= .noaltmacro +bool AsmParser::parseDirectiveAltmacro(StringRef Directive) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '" + Directive + "' directive"); + if (Directive == ".altmacro") + getLexer().SetAltMacroMode(true); + else + getLexer().SetAltMacroMode(false); + return false; +} + /// parseDirectiveMacrosOnOff /// ::= .macros_on /// ::= .macros_off @@ -4938,6 +4980,8 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".err"] = DK_ERR; DirectiveKindMap[".error"] = DK_ERROR; DirectiveKindMap[".warning"] = DK_WARNING; + DirectiveKindMap[".altmacro"] = DK_ALTMACRO; + DirectiveKindMap[".noaltmacro"] = DK_NOALTMACRO; DirectiveKindMap[".reloc"] = DK_RELOC; DirectiveKindMap[".dc"] = DK_DC; DirectiveKindMap[".dc.a"] = DK_DC_A; diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp index f8fe78aece0c..1d12ab858284 100644 --- a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp +++ b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp @@ -13,7 +13,7 @@ using namespace llvm; -MCAsmLexer::MCAsmLexer() { +MCAsmLexer::MCAsmLexer() : AltMacroMode(false) { CurTok.emplace_back(AsmToken::Space, StringRef()); } diff --git a/contrib/llvm/lib/MC/StringTableBuilder.cpp b/contrib/llvm/lib/MC/StringTableBuilder.cpp index fbd7ba60bc90..a0fb33846fcf 100644 --- a/contrib/llvm/lib/MC/StringTableBuilder.cpp +++ b/contrib/llvm/lib/MC/StringTableBuilder.cpp @@ -58,7 +58,7 @@ void StringTableBuilder::write(raw_ostream &OS) const { OS << Data; } -typedef std::pair<CachedHashStringRef, size_t> StringPair; +using StringPair = std::pair<CachedHashStringRef, size_t>; void StringTableBuilder::write(uint8_t *Buf) const { assert(isFinalized()); diff --git a/contrib/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm/lib/MC/WasmObjectWriter.cpp index 6444046a30d7..0540c4c47a3f 100644 --- a/contrib/llvm/lib/MC/WasmObjectWriter.cpp +++ b/contrib/llvm/lib/MC/WasmObjectWriter.cpp @@ -468,16 +468,16 @@ static void ApplyRelocations( // Write out the portions of the relocation records that the linker will // need to handle. 
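[Note: the typedef-to-using rewrites in the MC files above are purely mechanical, but the alias form reads left to right and also scales to alias templates, which typedef cannot express:

    #include <map>
    #include <string>
    #include <type_traits>
    #include <vector>

    typedef std::map<std::string, std::vector<int>> OldStyle; // before
    using NewStyle = std::map<std::string, std::vector<int>>; // after

    static_assert(std::is_same<OldStyle, NewStyle>::value,
                  "the two declarations name the same type");

    // Alias templates are the capability typedef lacks entirely:
    template <typename T> using Vec = std::vector<T>;
    Vec<int> Example;
]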
-static void WriteRelocations( - ArrayRef<WasmRelocationEntry> Relocations, - raw_pwrite_stream &Stream, - DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices) -{ +static void +WriteRelocations(ArrayRef<WasmRelocationEntry> Relocations, + raw_pwrite_stream &Stream, + DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices, + uint64_t HeaderSize) { for (const WasmRelocationEntry RelEntry : Relocations) { encodeULEB128(RelEntry.Type, Stream); uint64_t Offset = RelEntry.Offset + - RelEntry.FixupSection->getSectionOffset(); + RelEntry.FixupSection->getSectionOffset() + HeaderSize; uint32_t Index = SymbolIndices[RelEntry.Symbol]; int64_t Addend = RelEntry.Addend; @@ -913,12 +913,14 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, // For now, always emit the memory section, since loads and stores are not // valid without it. In the future, we could perhaps be more clever and omit // it if there are no loads or stores. - startSection(Section, wasm::WASM_SEC_MEMORY); + uint32_t NumPages = + (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize; + startSection(Section, wasm::WASM_SEC_MEMORY); encodeULEB128(1, getStream()); // number of memory spaces encodeULEB128(0, getStream()); // flags - encodeULEB128(DataBytes.size(), getStream()); // initial + encodeULEB128(NumPages, getStream()); // initial endSection(Section); @@ -1050,6 +1052,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, } // === Data Section ========================================================== + uint32_t DataSectionHeaderSize = 0; if (!DataBytes.empty()) { startSection(Section, wasm::WASM_SEC_DATA); @@ -1059,11 +1062,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, encodeSLEB128(0, getStream()); // offset write8(wasm::WASM_OPCODE_END); encodeULEB128(DataBytes.size(), getStream()); // size + DataSectionHeaderSize = getStream().tell() - Section.ContentsOffset; writeBytes(DataBytes); // data // Apply fixups. 
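[Note: the memory-section fix above changes the encoded initial size from bytes to 64 KiB pages via a standard ceiling division; a quick sketch using the page size defined by the Wasm spec:

    #include <cstdint>

    constexpr uint64_t WasmPageSize = 65536; // 64 KiB per the Wasm spec

    uint32_t pagesFor(uint64_t DataBytes) {
      // Round up: any partial page still needs a whole page of memory.
      return static_cast<uint32_t>((DataBytes + WasmPageSize - 1) /
                                   WasmPageSize);
    }

    static_assert(WasmPageSize == 1ULL << 16, "page size is 2^16");
    // pagesFor(0) == 0, pagesFor(1) == 1, pagesFor(65536) == 1,
    // pagesFor(65537) == 2.
]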
ApplyRelocations(DataRelocations, getStream(), SymbolIndices, - Section.ContentsOffset); + Section.ContentsOffset + DataSectionHeaderSize); endSection(Section); } @@ -1107,7 +1111,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream()); - WriteRelocations(CodeRelocations, getStream(), SymbolIndices); + WriteRelocations(CodeRelocations, getStream(), SymbolIndices, 0); WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream()); endSection(Section); @@ -1121,7 +1125,8 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, encodeULEB128(DataRelocations.size(), getStream()); - WriteRelocations(DataRelocations, getStream(), SymbolIndices); + WriteRelocations(DataRelocations, getStream(), SymbolIndices, + DataSectionHeaderSize); endSection(Section); } diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp index da8fe73f823b..e99a548ac001 100644 --- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/JamCRC.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> #include <cassert> #include <cstddef> #include <cstdint> @@ -54,7 +55,7 @@ using llvm::support::endian::write32le; namespace { -typedef SmallString<COFF::NameSize> name; +using name = SmallString<COFF::NameSize>; enum AuxiliaryType { ATFunctionDefinition, @@ -75,7 +76,7 @@ class COFFSymbol { public: COFF::symbol Data = {}; - typedef SmallVector<AuxSymbol, 1> AuxiliarySymbols; + using AuxiliarySymbols = SmallVector<AuxSymbol, 1>; name Name; int Index; @@ -107,7 +108,7 @@ struct COFFRelocation { static size_t size() { return COFF::RelocationSize; } }; -typedef std::vector<COFFRelocation> relocations; +using relocations = std::vector<COFFRelocation>; class COFFSection { public: @@ -124,11 +125,11 @@ public: class WinCOFFObjectWriter : public MCObjectWriter { public: - typedef std::vector<std::unique_ptr<COFFSymbol>> symbols; - typedef std::vector<std::unique_ptr<COFFSection>> sections; + using symbols = std::vector<std::unique_ptr<COFFSymbol>>; + using sections = std::vector<std::unique_ptr<COFFSection>>; - typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map; - typedef DenseMap<MCSection const *, COFFSection *> section_map; + using symbol_map = DenseMap<MCSymbol const *, COFFSymbol *>; + using section_map = DenseMap<MCSection const *, COFFSection *>; std::unique_ptr<MCWinCOFFObjectTargetWriter> TargetObjectWriter; diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp index e89a4a315c46..5798a3540f53 100644 --- a/contrib/llvm/lib/Object/ELF.cpp +++ b/contrib/llvm/lib/Object/ELF.cpp @@ -13,9 +13,11 @@ using namespace llvm; using namespace object; -#define ELF_RELOC(name, value) \ - case ELF::name: \ - return #name; \ +#define STRINGIFY_ENUM_CASE(ns, name) \ + case ns::name: \ + return #name; + +#define ELF_RELOC(name, value) STRINGIFY_ENUM_CASE(ELF, name) StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { @@ -141,3 +143,61 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, } #undef ELF_RELOC + +StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { + switch (Machine) { + case ELF::EM_ARM: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_EXIDX); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_PREEMPTMAP); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_ATTRIBUTES); + STRINGIFY_ENUM_CASE(ELF, 
SHT_ARM_DEBUGOVERLAY); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_OVERLAYSECTION); + } + break; + case ELF::EM_HEXAGON: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_HEX_ORDERED); } + break; + case ELF::EM_X86_64: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_X86_64_UNWIND); } + break; + case ELF::EM_MIPS: + case ELF::EM_MIPS_RS3_LE: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_REGINFO); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_OPTIONS); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_DWARF); + } + break; + default: + break; + } + + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_NULL); + STRINGIFY_ENUM_CASE(ELF, SHT_PROGBITS); + STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_STRTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_RELA); + STRINGIFY_ENUM_CASE(ELF, SHT_HASH); + STRINGIFY_ENUM_CASE(ELF, SHT_DYNAMIC); + STRINGIFY_ENUM_CASE(ELF, SHT_NOTE); + STRINGIFY_ENUM_CASE(ELF, SHT_NOBITS); + STRINGIFY_ENUM_CASE(ELF, SHT_REL); + STRINGIFY_ENUM_CASE(ELF, SHT_SHLIB); + STRINGIFY_ENUM_CASE(ELF, SHT_DYNSYM); + STRINGIFY_ENUM_CASE(ELF, SHT_INIT_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_FINI_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_PREINIT_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_GROUP); + STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verneed); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_versym); + default: + return "Unknown"; + } +} diff --git a/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp b/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp deleted file mode 100644 index 91f93a41032e..000000000000 --- a/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp +++ /dev/null @@ -1,129 +0,0 @@ -//==- ModuleSummaryIndexObjectFile.cpp - Summary index file implementation -==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Part of the ModuleSummaryIndexObjectFile class implementation. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/IR/ModuleSummaryIndex.h" -#include "llvm/Object/Binary.h" -#include "llvm/Object/Error.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include <algorithm> -#include <memory> -#include <system_error> - -using namespace llvm; -using namespace object; - -static cl::opt<bool> IgnoreEmptyThinLTOIndexFile( - "ignore-empty-index-file", cl::ZeroOrMore, - cl::desc( - "Ignore an empty index file and perform non-ThinLTO compilation"), - cl::init(false)); - -ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile( - MemoryBufferRef Object, std::unique_ptr<ModuleSummaryIndex> I) - : SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) { -} - -ModuleSummaryIndexObjectFile::~ModuleSummaryIndexObjectFile() = default; - -std::unique_ptr<ModuleSummaryIndex> ModuleSummaryIndexObjectFile::takeIndex() { - return std::move(Index); -} - -ErrorOr<MemoryBufferRef> -ModuleSummaryIndexObjectFile::findBitcodeInObject(const ObjectFile &Obj) { - for (const SectionRef &Sec : Obj.sections()) { - if (Sec.isBitcode()) { - StringRef SecContents; - if (std::error_code EC = Sec.getContents(SecContents)) - return EC; - return MemoryBufferRef(SecContents, Obj.getFileName()); - } - } - - return object_error::bitcode_section_not_found; -} - -ErrorOr<MemoryBufferRef> -ModuleSummaryIndexObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { - sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); - switch (Type) { - case sys::fs::file_magic::bitcode: - return Object; - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::coff_object: { - Expected<std::unique_ptr<ObjectFile>> ObjFile = - ObjectFile::createObjectFile(Object, Type); - if (!ObjFile) - return errorToErrorCode(ObjFile.takeError()); - return findBitcodeInObject(*ObjFile->get()); - } - default: - return object_error::invalid_file_type; - } -} - -// Parse module summary index in the given memory buffer. -// Return new ModuleSummaryIndexObjectFile instance containing parsed -// module summary/index. -Expected<std::unique_ptr<ModuleSummaryIndexObjectFile>> -ModuleSummaryIndexObjectFile::create(MemoryBufferRef Object) { - ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object); - if (!BCOrErr) - return errorCodeToError(BCOrErr.getError()); - - Expected<std::unique_ptr<ModuleSummaryIndex>> IOrErr = - getModuleSummaryIndex(BCOrErr.get()); - - if (!IOrErr) - return IOrErr.takeError(); - - std::unique_ptr<ModuleSummaryIndex> Index = std::move(IOrErr.get()); - return llvm::make_unique<ModuleSummaryIndexObjectFile>(Object, - std::move(Index)); -} - -// Parse the module summary index out of an IR file and return the summary -// index object if found, or nullptr if not. 
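[Note: with ModuleSummaryIndexObjectFile.cpp deleted, the per-module index objects and the mergeFrom step disappear; the LTO and ThinLTO hunks earlier in this diff instead parse each module buffer directly into one combined index. A hypothetical sketch of that flow, with simplified stand-in types (the real readModuleSummaryIndex lives in the Bitcode reader and returns llvm::Error):

    #include <memory>
    #include <vector>

    struct ModuleSummaryIndex {};
    struct MemoryBufferRef {};

    // Stand-in for the real reader, which appends into the given index.
    bool readModuleSummaryIndex(const MemoryBufferRef &, ModuleSummaryIndex &,
                                unsigned /*ModuleId*/) {
      return false; // false = no error, for this sketch
    }

    std::unique_ptr<ModuleSummaryIndex>
    linkCombinedIndex(const std::vector<MemoryBufferRef> &Modules) {
      auto Combined = std::make_unique<ModuleSummaryIndex>();
      unsigned NextModuleId = 0;
      for (const MemoryBufferRef &B : Modules)
        if (readModuleSummaryIndex(B, *Combined, NextModuleId++))
          return nullptr; // a real caller would diagnose the error
      return Combined;
    }
]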
-Expected<std::unique_ptr<ModuleSummaryIndex>> -llvm::getModuleSummaryIndexForFile(StringRef Path, StringRef Identifier) { - ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = - MemoryBuffer::getFileOrSTDIN(Path); - std::error_code EC = FileOrErr.getError(); - if (EC) - return errorCodeToError(EC); - std::unique_ptr<MemoryBuffer> MemBuffer = std::move(FileOrErr.get()); - // If Identifier is non-empty, use it as the buffer identifier, which - // will become the module path in the index. - if (Identifier.empty()) - Identifier = MemBuffer->getBufferIdentifier(); - MemoryBufferRef BufferRef(MemBuffer->getBuffer(), Identifier); - if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize()) - return nullptr; - Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = - object::ModuleSummaryIndexObjectFile::create(BufferRef); - if (!ObjOrErr) - return ObjOrErr.takeError(); - - object::ModuleSummaryIndexObjectFile &Obj = **ObjOrErr; - return Obj.takeIndex(); -} diff --git a/contrib/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm/lib/Passes/PassBuilder.cpp index 55ac2541948e..8db65f7f0e82 100644 --- a/contrib/llvm/lib/Passes/PassBuilder.cpp +++ b/contrib/llvm/lib/Passes/PassBuilder.cpp @@ -125,6 +125,7 @@ #include "llvm/Transforms/Scalar/Reassociate.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" #include "llvm/Transforms/Scalar/SpeculativeExecution.h" @@ -150,6 +151,10 @@ using namespace llvm; static cl::opt<unsigned> MaxDevirtIterations("pm-max-devirt-iterations", cl::ReallyHidden, cl::init(4)); +static cl::opt<bool> EnableGVNHoist( + "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); + static Regex DefaultAliasRegex("^(default|lto-pre-link|lto)<(O[0123sz])>$"); static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { @@ -454,7 +459,8 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); - EarlyFPM.addPass(GVNHoistPass()); + if (EnableGVNHoist) + EarlyFPM.addPass(GVNHoistPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); // Interprocedural constant propagation now that basic cleanup has occured diff --git a/contrib/llvm/lib/Passes/PassRegistry.def b/contrib/llvm/lib/Passes/PassRegistry.def index efd4c097a675..d59ec7f85840 100644 --- a/contrib/llvm/lib/Passes/PassRegistry.def +++ b/contrib/llvm/lib/Passes/PassRegistry.def @@ -229,6 +229,7 @@ LOOP_PASS("strength-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) LOOP_PASS("unroll", LoopUnrollPass::create()) LOOP_PASS("unroll-full", LoopUnrollPass::createFull()) +LOOP_PASS("unswitch", SimpleLoopUnswitchPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs())) LOOP_PASS("loop-predication", LoopPredicationPass()) diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp index 1227d7528c8f..fa81b28cd083 100644 --- a/contrib/llvm/lib/Support/APInt.cpp +++ b/contrib/llvm/lib/Support/APInt.cpp @@ -76,34 +76,31 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) { void APInt::initSlowCase(uint64_t val, bool isSigned) { - VAL = 0; - pVal = getClearedMemory(getNumWords()); - pVal[0] = val; 
+ U.pVal = getClearedMemory(getNumWords()); + U.pVal[0] = val; if (isSigned && int64_t(val) < 0) for (unsigned i = 1; i < getNumWords(); ++i) - pVal[i] = WORD_MAX; + U.pVal[i] = WORD_MAX; clearUnusedBits(); } void APInt::initSlowCase(const APInt& that) { - VAL = 0; - pVal = getMemory(getNumWords()); - memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE); + U.pVal = getMemory(getNumWords()); + memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); } void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { assert(BitWidth && "Bitwidth too small"); assert(bigVal.data() && "Null pointer detected!"); if (isSingleWord()) - VAL = bigVal[0]; + U.VAL = bigVal[0]; else { // Get memory, cleared to 0 - VAL = 0; - pVal = getClearedMemory(getNumWords()); + U.pVal = getClearedMemory(getNumWords()); // Calculate the number of words to copy unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); // Copy the words from bigVal to pVal - memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE); + memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); } // Make sure unused high bits are cleared clearUnusedBits(); @@ -120,7 +117,7 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) } APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) - : VAL(0), BitWidth(numbits) { + : BitWidth(numbits) { assert(BitWidth && "Bitwidth too small"); fromString(numbits, Str, radix); } @@ -133,25 +130,24 @@ void APInt::AssignSlowCase(const APInt& RHS) { if (BitWidth == RHS.getBitWidth()) { // assume same bit-width single-word case is already handled assert(!isSingleWord()); - memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE); + memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); return; } if (isSingleWord()) { // assume case where both are single words is already handled assert(!RHS.isSingleWord()); - VAL = 0; - pVal = getMemory(RHS.getNumWords()); - memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + U.pVal = getMemory(RHS.getNumWords()); + memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE); } else if (getNumWords() == RHS.getNumWords()) - memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE); else if (RHS.isSingleWord()) { - delete [] pVal; - VAL = RHS.VAL; + delete [] U.pVal; + U.VAL = RHS.U.VAL; } else { - delete [] pVal; - pVal = getMemory(RHS.getNumWords()); - memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + delete [] U.pVal; + U.pVal = getMemory(RHS.getNumWords()); + memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE); } BitWidth = RHS.BitWidth; clearUnusedBits(); @@ -162,30 +158,30 @@ void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(BitWidth); if (isSingleWord()) { - ID.AddInteger(VAL); + ID.AddInteger(U.VAL); return; } unsigned NumWords = getNumWords(); for (unsigned i = 0; i < NumWords; ++i) - ID.AddInteger(pVal[i]); + ID.AddInteger(U.pVal[i]); } /// @brief Prefix increment operator. Increments the APInt by one. APInt& APInt::operator++() { if (isSingleWord()) - ++VAL; + ++U.VAL; else - tcIncrement(pVal, getNumWords()); + tcIncrement(U.pVal, getNumWords()); return clearUnusedBits(); } /// @brief Prefix decrement operator. Decrements the APInt by one. 
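[Note: the APInt churn here is a mechanical migration from two raw members, VAL and pVal, to a named union U, which makes the inline-word versus heap-words alternative explicit at every use site. A stripped-down sketch of that representation (copying and assignment deliberately elided):

    #include <cstdint>

    class TinyAPInt {
      unsigned BitWidth;
      union {
        uint64_t VAL;   // inline value when the width fits one word
        uint64_t *pVal; // heap-allocated words otherwise
      } U;

      bool isSingleWord() const { return BitWidth <= 64; }
      unsigned numWords() const { return (BitWidth + 63) / 64; }

    public:
      TinyAPInt(unsigned Bits, uint64_t V) : BitWidth(Bits) {
        if (isSingleWord()) {
          U.VAL = V;
        } else {
          U.pVal = new uint64_t[numWords()](); // zero-initialized words
          U.pVal[0] = V;
        }
      }
      ~TinyAPInt() {
        if (!isSingleWord())
          delete[] U.pVal;
      }
      TinyAPInt(const TinyAPInt &) = delete; // rule-of-three elided
      TinyAPInt &operator=(const TinyAPInt &) = delete;
    };
]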
APInt& APInt::operator--() { if (isSingleWord()) - --VAL; + --U.VAL; else - tcDecrement(pVal, getNumWords()); + tcDecrement(U.pVal, getNumWords()); return clearUnusedBits(); } @@ -195,17 +191,17 @@ APInt& APInt::operator--() { APInt& APInt::operator+=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - VAL += RHS.VAL; + U.VAL += RHS.U.VAL; else - tcAdd(pVal, RHS.pVal, 0, getNumWords()); + tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); return clearUnusedBits(); } APInt& APInt::operator+=(uint64_t RHS) { if (isSingleWord()) - VAL += RHS; + U.VAL += RHS; else - tcAddPart(pVal, RHS, getNumWords()); + tcAddPart(U.pVal, RHS, getNumWords()); return clearUnusedBits(); } @@ -215,17 +211,17 @@ APInt& APInt::operator+=(uint64_t RHS) { APInt& APInt::operator-=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - VAL -= RHS.VAL; + U.VAL -= RHS.U.VAL; else - tcSubtract(pVal, RHS.pVal, 0, getNumWords()); + tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); return clearUnusedBits(); } APInt& APInt::operator-=(uint64_t RHS) { if (isSingleWord()) - VAL -= RHS; + U.VAL -= RHS; else - tcSubtractPart(pVal, RHS, getNumWords()); + tcSubtractPart(U.pVal, RHS, getNumWords()); return clearUnusedBits(); } @@ -300,7 +296,7 @@ static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], APInt& APInt::operator*=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) { - VAL *= RHS.VAL; + U.VAL *= RHS.U.VAL; clearUnusedBits(); return *this; } @@ -326,12 +322,12 @@ APInt& APInt::operator*=(const APInt& RHS) { uint64_t *dest = getMemory(destWords); // Perform the long multiply - mul(dest, pVal, lhsWords, RHS.pVal, rhsWords); + mul(dest, U.pVal, lhsWords, RHS.U.pVal, rhsWords); // Copy result back into *this clearAllBits(); unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords; - memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE); + memcpy(U.pVal, dest, wordsToCopy * APINT_WORD_SIZE); clearUnusedBits(); // delete dest array and return @@ -340,43 +336,43 @@ APInt& APInt::operator*=(const APInt& RHS) { } void APInt::AndAssignSlowCase(const APInt& RHS) { - tcAnd(pVal, RHS.pVal, getNumWords()); + tcAnd(U.pVal, RHS.U.pVal, getNumWords()); } void APInt::OrAssignSlowCase(const APInt& RHS) { - tcOr(pVal, RHS.pVal, getNumWords()); + tcOr(U.pVal, RHS.U.pVal, getNumWords()); } void APInt::XorAssignSlowCase(const APInt& RHS) { - tcXor(pVal, RHS.pVal, getNumWords()); + tcXor(U.pVal, RHS.U.pVal, getNumWords()); } APInt APInt::operator*(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - return APInt(BitWidth, VAL * RHS.VAL); + return APInt(BitWidth, U.VAL * RHS.U.VAL); APInt Result(*this); Result *= RHS; return Result; } bool APInt::EqualSlowCase(const APInt& RHS) const { - return std::equal(pVal, pVal + getNumWords(), RHS.pVal); + return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); } int APInt::compare(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) - return VAL < RHS.VAL ? -1 : VAL > RHS.VAL; + return U.VAL < RHS.U.VAL ? 
-1 : U.VAL > RHS.U.VAL; - return tcCompare(pVal, RHS.pVal, getNumWords()); + return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); } int APInt::compareSigned(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) { - int64_t lhsSext = SignExtend64(VAL, BitWidth); - int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth); + int64_t lhsSext = SignExtend64(U.VAL, BitWidth); + int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; } @@ -389,14 +385,7 @@ int APInt::compareSigned(const APInt& RHS) const { // Otherwise we can just use an unsigned comparison, because even negative // numbers compare correctly this way if both have the same signed-ness. - return tcCompare(pVal, RHS.pVal, getNumWords()); -} - -void APInt::setBit(unsigned bitPosition) { - if (isSingleWord()) - VAL |= maskBit(bitPosition); - else - pVal[whichWord(bitPosition)] |= maskBit(bitPosition); + return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); } void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { @@ -416,28 +405,19 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { if (hiWord == loWord) loMask &= hiMask; else - pVal[hiWord] |= hiMask; + U.pVal[hiWord] |= hiMask; } // Apply the mask to the low word. - pVal[loWord] |= loMask; + U.pVal[loWord] |= loMask; // Fill any words between loWord and hiWord with all ones. for (unsigned word = loWord + 1; word < hiWord; ++word) - pVal[word] = WORD_MAX; -} - -/// Set the given bit to 0 whose position is given as "bitPosition". -/// @brief Set a given bit to 0. -void APInt::clearBit(unsigned bitPosition) { - if (isSingleWord()) - VAL &= ~maskBit(bitPosition); - else - pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition); + U.pVal[word] = WORD_MAX; } /// @brief Toggle every bit to its opposite value. void APInt::flipAllBitsSlowCase() { - tcComplement(pVal, getNumWords()); + tcComplement(U.pVal, getNumWords()); clearUnusedBits(); } @@ -464,8 +444,8 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Single word result can be done as a direct bitmask. if (isSingleWord()) { uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); - VAL &= ~(mask << bitPosition); - VAL |= (subBits.VAL << bitPosition); + U.VAL &= ~(mask << bitPosition); + U.VAL |= (subBits.U.VAL << bitPosition); return; } @@ -476,8 +456,8 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Insertion within a single word can be done as a direct bitmask. if (loWord == hi1Word) { uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); - pVal[loWord] &= ~(mask << loBit); - pVal[loWord] |= (subBits.VAL << loBit); + U.pVal[loWord] &= ~(mask << loBit); + U.pVal[loWord] |= (subBits.U.VAL << loBit); return; } @@ -485,15 +465,15 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { if (loBit == 0) { // Direct copy whole words. unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; - memcpy(pVal + loWord, subBits.getRawData(), + memcpy(U.pVal + loWord, subBits.getRawData(), numWholeSubWords * APINT_WORD_SIZE); // Mask+insert remaining bits. 
unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; if (remainingBits != 0) { uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits); - pVal[hi1Word] &= ~mask; - pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); + U.pVal[hi1Word] &= ~mask; + U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); } return; } @@ -515,7 +495,7 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { "Illegal bit extraction"); if (isSingleWord()) - return APInt(numBits, VAL >> bitPosition); + return APInt(numBits, U.VAL >> bitPosition); unsigned loBit = whichBit(bitPosition); unsigned loWord = whichWord(bitPosition); @@ -523,12 +503,12 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { // Single word result extracting bits from a single word source. if (loWord == hiWord) - return APInt(numBits, pVal[loWord] >> loBit); + return APInt(numBits, U.pVal[loWord] >> loBit); // Extracting bits that start on a source word boundary can be done // as a fast memory copy. if (loBit == 0) - return APInt(numBits, makeArrayRef(pVal + loWord, 1 + hiWord - loWord)); + return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); // General case - shift + copy source words directly into place. APInt Result(numBits, 0); @@ -536,10 +516,10 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { unsigned NumDstWords = Result.getNumWords(); for (unsigned word = 0; word < NumDstWords; ++word) { - uint64_t w0 = pVal[loWord + word]; + uint64_t w0 = U.pVal[loWord + word]; uint64_t w1 = - (loWord + word + 1) < NumSrcWords ? pVal[loWord + word + 1] : 0; - Result.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); + (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0; + Result.U.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); } return Result.clearUnusedBits(); @@ -600,9 +580,9 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { hash_code llvm::hash_value(const APInt &Arg) { if (Arg.isSingleWord()) - return hash_combine(Arg.VAL); + return hash_combine(Arg.U.VAL); - return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords()); + return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()); } bool APInt::isSplat(unsigned SplatSizeInBits) const { @@ -625,10 +605,21 @@ APInt APInt::getLoBits(unsigned numBits) const { return Result; } +/// Return a value containing V broadcasted over NewLen bits. 
+APInt APInt::getSplat(unsigned NewLen, const APInt &V) { + assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); + + APInt Val = V.zextOrSelf(NewLen); + for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) + Val |= Val << I; + + return Val; +} + unsigned APInt::countLeadingZerosSlowCase() const { unsigned Count = 0; for (int i = getNumWords()-1; i >= 0; --i) { - uint64_t V = pVal[i]; + uint64_t V = U.pVal[i]; if (V == 0) Count += APINT_BITS_PER_WORD; else { @@ -644,7 +635,7 @@ unsigned APInt::countLeadingZerosSlowCase() const { unsigned APInt::countLeadingOnes() const { if (isSingleWord()) - return llvm::countLeadingOnes(VAL << (APINT_BITS_PER_WORD - BitWidth)); + return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; unsigned shift; @@ -655,13 +646,13 @@ unsigned APInt::countLeadingOnes() const { shift = APINT_BITS_PER_WORD - highWordBits; } int i = getNumWords() - 1; - unsigned Count = llvm::countLeadingOnes(pVal[i] << shift); + unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); if (Count == highWordBits) { for (i--; i >= 0; --i) { - if (pVal[i] == WORD_MAX) + if (U.pVal[i] == WORD_MAX) Count += APINT_BITS_PER_WORD; else { - Count += llvm::countLeadingOnes(pVal[i]); + Count += llvm::countLeadingOnes(U.pVal[i]); break; } } @@ -671,23 +662,23 @@ unsigned APInt::countLeadingOnes() const { unsigned APInt::countTrailingZeros() const { if (isSingleWord()) - return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth); + return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth); unsigned Count = 0; unsigned i = 0; - for (; i < getNumWords() && pVal[i] == 0; ++i) + for (; i < getNumWords() && U.pVal[i] == 0; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) - Count += llvm::countTrailingZeros(pVal[i]); + Count += llvm::countTrailingZeros(U.pVal[i]); return std::min(Count, BitWidth); } unsigned APInt::countTrailingOnesSlowCase() const { unsigned Count = 0; unsigned i = 0; - for (; i < getNumWords() && pVal[i] == WORD_MAX; ++i) + for (; i < getNumWords() && U.pVal[i] == WORD_MAX; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) - Count += llvm::countTrailingOnes(pVal[i]); + Count += llvm::countTrailingOnes(U.pVal[i]); assert(Count <= BitWidth); return Count; } @@ -695,13 +686,13 @@ unsigned APInt::countTrailingOnesSlowCase() const { unsigned APInt::countPopulationSlowCase() const { unsigned Count = 0; for (unsigned i = 0; i < getNumWords(); ++i) - Count += llvm::countPopulation(pVal[i]); + Count += llvm::countPopulation(U.pVal[i]); return Count; } bool APInt::intersectsSlowCase(const APInt &RHS) const { for (unsigned i = 0, e = getNumWords(); i != e; ++i) - if ((pVal[i] & RHS.pVal[i]) != 0) + if ((U.pVal[i] & RHS.U.pVal[i]) != 0) return true; return false; @@ -709,7 +700,7 @@ bool APInt::intersectsSlowCase(const APInt &RHS) const { bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { for (unsigned i = 0, e = getNumWords(); i != e; ++i) - if ((pVal[i] & ~RHS.pVal[i]) != 0) + if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) return false; return true; @@ -718,22 +709,22 @@ bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { APInt APInt::byteSwap() const { assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); if (BitWidth == 16) - return APInt(BitWidth, ByteSwap_16(uint16_t(VAL))); + return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); if (BitWidth == 32) - return APInt(BitWidth, ByteSwap_32(unsigned(VAL))); + return APInt(BitWidth, 
ByteSwap_32(unsigned(U.VAL))); if (BitWidth == 48) { - unsigned Tmp1 = unsigned(VAL >> 16); + unsigned Tmp1 = unsigned(U.VAL >> 16); Tmp1 = ByteSwap_32(Tmp1); - uint16_t Tmp2 = uint16_t(VAL); + uint16_t Tmp2 = uint16_t(U.VAL); Tmp2 = ByteSwap_16(Tmp2); return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); } if (BitWidth == 64) - return APInt(BitWidth, ByteSwap_64(VAL)); + return APInt(BitWidth, ByteSwap_64(U.VAL)); APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); for (unsigned I = 0, N = getNumWords(); I != N; ++I) - Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]); + Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); if (Result.BitWidth != BitWidth) { Result.lshrInPlace(Result.BitWidth - BitWidth); Result.BitWidth = BitWidth; @@ -744,13 +735,13 @@ APInt APInt::byteSwap() const { APInt APInt::reverseBits() const { switch (BitWidth) { case 64: - return APInt(BitWidth, llvm::reverseBits<uint64_t>(VAL)); + return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); case 32: - return APInt(BitWidth, llvm::reverseBits<uint32_t>(VAL)); + return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); case 16: - return APInt(BitWidth, llvm::reverseBits<uint16_t>(VAL)); + return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); case 8: - return APInt(BitWidth, llvm::reverseBits<uint8_t>(VAL)); + return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); default: break; } @@ -844,7 +835,7 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { // Otherwise, we have to shift the mantissa bits up to the right location APInt Tmp(width, mantissa); - Tmp = Tmp.shl((unsigned)exp - 52); + Tmp <<= (unsigned)exp - 52; return isNeg ? -Tmp : Tmp; } @@ -895,13 +886,13 @@ double APInt::roundToDouble(bool isSigned) const { uint64_t mantissa; unsigned hiWord = whichWord(n-1); if (hiWord == 0) { - mantissa = Tmp.pVal[0]; + mantissa = Tmp.U.pVal[0]; if (n > 52) mantissa >>= n - 52; // shift down, we want the top 52 bits. } else { assert(hiWord > 0 && "huh?"); - uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); - uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); + uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); + uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); mantissa = hibits | lobits; } @@ -928,12 +919,12 @@ APInt APInt::trunc(unsigned width) const { // Copy full words. unsigned i; for (i = 0; i != width / APINT_BITS_PER_WORD; i++) - Result.pVal[i] = pVal[i]; + Result.U.pVal[i] = U.pVal[i]; // Truncate and copy any partial word. unsigned bits = (0 - width) % APINT_BITS_PER_WORD; if (bits != 0) - Result.pVal[i] = pVal[i] << bits >> bits; + Result.U.pVal[i] = U.pVal[i] << bits >> bits; return Result; } @@ -943,20 +934,20 @@ APInt APInt::sext(unsigned Width) const { assert(Width > BitWidth && "Invalid APInt SignExtend request"); if (Width <= APINT_BITS_PER_WORD) - return APInt(Width, SignExtend64(VAL, BitWidth)); + return APInt(Width, SignExtend64(U.VAL, BitWidth)); APInt Result(getMemory(getNumWords(Width)), Width); // Copy words. - std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); + std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); // Sign extend the last word since there may be unused bits in the input. - Result.pVal[getNumWords() - 1] = - SignExtend64(Result.pVal[getNumWords() - 1], + Result.U.pVal[getNumWords() - 1] = + SignExtend64(Result.U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); // Fill with sign bits. 
- std::memset(Result.pVal + getNumWords(), isNegative() ? -1 : 0, + std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); Result.clearUnusedBits(); return Result; @@ -967,15 +958,15 @@ APInt APInt::zext(unsigned width) const { assert(width > BitWidth && "Invalid APInt ZeroExtend request"); if (width <= APINT_BITS_PER_WORD) - return APInt(width, VAL); + return APInt(width, U.VAL); APInt Result(getMemory(getNumWords(width)), width); // Copy words. - std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); + std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); // Zero remaining words. - std::memset(Result.pVal + getNumWords(), 0, + std::memset(Result.U.pVal + getNumWords(), 0, (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); return Result; @@ -1032,28 +1023,28 @@ void APInt::ashrSlowCase(unsigned ShiftAmt) { unsigned WordsToMove = getNumWords() - WordShift; if (WordsToMove != 0) { // Sign extend the last word to fill in the unused bits. - pVal[getNumWords() - 1] = SignExtend64( - pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); + U.pVal[getNumWords() - 1] = SignExtend64( + U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); // Fastpath for moving by whole words. if (BitShift == 0) { - std::memmove(pVal, pVal + WordShift, WordsToMove * APINT_WORD_SIZE); + std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); } else { // Move the words containing significant bits. for (unsigned i = 0; i != WordsToMove - 1; ++i) - pVal[i] = (pVal[i + WordShift] >> BitShift) | - (pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); + U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | + (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); // Handle the last word which has no high bits to copy. - pVal[WordsToMove - 1] = pVal[WordShift + WordsToMove - 1] >> BitShift; + U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; // Sign extend one more time. - pVal[WordsToMove - 1] = - SignExtend64(pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); + U.pVal[WordsToMove - 1] = + SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); } } // Fill in the remainder based on the original sign. - std::memset(pVal + WordsToMove, Negative ? -1 : 0, + std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, WordShift * APINT_WORD_SIZE); clearUnusedBits(); } @@ -1067,18 +1058,19 @@ void APInt::lshrInPlace(const APInt &shiftAmt) { /// Logical right-shift this APInt by shiftAmt. /// @brief Logical right-shift function. void APInt::lshrSlowCase(unsigned ShiftAmt) { - tcShiftRight(pVal, getNumWords(), ShiftAmt); + tcShiftRight(U.pVal, getNumWords(), ShiftAmt); } /// Left-shift this APInt by shiftAmt. /// @brief Left-shift function. -APInt APInt::shl(const APInt &shiftAmt) const { +APInt &APInt::operator<<=(const APInt &shiftAmt) { // It's undefined behavior in C to shift by BitWidth or greater. - return shl((unsigned)shiftAmt.getLimitedValue(BitWidth)); + *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); + return *this; } void APInt::shlSlowCase(unsigned ShiftAmt) { - tcShiftLeft(pVal, getNumWords(), ShiftAmt); + tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); clearUnusedBits(); } @@ -1141,7 +1133,7 @@ APInt APInt::sqrt() const { /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, /* 31 */ 6 }; - return APInt(BitWidth, results[ (isSingleWord() ? 
VAL : pVal[0]) ]); + return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); } // If the magnitude of the value fits in less than 52 bits (the precision of @@ -1150,7 +1142,8 @@ APInt APInt::sqrt() const { // This should be faster than the algorithm below. if (magnitude < 52) { return APInt(BitWidth, - uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); + uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL + : U.pVal[0]))))); } // Okay, all the short cuts are exhausted. We must compute it. The following @@ -1528,7 +1521,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // Initialize the dividend memset(U, 0, (m+n+1)*sizeof(unsigned)); for (unsigned i = 0; i < lhsWords; ++i) { - uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]); + uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.U.VAL : LHS.U.pVal[i]); U[i * 2] = (unsigned)(tmp & mask); U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); } @@ -1537,7 +1530,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // Initialize the divisor memset(V, 0, (n)*sizeof(unsigned)); for (unsigned i = 0; i < rhsWords; ++i) { - uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]); + uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.U.VAL : RHS.U.pVal[i]); V[i * 2] = (unsigned)(tmp & mask); V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); } @@ -1597,12 +1590,12 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // Set up the Quotient value's memory. if (Quotient->BitWidth != LHS.BitWidth) { if (Quotient->isSingleWord()) - Quotient->VAL = 0; + Quotient->U.VAL = 0; else - delete [] Quotient->pVal; + delete [] Quotient->U.pVal; Quotient->BitWidth = LHS.BitWidth; if (!Quotient->isSingleWord()) - Quotient->pVal = getClearedMemory(Quotient->getNumWords()); + Quotient->U.pVal = getClearedMemory(Quotient->getNumWords()); } else Quotient->clearAllBits(); @@ -1614,13 +1607,13 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, uint64_t tmp = uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2)); if (Quotient->isSingleWord()) - Quotient->VAL = tmp; + Quotient->U.VAL = tmp; else - Quotient->pVal[0] = tmp; + Quotient->U.pVal[0] = tmp; } else { assert(!Quotient->isSingleWord() && "Quotient APInt not large enough"); for (unsigned i = 0; i < lhsWords; ++i) - Quotient->pVal[i] = + Quotient->U.pVal[i] = uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2)); } } @@ -1630,12 +1623,12 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // Set up the Remainder value's memory. 
if (Remainder->BitWidth != RHS.BitWidth) { if (Remainder->isSingleWord()) - Remainder->VAL = 0; + Remainder->U.VAL = 0; else - delete [] Remainder->pVal; + delete [] Remainder->U.pVal; Remainder->BitWidth = RHS.BitWidth; if (!Remainder->isSingleWord()) - Remainder->pVal = getClearedMemory(Remainder->getNumWords()); + Remainder->U.pVal = getClearedMemory(Remainder->getNumWords()); } else Remainder->clearAllBits(); @@ -1645,13 +1638,13 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, uint64_t tmp = uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2)); if (Remainder->isSingleWord()) - Remainder->VAL = tmp; + Remainder->U.VAL = tmp; else - Remainder->pVal[0] = tmp; + Remainder->U.pVal[0] = tmp; } else { assert(!Remainder->isSingleWord() && "Remainder APInt not large enough"); for (unsigned i = 0; i < rhsWords; ++i) - Remainder->pVal[i] = + Remainder->U.pVal[i] = uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2)); } } @@ -1670,8 +1663,8 @@ APInt APInt::udiv(const APInt& RHS) const { // First, deal with the easy case if (isSingleWord()) { - assert(RHS.VAL != 0 && "Divide by zero?"); - return APInt(BitWidth, VAL / RHS.VAL); + assert(RHS.U.VAL != 0 && "Divide by zero?"); + return APInt(BitWidth, U.VAL / RHS.U.VAL); } // Get some facts about the LHS and RHS number of bits and words @@ -1693,7 +1686,7 @@ APInt APInt::udiv(const APInt& RHS) const { return APInt(BitWidth, 1); } else if (lhsWords == 1 && rhsWords == 1) { // All high words are zero, just use native divide - return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]); + return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); } // We have to compute it the hard way. Invoke the Knuth divide algorithm. @@ -1716,8 +1709,8 @@ APInt APInt::sdiv(const APInt &RHS) const { APInt APInt::urem(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) { - assert(RHS.VAL != 0 && "Remainder by zero?"); - return APInt(BitWidth, VAL % RHS.VAL); + assert(RHS.U.VAL != 0 && "Remainder by zero?"); + return APInt(BitWidth, U.VAL % RHS.U.VAL); } // Get some facts about the LHS @@ -1741,7 +1734,7 @@ APInt APInt::urem(const APInt& RHS) const { return APInt(BitWidth, 0); } else if (lhsWords == 1) { // All high words are zero, just use native remainder - return APInt(BitWidth, pVal[0] % RHS.pVal[0]); + return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); } // We have to compute it the hard way. Invoke the Knuth divide algorithm. @@ -1767,9 +1760,9 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, // First, deal with the easy case if (LHS.isSingleWord()) { - assert(RHS.VAL != 0 && "Divide by zero?"); - uint64_t QuotVal = LHS.VAL / RHS.VAL; - uint64_t RemVal = LHS.VAL % RHS.VAL; + assert(RHS.U.VAL != 0 && "Divide by zero?"); + uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; + uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; Quotient = APInt(LHS.BitWidth, QuotVal); Remainder = APInt(LHS.BitWidth, RemVal); return; @@ -1802,8 +1795,8 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, if (lhsWords == 1 && rhsWords == 1) { // There is only one word to consider so use the native versions. - uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0]; - uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0]; + uint64_t lhsValue = LHS.isSingleWord() ? LHS.U.VAL : LHS.U.pVal[0]; + uint64_t rhsValue = RHS.isSingleWord() ? 
RHS.U.VAL : RHS.U.pVal[0]; Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue); Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue); return; @@ -1930,9 +1923,11 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width"); - // Allocate memory - if (!isSingleWord()) - pVal = getClearedMemory(getNumWords()); + // Allocate memory if needed + if (isSingleWord()) + U.VAL = 0; + else + U.pVal = getClearedMemory(getNumWords()); // Figure out if we can shift instead of multiply unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); diff --git a/contrib/llvm/lib/Support/BinaryStreamReader.cpp b/contrib/llvm/lib/Support/BinaryStreamReader.cpp index c7a2e0ddb179..702d98770e05 100644 --- a/contrib/llvm/lib/Support/BinaryStreamReader.cpp +++ b/contrib/llvm/lib/Support/BinaryStreamReader.cpp @@ -93,3 +93,16 @@ uint8_t BinaryStreamReader::peek() const { llvm::consumeError(std::move(EC)); return Buffer[0]; } + +std::pair<BinaryStreamReader, BinaryStreamReader> +BinaryStreamReader::split(uint32_t Off) const { + assert(getLength() >= Off); + + BinaryStreamRef First = Stream.drop_front(Offset); + + BinaryStreamRef Second = First.drop_front(Off); + First = First.keep_front(Off); + BinaryStreamReader W1{First}; + BinaryStreamReader W2{Second}; + return std::make_pair(W1, W2); +}
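The BinaryStreamReader::split() helper added above carves the unread tail of a reader into two independent readers at a byte offset; it asserts getLength() >= Off, so callers are expected to validate the offset first. A rough usage sketch, not part of the patch — it assumes llvm::BinaryByteStream and the llvm/Support/BinaryStreamReader.h API as shown in this hunk, and the names below are illustrative:

// Sketch: read a 16-byte header with one reader, hand the rest to another.
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamReader.h"
using namespace llvm;

void splitExample(ArrayRef<uint8_t> Bytes) {
  BinaryByteStream Stream(Bytes, support::little);
  BinaryStreamReader Reader(Stream);
  auto Halves = Reader.split(16); // first 16 unread bytes vs. the remainder
  BinaryStreamReader &Header = Halves.first;
  BinaryStreamReader &Rest = Halves.second;
  (void)Header;
  (void)Rest;
}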
\ No newline at end of file diff --git a/contrib/llvm/lib/Support/BinaryStreamWriter.cpp b/contrib/llvm/lib/Support/BinaryStreamWriter.cpp index d60b75642d0f..d78dbc68f593 100644 --- a/contrib/llvm/lib/Support/BinaryStreamWriter.cpp +++ b/contrib/llvm/lib/Support/BinaryStreamWriter.cpp @@ -59,6 +59,19 @@ Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint32_t Length) { return Error::success(); } +std::pair<BinaryStreamWriter, BinaryStreamWriter> +BinaryStreamWriter::split(uint32_t Off) const { + assert(getLength() >= Off); + + WritableBinaryStreamRef First = Stream.drop_front(Offset); + + WritableBinaryStreamRef Second = First.drop_front(Off); + First = First.keep_front(Off); + BinaryStreamWriter W1{First}; + BinaryStreamWriter W2{Second}; + return std::make_pair(W1, W2); +} + Error BinaryStreamWriter::padToAlignment(uint32_t Align) { uint32_t NewOffset = alignTo(Offset, Align); if (NewOffset > getLength()) diff --git a/contrib/llvm/lib/Support/DataExtractor.cpp b/contrib/llvm/lib/Support/DataExtractor.cpp index 5d6d60a87fbf..53c10bcc562e 100644 --- a/contrib/llvm/lib/Support/DataExtractor.cpp +++ b/contrib/llvm/lib/Support/DataExtractor.cpp @@ -128,6 +128,16 @@ const char *DataExtractor::getCStr(uint32_t *offset_ptr) const { return nullptr; } +StringRef DataExtractor::getCStrRef(uint32_t *OffsetPtr) const { + uint32_t Start = *OffsetPtr; + StringRef::size_type Pos = Data.find('\0', Start); + if (Pos != StringRef::npos) { + *OffsetPtr = Pos + 1; + return StringRef(Data.data() + Start, Pos - Start); + } + return StringRef(); +} + uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { uint64_t result = 0; if (Data.empty()) diff --git a/contrib/llvm/lib/Support/DynamicLibrary.cpp b/contrib/llvm/lib/Support/DynamicLibrary.cpp index 22fb3f2cb9c9..1541a5726302 100644 --- a/contrib/llvm/lib/Support/DynamicLibrary.cpp +++ b/contrib/llvm/lib/Support/DynamicLibrary.cpp @@ -20,169 +20,164 @@ #include "llvm/Support/Mutex.h" #include <cstdio> #include <cstring> +#include <vector> -// Collection of symbol name/value pairs to be searched prior to any libraries. -static llvm::ManagedStatic<llvm::StringMap<void *> > ExplicitSymbols; -static llvm::ManagedStatic<llvm::sys::SmartMutex<true> > SymbolsMutex; - -void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName, - void *symbolValue) { - SmartScopedLock<true> lock(*SymbolsMutex); - (*ExplicitSymbols)[symbolName] = symbolValue; -} - -char llvm::sys::DynamicLibrary::Invalid = 0; - -#ifdef LLVM_ON_WIN32 - -#include "Windows/DynamicLibrary.inc" - -#else - -#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN) -#include <dlfcn.h> using namespace llvm; using namespace llvm::sys; -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only TRULY operating system -//=== independent code. -//===----------------------------------------------------------------------===// +// All methods for HandleSet should be used holding SymbolsMutex. 
+class DynamicLibrary::HandleSet { + typedef std::vector<void *> HandleList; + HandleList Handles; + void *Process; -static llvm::ManagedStatic<DenseSet<void *> > OpenedHandles; +public: + static void *DLOpen(const char *Filename, std::string *Err); + static void DLClose(void *Handle); + static void *DLSym(void *Handle, const char *Symbol); -DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, - std::string *errMsg) { - SmartScopedLock<true> lock(*SymbolsMutex); + HandleSet() : Process(nullptr) {} + ~HandleSet(); - void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL); - if (!handle) { - if (errMsg) *errMsg = dlerror(); - return DynamicLibrary(); + HandleList::iterator Find(void *Handle) { + return std::find(Handles.begin(), Handles.end(), Handle); } -#ifdef __CYGWIN__ - // Cygwin searches symbols only in the main - // with the handle of dlopen(NULL, RTLD_GLOBAL). - if (!filename) - handle = RTLD_DEFAULT; -#endif + bool Contains(void *Handle) { + return Handle == Process || Find(Handle) != Handles.end(); + } - // If we've already loaded this library, dlclose() the handle in order to - // keep the internal refcount at +1. - if (!OpenedHandles->insert(handle).second) - dlclose(handle); + bool AddLibrary(void *Handle, bool IsProcess = false, bool CanClose = true) { +#ifdef LLVM_ON_WIN32 + assert((Handle == this ? IsProcess : !IsProcess) && "Bad Handle."); +#endif - return DynamicLibrary(handle); -} + if (LLVM_LIKELY(!IsProcess)) { + if (Find(Handle) != Handles.end()) { + if (CanClose) + DLClose(Handle); + return false; + } + Handles.push_back(Handle); + } else { +#ifndef LLVM_ON_WIN32 + if (Process) { + if (CanClose) + DLClose(Process); + if (Process == Handle) + return false; + } +#endif + Process = Handle; + } + return true; + } -DynamicLibrary DynamicLibrary::addPermanentLibrary(void *handle, - std::string *errMsg) { - SmartScopedLock<true> lock(*SymbolsMutex); - // If we've already loaded this library, tell the caller. - if (!OpenedHandles->insert(handle).second) { - if (errMsg) *errMsg = "Library already loaded"; - return DynamicLibrary(); + void *Lookup(const char *Symbol) { + // Process handle gets first try. + if (Process) { + if (void *Ptr = DLSym(Process, Symbol)) + return Ptr; +#ifndef NDEBUG + for (void *Handle : Handles) + assert(!DLSym(Handle, Symbol) && "Symbol exists in non process handle"); +#endif + } else { + // Iterate in reverse, so newer libraries/symbols override older. + for (auto &&I = Handles.rbegin(), E = Handles.rend(); I != E; ++I) { + if (void *Ptr = DLSym(*I, Symbol)) + return Ptr; + } + } + return nullptr; } +}; - return DynamicLibrary(handle); +namespace { +// Collection of symbol name/value pairs to be searched prior to any libraries. +static llvm::ManagedStatic<llvm::StringMap<void *>> ExplicitSymbols; +// Collection of known library handles. +static llvm::ManagedStatic<DynamicLibrary::HandleSet> OpenedHandles; +// Lock for ExplicitSymbols and OpenedHandles. 
+static llvm::ManagedStatic<llvm::sys::SmartMutex<true>> SymbolsMutex;
 }
 
-void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
-  if (!isValid())
-    return nullptr;
-  return dlsym(Data, symbolName);
-}
+#ifdef LLVM_ON_WIN32
+
+#include "Windows/DynamicLibrary.inc"
 
 #else
 
-using namespace llvm;
-using namespace llvm::sys;
+#include "Unix/DynamicLibrary.inc"
+
+#endif
 
-DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
-                                                   std::string *errMsg) {
-  if (errMsg) *errMsg = "dlopen() not supported on this platform";
-  return DynamicLibrary();
+char DynamicLibrary::Invalid;
+
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char *SymbolName) {
+  return DoSearch(SymbolName); // DynamicLibrary.inc
+}
 }
 
-void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
-  return NULL;
+void DynamicLibrary::AddSymbol(StringRef SymbolName, void *SymbolValue) {
+  SmartScopedLock<true> Lock(*SymbolsMutex);
+  (*ExplicitSymbols)[SymbolName] = SymbolValue;
 }
 
-#endif
+DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *FileName,
+                                                   std::string *Err) {
+  SmartScopedLock<true> Lock(*SymbolsMutex);
+  void *Handle = HandleSet::DLOpen(FileName, Err);
+  if (Handle != &Invalid)
+    OpenedHandles->AddLibrary(Handle, /*IsProcess*/ FileName == nullptr);
 
-namespace llvm {
-void *SearchForAddressOfSpecialSymbol(const char* symbolName);
+  return DynamicLibrary(Handle);
 }
 
-void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
+DynamicLibrary DynamicLibrary::addPermanentLibrary(void *Handle,
+                                                   std::string *Err) {
   SmartScopedLock<true> Lock(*SymbolsMutex);
+  // If we've already loaded this library, tell the caller.
+  if (!OpenedHandles->AddLibrary(Handle, /*IsProcess*/false, /*CanClose*/false))
+    if (Err) *Err = "Library already loaded";
 
-  // First check symbols added via AddSymbol().
-  if (ExplicitSymbols.isConstructed()) {
-    StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName);
+  return DynamicLibrary(Handle);
 }
 
-    if (i != ExplicitSymbols->end())
-      return i->second;
-  }
+void *DynamicLibrary::getAddressOfSymbol(const char *SymbolName) {
+  if (!isValid())
+    return nullptr;
+  return HandleSet::DLSym(Data, SymbolName);
+}
 
-#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN)
-  // Now search the libraries.
-  if (OpenedHandles.isConstructed()) {
-    for (DenseSet<void *>::iterator I = OpenedHandles->begin(),
-         E = OpenedHandles->end(); I != E; ++I) {
-      //lt_ptr ptr = lt_dlsym(*I, symbolName);
-      void *ptr = dlsym(*I, symbolName);
-      if (ptr) {
-        return ptr;
-      }
-    }
-  }
-#endif
+void *DynamicLibrary::SearchForAddressOfSymbol(const char *SymbolName) {
+  {
+    SmartScopedLock<true> Lock(*SymbolsMutex);
 
-  if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName))
-    return Result;
+    // First check symbols added via AddSymbol().
+    if (ExplicitSymbols.isConstructed()) {
+      StringMap<void *>::iterator i = ExplicitSymbols->find(SymbolName);
 
-// This macro returns the address of a well-known, explicit symbol
-#define EXPLICIT_SYMBOL(SYM) \
-  if (!strcmp(symbolName, #SYM)) return &SYM
+      if (i != ExplicitSymbols->end())
+        return i->second;
+    }
 
-// On linux we have a weird situation. The stderr/out/in symbols are both
-// macros and global variables because of standards requirements. So, we
-// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
-#if defined(__linux__) and !defined(__ANDROID__) - { - EXPLICIT_SYMBOL(stderr); - EXPLICIT_SYMBOL(stdout); - EXPLICIT_SYMBOL(stdin); - } -#else - // For everything else, we want to check to make sure the symbol isn't defined - // as a macro before using EXPLICIT_SYMBOL. - { -#ifndef stdin - EXPLICIT_SYMBOL(stdin); -#endif -#ifndef stdout - EXPLICIT_SYMBOL(stdout); -#endif -#ifndef stderr - EXPLICIT_SYMBOL(stderr); -#endif + // Now search the libraries. + if (OpenedHandles.isConstructed()) { + if (void *Ptr = OpenedHandles->Lookup(SymbolName)) + return Ptr; + } } -#endif -#undef EXPLICIT_SYMBOL - return nullptr; + return llvm::SearchForAddressOfSpecialSymbol(SymbolName); } -#endif // LLVM_ON_WIN32 - //===----------------------------------------------------------------------===// // C API. //===----------------------------------------------------------------------===// -LLVMBool LLVMLoadLibraryPermanently(const char* Filename) { +LLVMBool LLVMLoadLibraryPermanently(const char *Filename) { return llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename); } diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp index 970ecfd7df90..6a0b64fb884d 100644 --- a/contrib/llvm/lib/Support/Host.cpp +++ b/contrib/llvm/lib/Support/Host.cpp @@ -1363,6 +1363,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) { Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; + Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); diff --git a/contrib/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm/lib/Support/PrettyStackTrace.cpp index 5b079ff211fe..abf61b73a70d 100644 --- a/contrib/llvm/lib/Support/PrettyStackTrace.cpp +++ b/contrib/llvm/lib/Support/PrettyStackTrace.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/raw_ostream.h" #include <cstdarg> +#include <cstdio> #include <tuple> #ifdef HAVE_CRASHREPORTERCLIENT_H diff --git a/contrib/llvm/lib/Support/ScopedPrinter.cpp b/contrib/llvm/lib/Support/ScopedPrinter.cpp index d8ee1efd8f3e..537ff62c7b09 100644 --- a/contrib/llvm/lib/Support/ScopedPrinter.cpp +++ b/contrib/llvm/lib/Support/ScopedPrinter.cpp @@ -21,7 +21,8 @@ const std::string to_hexString(uint64_t Value, bool UpperCase) { } void ScopedPrinter::printBinaryImpl(StringRef Label, StringRef Str, - ArrayRef<uint8_t> Data, bool Block) { + ArrayRef<uint8_t> Data, bool Block, + uint32_t StartOffset) { if (Data.size() > 16) Block = true; @@ -31,7 +32,8 @@ void ScopedPrinter::printBinaryImpl(StringRef Label, StringRef Str, OS << ": " << Str; OS << " (\n"; if (!Data.empty()) - OS << format_bytes_with_ascii(Data, 0, 16, 4, (IndentLevel + 1) * 2, true) + OS << format_bytes_with_ascii(Data, StartOffset, 16, 4, + (IndentLevel + 1) * 2, true) << "\n"; startLine() << ")\n"; } else { diff --git a/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp deleted file mode 100644 index 55f3320f640f..000000000000 --- a/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp +++ /dev/null @@ -1,58 +0,0 @@ -//===- SearchForAddressOfSpecialSymbol.cpp - Function addresses -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file pulls the addresses of certain symbols out of the linker. It must -// include as few header files as possible because it declares the symbols as -// void*, which would conflict with the actual symbol type if any header -// declared it. -// -//===----------------------------------------------------------------------===// - -#include <string.h> - -// Must declare the symbols in the global namespace. -static void *DoSearch(const char* symbolName) { -#define EXPLICIT_SYMBOL(SYM) \ - extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM - - // If this is darwin, it has some funky issues, try to solve them here. Some - // important symbols are marked 'private external' which doesn't allow - // SearchForAddressOfSymbol to find them. As such, we special case them here, - // there is only a small handful of them. - -#ifdef __APPLE__ - { - // __eprintf is sometimes used for assert() handling on x86. - // - // FIXME: Currently disabled when using Clang, as we don't always have our - // runtime support libraries available. -#ifndef __clang__ -#ifdef __i386__ - EXPLICIT_SYMBOL(__eprintf); -#endif -#endif - } -#endif - -#ifdef __CYGWIN__ - { - EXPLICIT_SYMBOL(_alloca); - EXPLICIT_SYMBOL(__main); - } -#endif - -#undef EXPLICIT_SYMBOL - return nullptr; -} - -namespace llvm { -void *SearchForAddressOfSpecialSymbol(const char* symbolName) { - return DoSearch(symbolName); -} -} // namespace llvm diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp index ca2391c10ff1..5199fad7d9e9 100644 --- a/contrib/llvm/lib/Support/SourceMgr.cpp +++ b/contrib/llvm/lib/Support/SourceMgr.cpp @@ -51,9 +51,7 @@ static LineNoCacheTy *getCache(void *Ptr) { } SourceMgr::~SourceMgr() { - // Delete the line # cache if allocated. - if (LineNoCacheTy *Cache = getCache(LineNoCache)) - delete Cache; + delete getCache(LineNoCache); } unsigned SourceMgr::AddIncludeFile(const std::string &Filename, diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp index f3a654d7d2bd..eb8108908ac5 100644 --- a/contrib/llvm/lib/Support/Triple.cpp +++ b/contrib/llvm/lib/Support/Triple.cpp @@ -459,7 +459,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("kfreebsd", Triple::KFreeBSD) .StartsWith("linux", Triple::Linux) .StartsWith("lv2", Triple::Lv2) - .StartsWith("macosx", Triple::MacOSX) + .StartsWith("macos", Triple::MacOSX) .StartsWith("netbsd", Triple::NetBSD) .StartsWith("openbsd", Triple::OpenBSD) .StartsWith("solaris", Triple::Solaris) @@ -984,6 +984,8 @@ void Triple::getOSVersion(unsigned &Major, unsigned &Minor, StringRef OSTypeName = getOSTypeName(getOS()); if (OSName.startswith(OSTypeName)) OSName = OSName.substr(OSTypeName.size()); + else if (getOS() == MacOSX) + OSName.consume_front("macos"); parseVersionFromName(OSName, Major, Minor, Micro); } diff --git a/contrib/llvm/lib/Support/Unix/DynamicLibrary.inc b/contrib/llvm/lib/Support/Unix/DynamicLibrary.inc new file mode 100644 index 000000000000..a0110e7044ee --- /dev/null +++ b/contrib/llvm/lib/Support/Unix/DynamicLibrary.inc @@ -0,0 +1,131 @@ +//===- Unix/DynamicLibrary.cpp - Unix DL Implementation ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the UNIX specific implementation of DynamicLibrary.
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN)
+#include <dlfcn.h>
+
+DynamicLibrary::HandleSet::~HandleSet() {
+  for (void *Handle : Handles)
+    ::dlclose(Handle);
+  if (Process)
+    ::dlclose(Process);
+}
+
+void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) {
+  void *Handle = ::dlopen(File, RTLD_LAZY|RTLD_GLOBAL);
+  if (!Handle) {
+    if (Err) *Err = ::dlerror();
+    return &DynamicLibrary::Invalid;
+  }
+
+#ifdef __CYGWIN__
+  // Cygwin searches symbols only in the main executable, using the handle
+  // returned by dlopen(NULL, RTLD_GLOBAL).
+  if (!File)
+    Handle = RTLD_DEFAULT;
+#endif
+
+  return Handle;
+}
+
+void DynamicLibrary::HandleSet::DLClose(void *Handle) {
+  ::dlclose(Handle);
+}
+
+void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) {
+  return ::dlsym(Handle, Symbol);
+}
+
+#else // !HAVE_DLOPEN
+
+DynamicLibrary::HandleSet::~HandleSet() {}
+
+void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) {
+  if (Err) *Err = "dlopen() not supported on this platform";
+  return &Invalid;
+}
+
+void DynamicLibrary::HandleSet::DLClose(void *Handle) {
+}
+
+void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) {
+  return nullptr;
+}
+
+#endif
+
+// Must declare the symbols in the global namespace.
+static void *DoSearch(const char* SymbolName) {
+#define EXPLICIT_SYMBOL(SYM) \
+  extern void *SYM; if (!strcmp(SymbolName, #SYM)) return &SYM
+
+  // If this is darwin, it has some funky issues, try to solve them here. Some
+  // important symbols are marked 'private external' which doesn't allow
+  // SearchForAddressOfSymbol to find them. As such, we special case them here,
+  // there is only a small handful of them.
+
+#ifdef __APPLE__
+  {
+    // __eprintf is sometimes used for assert() handling on x86.
+    //
+    // FIXME: Currently disabled when using Clang, as we don't always have our
+    // runtime support libraries available.
+#ifndef __clang__
+#ifdef __i386__
+    EXPLICIT_SYMBOL(__eprintf);
+#endif
+#endif
+  }
+#endif
+
+#ifdef __CYGWIN__
+  {
+    EXPLICIT_SYMBOL(_alloca);
+    EXPLICIT_SYMBOL(__main);
+  }
+#endif
+
+#undef EXPLICIT_SYMBOL
+
+// This macro returns the address of a well-known, explicit symbol
+#define EXPLICIT_SYMBOL(SYM) \
+  if (!strcmp(SymbolName, #SYM)) return &SYM
+
+// On linux we have a weird situation. The stderr/out/in symbols are both
+// macros and global variables because of standards requirements. So, we
+// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
+#if defined(__linux__) and !defined(__ANDROID__)
+  {
+    EXPLICIT_SYMBOL(stderr);
+    EXPLICIT_SYMBOL(stdout);
+    EXPLICIT_SYMBOL(stdin);
+  }
+#else
+  // For everything else, we want to check to make sure the symbol isn't defined
+  // as a macro before using EXPLICIT_SYMBOL.
+  {
+#ifndef stdin
+    EXPLICIT_SYMBOL(stdin);
+#endif
+#ifndef stdout
+    EXPLICIT_SYMBOL(stdout);
+#endif
+#ifndef stderr
+    EXPLICIT_SYMBOL(stderr);
+#endif
+  }
+#endif
+#undef EXPLICIT_SYMBOL
+
+  return nullptr;
+}
diff --git a/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
index 709499deeafa..0b54b5dfdbc5 100644
--- a/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
+++ b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
@@ -12,98 +12,140 @@
 //===----------------------------------------------------------------------===//
 
 #include "WindowsSupport.h"
+#include "llvm/Support/raw_ostream.h"
 
-#ifdef __MINGW32__
-  #include <imagehlp.h>
-#else
-  #include <dbghelp.h>
-#endif
-
-#ifdef _MSC_VER
-  #include <ntverp.h>
-#endif
-
-namespace llvm {
+#include <psapi.h>
 
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only Win32 specific code
 //===          and must not be UNIX code.
 //===----------------------------------------------------------------------===//
 
-typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID);
-static fpEnumerateLoadedModules fEnumerateLoadedModules;
-static llvm::ManagedStatic<DenseSet<HMODULE> > OpenedHandles;
 
-static bool loadDebugHelp(void) {
-  HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll");
-  if (hLib) {
-    fEnumerateLoadedModules = (fpEnumerateLoadedModules)
-      ::GetProcAddress(hLib, "EnumerateLoadedModules64");
-  }
-  return fEnumerateLoadedModules != 0;
-}
+DynamicLibrary::HandleSet::~HandleSet() {
+  for (void *Handle : Handles)
+    FreeLibrary(HMODULE(Handle));
 
-static BOOL CALLBACK
-ELM_Callback(PCSTR ModuleName, DWORD64 ModuleBase,
-             ULONG ModuleSize, PVOID UserContext) {
-  OpenedHandles->insert((HMODULE)ModuleBase);
-  return TRUE;
+  // 'Process' should not be released on Windows.
+  assert((!Process || Process==this) && "Bad Handle");
 }
 
-sys::DynamicLibrary
-sys::DynamicLibrary::getPermanentLibrary(const char *filename,
-                                         std::string *errMsg) {
-  SmartScopedLock<true> lock(*SymbolsMutex);
-
-  if (!filename) {
-    // When no file is specified, enumerate all DLLs and EXEs in the process.
-    if (!fEnumerateLoadedModules) {
-      if (!loadDebugHelp()) {
-        assert(false && "These APIs should always be available");
-        return DynamicLibrary();
-      }
-    }
+void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) {
+  // Create the instance and return it to be the *Process* handle
+  // similar to dlopen(NULL, RTLD_LAZY|RTLD_GLOBAL)
+  if (!File)
+    return &(*OpenedHandles);
 
-    fEnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
-    // Dummy library that represents "search all handles".
-    // This is mostly to ensure that the return value still shows up as "valid".
-    return DynamicLibrary(&OpenedHandles);
-  }
-
-  SmallVector<wchar_t, MAX_PATH> filenameUnicode;
-  if (std::error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) {
+  SmallVector<wchar_t, MAX_PATH> FileUnicode;
+  if (std::error_code ec = windows::UTF8ToUTF16(File, FileUnicode)) {
     SetLastError(ec.value());
-    MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16");
-    return DynamicLibrary();
+    MakeErrMsg(Err, std::string(File) + ": Can't convert to UTF-16");
+    return &DynamicLibrary::Invalid;
   }
 
-  HMODULE a_handle = LoadLibraryW(filenameUnicode.data());
-
-  if (a_handle == 0) {
-    MakeErrMsg(errMsg, std::string(filename) + ": Can't open");
-    return DynamicLibrary();
+  HMODULE Handle = LoadLibraryW(FileUnicode.data());
+  if (Handle == NULL) {
+    MakeErrMsg(Err, std::string(File) + ": Can't open");
+    return &DynamicLibrary::Invalid;
   }
 
-  // If we've already loaded this library, FreeLibrary() the handle in order to
-  // keep the internal refcount at +1.
-  if (!OpenedHandles->insert(a_handle).second)
-    FreeLibrary(a_handle);
+  return reinterpret_cast<void*>(Handle);
+}
 
-  return DynamicLibrary(a_handle);
+static DynamicLibrary::HandleSet *IsOpenedHandlesInstance(void *Handle) {
+  if (!OpenedHandles.isConstructed())
+    return nullptr;
+  DynamicLibrary::HandleSet &Inst = *OpenedHandles;
+  return Handle == &Inst ? &Inst : nullptr;
 }
 
-sys::DynamicLibrary
-sys::DynamicLibrary::addPermanentLibrary(void *handle, std::string *errMsg) {
-  SmartScopedLock<true> lock(*SymbolsMutex);
-  // If we've already loaded this library, tell the caller.
-  if (!OpenedHandles->insert((HMODULE)handle).second) {
-    MakeErrMsg(errMsg, "Library already loaded");
-    return DynamicLibrary();
+void DynamicLibrary::HandleSet::DLClose(void *Handle) {
+  if (HandleSet* HS = IsOpenedHandlesInstance(Handle))
+    HS->Process = nullptr; // Just drop the *Process* handle.
+  else
+    FreeLibrary((HMODULE)Handle);
+}
+
+static bool GetProcessModules(HANDLE H, DWORD &Bytes, HMODULE *Data = nullptr) {
+  // EnumProcessModules will fail on Windows 64 while some versions of
+  // MinGW-32 don't have EnumProcessModulesEx.
+  if (
+#ifdef _WIN64
+      !EnumProcessModulesEx(H, Data, Bytes, &Bytes, LIST_MODULES_64BIT)
+#else
+      !EnumProcessModules(H, Data, Bytes, &Bytes)
+#endif
+      ) {
+    std::string Err;
+    if (MakeErrMsg(&Err, "EnumProcessModules failure"))
+      llvm::errs() << Err << "\n";
+    return false;
   }
+  return true;
+}
 
-  return DynamicLibrary(handle);
+void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) {
+  HandleSet* HS = IsOpenedHandlesInstance(Handle);
+  if (!HS)
+    return (void *)uintptr_t(GetProcAddress((HMODULE)Handle, Symbol));
+
+  // Could have done a dlclose on the *Process* handle
+  if (!HS->Process)
+    return nullptr;
+
+  // Trials indicate EnumProcessModulesEx is consistently faster than using
+  // EnumerateLoadedModules64 or CreateToolhelp32Snapshot.
+  //
+  // | Handles | DbgHelp.dll | CreateSnapshot | EnumProcessModulesEx
+  // |=========|=============|========================================
+  // |    37   | 0.0000585 * |   0.0003031    |      0.0000152
+  // |   1020  | 0.0026310 * |   0.0121598    |      0.0002683
+  // |   2084  | 0.0149418 * |   0.0369936    |      0.0005610
+  //
+  // * Not including the load time of Dbghelp.dll (~.005 sec)
+  //
+  // There's still a case to somehow cache the result of EnumProcessModulesEx
+  // across invocations, but the complication of doing that properly...
+  // Possibly using LdrRegisterDllNotification to invalidate the cache?
+
+  DWORD Bytes = 0;
+  HMODULE Self = HMODULE(GetCurrentProcess());
+  if (!GetProcessModules(Self, Bytes))
+    return nullptr;
+
+  // Get the most recent list, in case any modules were added/removed between
+  // the call to EnumProcessModulesEx that gets the byte count and the one
+  // that copies the HMODULEs. MSDN is pretty clear that if the module list
+  // changes during the call to EnumProcessModulesEx the results should not
+  // be used.
+  std::vector<HMODULE> Handles;
+  do {
+    assert(Bytes && ((Bytes % sizeof(HMODULE)) == 0) &&
+           "Should have at least one module and be aligned");
+    Handles.resize(Bytes / sizeof(HMODULE));
+    if (!GetProcessModules(Self, Bytes, Handles.data()))
+      return nullptr;
+  } while (Bytes != (Handles.size() * sizeof(HMODULE)));
+
+  // Try EXE first, mirroring what dlsym(dlopen(NULL)) does.
+  if (FARPROC Ptr = GetProcAddress(HMODULE(Handles.front()), Symbol))
+    return (void *) uintptr_t(Ptr);
+
+  if (Handles.size() > 1) {
+    // This is different behaviour than what Posix dlsym(dlopen(NULL)) does.
+    // Doing that here is causing real problems for the JIT where msvc.dll
+    // and ucrt.dll can define the same symbols. The runtime linker will choose
+    // symbols from ucrt.dll first, but iterating NOT in reverse here would
+    // mean that the msvc.dll versions would be returned.
+
+    for (auto I = Handles.rbegin(), E = Handles.rend()-1; I != E; ++I) {
+      if (FARPROC Ptr = GetProcAddress(HMODULE(*I), Symbol))
+        return (void *) uintptr_t(Ptr);
+    }
+  }
+  return nullptr;
 }
+
 // Stack probing routines are in the support library (e.g. libgcc), but we don't
 // have dynamic linking on windows. Provide a hook.
 #define EXPLICIT_SYMBOL(SYM) \
@@ -129,38 +171,18 @@ sys::DynamicLibrary::addPermanentLibrary(void *handle, std::string *errMsg) {
 #undef INLINE_DEF_SYMBOL1
 #undef INLINE_DEF_SYMBOL2
 
-void *sys::DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
-  SmartScopedLock<true> Lock(*SymbolsMutex);
-
-  // First check symbols added via AddSymbol().
-  if (ExplicitSymbols.isConstructed()) {
-    StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName);
-
-    if (i != ExplicitSymbols->end())
-      return i->second;
-  }
-
-  // Now search the libraries.
- if (OpenedHandles.isConstructed()) { - for (DenseSet<HMODULE>::iterator I = OpenedHandles->begin(), - E = OpenedHandles->end(); I != E; ++I) { - FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName); - if (ptr) { - return (void *)(intptr_t)ptr; - } - } - } +static void *DoSearch(const char *SymbolName) { #define EXPLICIT_SYMBOL(SYM) \ - if (!strcmp(symbolName, #SYM)) \ + if (!strcmp(SymbolName, #SYM)) \ return (void *)&SYM; #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) \ - if (!strcmp(symbolName, #SYMFROM)) \ + if (!strcmp(SymbolName, #SYMFROM)) \ return (void *)&SYMTO; #ifdef _M_IX86 #define INLINE_DEF_SYMBOL1(TYP, SYM) \ - if (!strcmp(symbolName, #SYM)) \ + if (!strcmp(SymbolName, #SYM)) \ return (void *)&inline_##SYM; #define INLINE_DEF_SYMBOL2(TYP, SYM) INLINE_DEF_SYMBOL1(TYP, SYM) #endif @@ -174,15 +196,5 @@ void *sys::DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { #undef INLINE_DEF_SYMBOL1 #undef INLINE_DEF_SYMBOL2 - return 0; -} - -void *sys::DynamicLibrary::getAddressOfSymbol(const char *symbolName) { - if (!isValid()) - return NULL; - if (Data == &OpenedHandles) - return SearchForAddressOfSymbol(symbolName); - return (void *)(intptr_t)GetProcAddress((HMODULE)Data, symbolName); -} - + return nullptr; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index efc221893782..056ffd58b521 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -580,8 +580,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO_Sym = MI->getOperand(0); MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym); MCOperand Sym, SymTLSDescLo12, SymTLSDesc; - MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | - AArch64II::MO_NC); + MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF); MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE); MCInstLowering.lowerOperand(MO_Sym, Sym); MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp index b2f55a7e1e09..ff3e4c40e2c2 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp @@ -247,7 +247,7 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, unsigned i = 0; for (auto &Arg : F.args()) { ArgInfo OrigArg{VRegs[i], Arg.getType()}; - setArgFlags(OrigArg, i + 1, DL, F); + setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F); bool Split = false; LLT Ty = MRI.getType(VRegs[i]); unsigned Dst = VRegs[i]; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 4e5e3e43a468..083708001757 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -2907,16 +2907,13 @@ bool AArch64FastISel::fastLowerArguments() { // Only handle simple cases of up to 8 GPR and FPR each. unsigned GPRCnt = 0; unsigned FPRCnt = 0; - unsigned Idx = 0; for (auto const &Arg : F->args()) { - // The first argument is at index 1. 
- ++Idx; - if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || - F->getAttributes().hasAttribute(Idx, Attribute::InReg) || - F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) || - F->getAttributes().hasAttribute(Idx, Attribute::Nest)) + if (Arg.hasAttribute(Attribute::ByVal) || + Arg.hasAttribute(Attribute::InReg) || + Arg.hasAttribute(Attribute::StructRet) || + Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::Nest)) return false; Type *ArgTy = Arg.getType(); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7141e77fcd25..b18fb30eb2d4 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -1852,17 +1853,17 @@ static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, OpUsefulBits = 1; if (MSB >= Imm) { - OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); + OpUsefulBits <<= MSB - Imm + 1; --OpUsefulBits; // The interesting part will be in the lower part of the result getUsefulBits(Op, OpUsefulBits, Depth + 1); // The interesting part was starting at Imm in the argument - OpUsefulBits = OpUsefulBits.shl(Imm); + OpUsefulBits <<= Imm; } else { - OpUsefulBits = OpUsefulBits.shl(MSB + 1); + OpUsefulBits <<= MSB + 1; --OpUsefulBits; // The interesting part will be shifted in the result - OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); + OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; getUsefulBits(Op, OpUsefulBits, Depth + 1); // The interesting part was at zero in the argument OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); @@ -1892,7 +1893,7 @@ static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { // Shift Left uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); - Mask = Mask.shl(ShiftAmt); + Mask <<= ShiftAmt; getUsefulBits(Op, Mask, Depth + 1); Mask.lshrInPlace(ShiftAmt); } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { @@ -1902,7 +1903,7 @@ static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); Mask.lshrInPlace(ShiftAmt); getUsefulBits(Op, Mask, Depth + 1); - Mask = Mask.shl(ShiftAmt); + Mask <<= ShiftAmt; } else return; @@ -1930,13 +1931,13 @@ static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, uint64_t Width = MSB - Imm + 1; uint64_t LSB = Imm; - OpUsefulBits = OpUsefulBits.shl(Width); + OpUsefulBits <<= Width; --OpUsefulBits; if (Op.getOperand(1) == Orig) { // Copy the low bits from the result to bits starting from LSB. 
Mask = ResultUsefulBits & OpUsefulBits;
- Mask = Mask.shl(LSB);
+ Mask <<= LSB;
}
if (Op.getOperand(0) == Orig)
@@ -1947,9 +1948,9 @@ static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
uint64_t Width = MSB + 1;
uint64_t LSB = UsefulBits.getBitWidth() - Imm;
- OpUsefulBits = OpUsefulBits.shl(Width);
+ OpUsefulBits <<= Width;
--OpUsefulBits;
- OpUsefulBits = OpUsefulBits.shl(LSB);
+ OpUsefulBits <<= LSB;
if (Op.getOperand(1) == Orig) {
// Copy the bits from the result to the zero bits.
@@ -2078,18 +2079,18 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
(void)BitWidth;
assert(BitWidth == 32 || BitWidth == 64);
- APInt KnownZero, KnownOne;
- CurDAG->computeKnownBits(Op, KnownZero, KnownOne);
+ KnownBits Known;
+ CurDAG->computeKnownBits(Op, Known);
// Non-zero in the sense that they're not provably zero, which is the key
// point if we want to use this value
- uint64_t NonZeroBits = (~KnownZero).getZExtValue();
+ uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
// Discard a constant AND mask if present. It's safe because the node will
// already have been factored into the computeKnownBits calculation above.
uint64_t AndImm;
if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
- assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0);
+ assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
Op = Op.getOperand(0);
}
@@ -2158,15 +2159,15 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
// Compute the Known Zero for the AND as this allows us to catch more general
// cases than just looking for AND with imm.
- APInt KnownZero, KnownOne;
- CurDAG->computeKnownBits(And, KnownZero, KnownOne);
+ KnownBits Known;
+ CurDAG->computeKnownBits(And, Known);
// Non-zero in the sense that they're not provably zero, which is the key
// point if we want to use this value.
- uint64_t NotKnownZero = (~KnownZero).getZExtValue();
+ uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
// The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
- if (!isShiftedMask(KnownZero.getZExtValue(), VT))
+ if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
return false;
// The bits being inserted must only set those bits that are known to be zero.
@@ -2300,15 +2301,15 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
// This allows us to catch more general cases than just looking for
// AND with imm. Indeed, simplify-demanded-bits may have removed
// the AND instruction because it proved it was useless.
- APInt KnownZero, KnownOne;
- CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);
+ KnownBits Known;
+ CurDAG->computeKnownBits(OrOpd1Val, Known);
// Check if there is enough room for the second operand to appear
// in the first one
APInt BitsToBeInserted =
- APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);
+ APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
- if ((BitsToBeInserted & ~KnownZero) != 0)
+ if ((BitsToBeInserted & ~Known.Zero) != 0)
continue;
// Set the first operand
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a7c98fbb425f..eb1bbcafe6e6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -67,6 +67,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetCallingConv.h"
@@ -929,20 +930,19 @@
}
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
-/// Mask are known to be either zero or one and return them in the
-/// KnownZero/KnownOne bitsets.
+/// Mask are known to be either zero or one and return them in Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
- const SDValue Op, APInt &KnownZero, APInt &KnownOne,
+ const SDValue Op, KnownBits &Known,
const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
case AArch64ISD::CSEL: {
- APInt KnownZero2, KnownOne2;
- DAG.computeKnownBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1);
- DAG.computeKnownBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1);
- KnownZero &= KnownZero2;
- KnownOne &= KnownOne2;
+ KnownBits Known2;
+ DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
+ DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
+ Known.Zero &= Known2.Zero;
+ Known.One &= Known2.One;
break;
}
case ISD::INTRINSIC_W_CHAIN: {
@@ -952,10 +952,10 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
default: return;
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
- unsigned BitWidth = KnownOne.getBitWidth();
+ unsigned BitWidth = Known.getBitWidth();
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
}
@@ -974,15 +974,15 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
// bits larger than the element datatype. 32-bit or larger doesn't need
// this as those are legal types and will be handled by isel directly.
MVT VT = Op.getOperand(1).getValueType().getSimpleVT(); - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); if (VT == MVT::v8i8 || VT == MVT::v16i8) { assert(BitWidth >= 8 && "Unexpected width!"); APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); - KnownZero |= Mask; + Known.Zero |= Mask; } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { assert(BitWidth >= 16 && "Unexpected width!"); APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); - KnownZero |= Mask; + Known.Zero |= Mask; } break; } break; @@ -4847,9 +4847,9 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand, // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N) for (int i = ExtraSteps; i > 0; --i) { SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate, - &Flags); - Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, &Flags); - Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags); + Flags); + Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags); + Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags); } if (!Reciprocal) { @@ -4858,7 +4858,7 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand, SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ); - Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, &Flags); + Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags); // Correct the result if the operand is 0.0. Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, Eq, Operand, Estimate); @@ -4887,8 +4887,8 @@ SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand, // AArch64 reciprocal iteration instruction: (2 - M * N) for (int i = ExtraSteps; i > 0; --i) { SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand, - Estimate, &Flags); - Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags); + Estimate, Flags); + Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags); } ExtraSteps = 0; @@ -9461,11 +9461,11 @@ static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { APInt DemandedMask = APInt::getLowBitsSet(64, 56); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.SimplifyDemandedBits(Addr, DemandedMask, KnownZero, KnownOne, TLO)) { + if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) { DCI.CommitTargetLoweringOpt(TLO); return true; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 6081b07479b9..89db566c219c 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -250,8 +250,8 @@ public: /// Determine which of the bits specified in Mask are known to be either zero /// or one and return them in the KnownZero/KnownOne bitsets. 
- void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, - APInt &KnownOne, const APInt &DemandedElts, + void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, + const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 82e9c5a88e3b..ce401206e517 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -314,8 +314,8 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; // AArch64 Instruction Predicate Definitions. def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; -def ForCodeSize : Predicate<"ForCodeSize">; -def NotForCodeSize : Predicate<"!ForCodeSize">; +def ForCodeSize : Predicate<"Subtarget->getForCodeSize()">; +def NotForCodeSize : Predicate<"!Subtarget->getForCodeSize()">; include "AArch64InstrFormats.td" diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index b0e0e3eb4ba7..9bfd570e9a82 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -51,7 +51,6 @@ public: const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI); - void beginFunction(const MachineFunction &MF) override; bool select(MachineInstr &I) const override; private: @@ -74,12 +73,10 @@ private: const AArch64InstrInfo &TII; const AArch64RegisterInfo &TRI; const AArch64RegisterBankInfo &RBI; - bool ForCodeSize; - PredicateBitset AvailableFeatures; - PredicateBitset - computeAvailableFeatures(const MachineFunction *MF, - const AArch64Subtarget *Subtarget) const; +#define GET_GLOBALISEL_PREDICATES_DECL +#include "AArch64GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL // We declare the temporaries used by selectImpl() in the class to minimize the // cost of constructing placeholder values. 
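The computeKnownBitsForTargetNode hunks above all apply one mechanical rewrite: the paired KnownZero/KnownOne APInt outputs become a single KnownBits struct carrying Zero and One members. A minimal sketch of the resulting idiom (the CSEL-style merge), assuming only the parts of the KnownBits interface visible in these hunks:

    #include <cassert>
    #include "llvm/Support/KnownBits.h"

    using namespace llvm;

    // A bit is known in the result only if it is known, with the same value,
    // on both sides of the select; everything else becomes unknown.
    static KnownBits mergeSelectKnownBits(const KnownBits &LHS,
                                          const KnownBits &RHS) {
      assert(LHS.getBitWidth() == RHS.getBitWidth() && "width mismatch");
      KnownBits Known(LHS.getBitWidth());
      Known.Zero = LHS.Zero & RHS.Zero; // provably zero in both inputs
      Known.One = LHS.One & RHS.One;    // provably one in both inputs
      return Known;
    }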
@@ -98,7 +95,10 @@ AArch64InstructionSelector::AArch64InstructionSelector( const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI) : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI), ForCodeSize(), AvailableFeatures() + TRI(*STI.getRegisterInfo()), RBI(RBI), +#define GET_GLOBALISEL_PREDICATES_INIT +#include "AArch64GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT #define GET_GLOBALISEL_TEMPORARIES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -577,12 +577,6 @@ bool AArch64InstructionSelector::selectVaStartDarwin( return true; } -void AArch64InstructionSelector::beginFunction( - const MachineFunction &MF) { - ForCodeSize = MF.getFunction()->optForSize(); - AvailableFeatures = computeAvailableFeatures(&MF, &STI); -} - bool AArch64InstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 042755bd36d0..abdeac019a18 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -12,8 +12,22 @@ //===----------------------------------------------------------------------===// #include "AArch64Subtarget.h" + +#include "AArch64.h" #include "AArch64InstrInfo.h" #include "AArch64PBQPRegAlloc.h" +#include "AArch64TargetMachine.h" + +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "AArch64CallLowering.h" +#include "AArch64LegalizerInfo.h" +#include "AArch64RegisterBankInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#endif #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/TargetRegistry.h" @@ -111,13 +125,63 @@ void AArch64Subtarget::initializeProperties() { } } +#ifdef LLVM_BUILD_GLOBAL_ISEL +namespace { + +struct AArch64GISelActualAccessor : public GISelAccessor { + std::unique_ptr<CallLowering> CallLoweringInfo; + std::unique_ptr<InstructionSelector> InstSelector; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } +}; + +} // end anonymous namespace +#endif + AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, - const TargetMachine &TM, bool LittleEndian) + const TargetMachine &TM, bool LittleEndian, + bool ForCodeSize) : AArch64GenSubtargetInfo(TT, CPU, FS), ReserveX18(TT.isOSDarwin()), IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(), - TLInfo(TM, *this), GISel() {} + TLInfo(TM, *this), GISel(), ForCodeSize(ForCodeSize) { +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *AArch64GISel = new GISelAccessor(); +#else + 
AArch64GISelActualAccessor *AArch64GISel = new AArch64GISelActualAccessor();
+ AArch64GISel->CallLoweringInfo.reset(
+ new AArch64CallLowering(*getTargetLowering()));
+ AArch64GISel->Legalizer.reset(new AArch64LegalizerInfo());
+
+ auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
+
+ // FIXME: At this point, we can't rely on Subtarget having RBI.
+ // It's awkward to mix passing RBI and the Subtarget; should we pass
+ // TII/TRI as well?
+ AArch64GISel->InstSelector.reset(createAArch64InstructionSelector(
+ *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
+
+ AArch64GISel->RegBankInfo.reset(RBI);
+#endif
+ setGISelAccessor(*AArch64GISel);
+}
const CallLowering *AArch64Subtarget::getCallLowering() const {
assert(GISel && "Access to GlobalISel APIs not set");
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 3d66a9ea8ce6..5b9bee6e41b8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -124,6 +124,8 @@ protected:
/// an optional library.
std::unique_ptr<GISelAccessor> GISel;
+ bool ForCodeSize;
+
private:
/// initializeSubtargetDependencies - Initializes using CPUString and the
/// passed in feature string so that we can use initializer lists for
@@ -139,7 +141,7 @@ public:
/// of the specified triple.
AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM,
- bool LittleEndian);
+ bool LittleEndian, bool ForCodeSize);
/// This object will take ownership of \p GISelAccessor.
void setGISelAccessor(GISelAccessor &GISel) {
@@ -262,6 +264,8 @@ public:
}
}
+ bool getForCodeSize() const { return ForCodeSize; }
+
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index dcc51bf02329..de7108d302dd 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -11,12 +11,7 @@ //===----------------------------------------------------------------------===// #include "AArch64.h" -#include "AArch64CallLowering.h" -#include "AArch64LegalizerInfo.h" #include "AArch64MacroFusion.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL -#include "AArch64RegisterBankInfo.h" -#endif #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" @@ -25,7 +20,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" @@ -222,39 +216,11 @@ AArch64TargetMachine::AArch64TargetMachine( AArch64TargetMachine::~AArch64TargetMachine() = default; -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { - -struct AArch64GISelActualAccessor : public GISelAccessor { - std::unique_ptr<CallLowering> CallLoweringInfo; - std::unique_ptr<InstructionSelector> InstSelector; - std::unique_ptr<LegalizerInfo> Legalizer; - std::unique_ptr<RegisterBankInfo> RegBankInfo; - - const CallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; - -} // end anonymous namespace -#endif - const AArch64Subtarget * AArch64TargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); + bool ForCodeSize = F.optForSize(); std::string CPU = !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString().str() @@ -262,35 +228,17 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { std::string FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString().str() : TargetFS; + std::string ForCodeSizeStr = + std::string(ForCodeSize ? "+" : "-") + "forcodesize"; - auto &I = SubtargetMap[CPU + FS]; + auto &I = SubtargetMap[CPU + FS + ForCodeSizeStr]; if (!I) { // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this, - isLittle); -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - AArch64GISelActualAccessor *GISel = - new AArch64GISelActualAccessor(); - GISel->CallLoweringInfo.reset( - new AArch64CallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new AArch64LegalizerInfo()); - - auto *RBI = new AArch64RegisterBankInfo(*I->getRegisterInfo()); - - // FIXME: At this point, we can't rely on Subtarget having RBI. - // It's awkward to mix passing RBI and the Subtarget; should we pass - // TII/TRI as well? 
- GISel->InstSelector.reset( - createAArch64InstructionSelector(*this, *I, *RBI)); - - GISel->RegBankInfo.reset(RBI); -#endif - I->setGISelAccessor(*GISel); + isLittle, ForCodeSize); } return I.get(); } diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index c954c0eb2c6b..10e7241da709 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -69,34 +69,34 @@ static bool isNonILP32reloc(const MCFixup &Fixup, return true; case AArch64MCExpr::VK_ABS_G2_S: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_ABS_G2_NC: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_ABS_G1_S: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_ABS_G1_NC: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_DTPREL_G2: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_DTPREL_G1_NC: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_TPREL_G2: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_TPREL_G1_NC: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_GOTTPREL_G1: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_GOTTPREL_G0_NC: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC)); - return ELF::R_AARCH64_NONE; + return true; default: return false; } return false; @@ -141,6 +141,16 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, case AArch64::fixup_aarch64_pcrel_adrp_imm21: if (SymLoc == AArch64MCExpr::VK_ABS && !IsNC) return R_CLS(ADR_PREL_PG_HI21); + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) { + if (IsILP32) { + Ctx.reportError(Fixup.getLoc(), + "invalid fixup for 32-bit pcrel ADRP instruction " + "VK_ABS VK_NC"); + return ELF::R_AARCH64_NONE; + } else { + return ELF::R_AARCH64_ADR_PREL_PG_HI21_NC; + } + } if (SymLoc == AArch64MCExpr::VK_GOT && !IsNC) return R_CLS(ADR_GOT_PAGE); if (SymLoc == AArch64MCExpr::VK_GOTTPREL && !IsNC) @@ -179,7 +189,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(ABS32); case FK_Data_8: if (IsILP32) { - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(ABS64)); + Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte absolute data " + "relocation not supported (LP64 eqv: ABS64)"); return ELF::R_AARCH64_NONE; } else return ELF::R_AARCH64_ABS64; @@ -197,7 +208,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, if (RefKind == AArch64MCExpr::VK_TPREL_LO12) return R_CLS(TLSLE_ADD_TPREL_LO12); if (RefKind == AArch64MCExpr::VK_TLSDESC_LO12) - return R_CLS(TLSDESC_ADD_LO12_NC); + return R_CLS(TLSDESC_ADD_LO12); if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return R_CLS(ADD_ABS_LO12_NC); @@ -245,15 +256,67 @@ unsigned 
AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return R_CLS(TLSLE_LDST32_TPREL_LO12);
if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
return R_CLS(TLSLE_LDST32_TPREL_LO12_NC);
+ if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) {
+ if (IsILP32) {
+ return ELF::R_AARCH64_P32_LD32_GOT_LO12_NC;
+ } else {
+ Ctx.reportError(Fixup.getLoc(),
+ "LP64 4 byte unchecked GOT load/store relocation "
+ "not supported (ILP32 eqv: LD32_GOT_LO12_NC)");
+ return ELF::R_AARCH64_NONE;
+ }
+ }
+ if (SymLoc == AArch64MCExpr::VK_GOT && !IsNC) {
+ if (IsILP32) {
+ Ctx.reportError(Fixup.getLoc(),
+ "ILP32 4 byte checked GOT load/store relocation "
+ "not supported (unchecked eqv: LD32_GOT_LO12_NC)");
+ } else {
+ Ctx.reportError(Fixup.getLoc(),
+ "LP64 4 byte checked GOT load/store relocation "
+ "not supported (unchecked/ILP32 eqv: "
+ "LD32_GOT_LO12_NC)");
+ }
+ return ELF::R_AARCH64_NONE;
+ }
+ if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC) {
+ if (IsILP32) {
+ return ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC;
+ } else {
+ Ctx.reportError(Fixup.getLoc(), "LP64 32-bit load/store "
+ "relocation not supported (ILP32 eqv: "
+ "TLSIE_LD32_GOTTPREL_LO12_NC)");
+ return ELF::R_AARCH64_NONE;
+ }
+ }
+ if (SymLoc == AArch64MCExpr::VK_TLSDESC && !IsNC) {
+ if (IsILP32) {
+ return ELF::R_AARCH64_P32_TLSDESC_LD32_LO12;
+ } else {
+ Ctx.reportError(Fixup.getLoc(),
+ "LP64 4 byte TLSDESC load/store relocation "
+ "not supported (ILP32 eqv: TLSDESC_LD64_LO12)");
+ return ELF::R_AARCH64_NONE;
+ }
+ }
Ctx.reportError(Fixup.getLoc(),
- "invalid fixup for 32-bit load/store instruction");
+ "invalid fixup for 32-bit load/store instruction "
+ "fixup_aarch64_ldst_imm12_scale4");
return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_ldst_imm12_scale8:
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
return R_CLS(LDST64_ABS_LO12_NC);
- if (SymLoc == AArch64MCExpr::VK_GOT && IsNC)
- return R_CLS(LD64_GOT_LO12_NC);
+ if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) {
+ if (!IsILP32) {
+ return ELF::R_AARCH64_LD64_GOT_LO12_NC;
+ } else {
+ Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
+ "relocation not supported (LP64 eqv: "
+ "LD64_GOT_LO12_NC)");
+ return ELF::R_AARCH64_NONE;
+ }
+ }
if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
return R_CLS(TLSLD_LDST64_DTPREL_LO12);
if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
@@ -262,19 +325,40 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return R_CLS(TLSLE_LDST64_TPREL_LO12);
if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
return R_CLS(TLSLE_LDST64_TPREL_LO12_NC);
- if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC)
- return IsILP32 ? ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC
- : ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
- if (SymLoc == AArch64MCExpr::VK_TLSDESC && IsNC)
- return IsILP32 ?
ELF::R_AARCH64_P32_TLSDESC_LD32_LO12_NC - : ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; - + if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC) { + if (!IsILP32) { + return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; + } else { + Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " + "relocation not supported (LP64 eqv: " + "TLSIE_LD64_GOTTPREL_LO12_NC)"); + return ELF::R_AARCH64_NONE; + } + } + if (SymLoc == AArch64MCExpr::VK_TLSDESC) { + if (!IsILP32) { + return ELF::R_AARCH64_TLSDESC_LD64_LO12; + } else { + Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " + "relocation not supported (LP64 eqv: " + "TLSDESC_LD64_LO12)"); + return ELF::R_AARCH64_NONE; + } + } Ctx.reportError(Fixup.getLoc(), "invalid fixup for 64-bit load/store instruction"); return ELF::R_AARCH64_NONE; case AArch64::fixup_aarch64_ldst_imm12_scale16: if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return R_CLS(LDST128_ABS_LO12_NC); + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) + return R_CLS(TLSLD_LDST128_DTPREL_LO12); + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) + return R_CLS(TLSLD_LDST128_DTPREL_LO12_NC); + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) + return R_CLS(TLSLE_LDST128_TPREL_LO12); + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) + return R_CLS(TLSLE_LDST128_TPREL_LO12_NC); Ctx.reportError(Fixup.getLoc(), "invalid fixup for 128-bit load/store instruction"); diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index a540f49866a9..97c92fa0778d 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -62,6 +62,7 @@ StringRef AArch64MCExpr::getVariantKindName() const { case VK_TPREL_LO12_NC: return ":tprel_lo12_nc:"; case VK_TLSDESC_LO12: return ":tlsdesc_lo12:"; case VK_ABS_PAGE: return ""; + case VK_ABS_PAGE_NC: return ":pg_hi21_nc:"; case VK_GOT_PAGE: return ":got:"; case VK_GOT_LO12: return ":got_lo12:"; case VK_GOTTPREL_PAGE: return ":gottprel:"; diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index db36a65564ce..3dbf0f84a665 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -62,6 +62,7 @@ public: // since a user would write ":lo12:"). 
VK_CALL = VK_ABS, VK_ABS_PAGE = VK_ABS | VK_PAGE, + VK_ABS_PAGE_NC = VK_ABS | VK_PAGE | VK_NC, VK_ABS_G3 = VK_ABS | VK_G3, VK_ABS_G2 = VK_ABS | VK_G2, VK_ABS_G2_S = VK_SABS | VK_G2, @@ -95,7 +96,7 @@ public: VK_TPREL_HI12 = VK_TPREL | VK_HI12, VK_TPREL_LO12 = VK_TPREL | VK_PAGEOFF, VK_TPREL_LO12_NC = VK_TPREL | VK_PAGEOFF | VK_NC, - VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF | VK_NC, + VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF, VK_TLSDESC_PAGE = VK_TLSDESC | VK_PAGE, VK_INVALID = 0xfff diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td index 0f331486d0f8..2e5b78bbf7ef 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -407,7 +407,7 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, - FeatureFastFMAF32 + FeatureFastFMAF32, FeatureDPP ] >; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index a81bcb56dfdc..2ce23dbf08e6 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -149,11 +149,9 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { return; const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); - SIProgramInfo KernelInfo; amd_kernel_code_t KernelCode; if (STM.isAmdCodeObjectV2(*MF)) { - getSIProgramInfo(KernelInfo, *MF); - getAmdKernelCode(KernelCode, KernelInfo, *MF); + getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF); OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); getTargetStreamer().EmitAMDKernelCodeT(KernelCode); @@ -187,7 +185,26 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { AsmPrinter::EmitGlobalVariable(GV); } +bool AMDGPUAsmPrinter::doFinalization(Module &M) { + CallGraphResourceInfo.clear(); + return AsmPrinter::doFinalization(M); +} + +// Print comments that apply to both callable functions and entry points. +void AMDGPUAsmPrinter::emitCommonFunctionComments( + uint32_t NumVGPR, + uint32_t NumSGPR, + uint32_t ScratchSize, + uint64_t CodeSize) { + OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false); + OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false); + OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false); + OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false); +} + bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + CurrentProgramInfo = SIProgramInfo(); + const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>(); // The starting address of all shader programs must be 256 bytes aligned. 
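From here on the printer computes resource usage once per MachineFunction: entry functions refresh CurrentProgramInfo, while callable functions are recorded in the Function-keyed CallGraphResourceInfo map that doFinalization() clears between modules (the insertion itself appears in the next hunk). A sketch of that bookkeeping shape; ResourceInfo, the key type, and runOnFunction here are illustrative stand-ins, not the in-tree names:

    #include <cassert>
    #include <unordered_map>

    struct ResourceInfo {
      int NumVGPR = 0;
      int NumExplicitSGPR = 0;
    };

    class PrinterSketch {
      ResourceInfo CurrentProgramInfo; // reset at the top of each function
      std::unordered_map<const void *, ResourceInfo> CallGraphResourceInfo;

    public:
      void runOnFunction(const void *F, bool IsEntry,
                         const ResourceInfo &Analyzed) {
        if (IsEntry) {
          CurrentProgramInfo = Analyzed; // kernels are emitted immediately
          return;
        }
        // Callable functions are analyzed exactly once and cached so the
        // per-function comments (and, later, callers) can look them up.
        auto I = CallGraphResourceInfo.emplace(F, ResourceInfo());
        assert(I.second && "should only be called once per function");
        I.first->second = Analyzed;
      }

      void doFinalization() { CallGraphResourceInfo.clear(); } // per module
    };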
@@ -204,11 +221,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->SwitchSection(ConfigSection); } - SIProgramInfo KernelInfo; if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - getSIProgramInfo(KernelInfo, MF); + if (MFI->isEntryFunction()) { + getSIProgramInfo(CurrentProgramInfo, MF); + } else { + auto I = CallGraphResourceInfo.insert( + std::make_pair(MF.getFunction(), SIFunctionResourceInfo())); + SIFunctionResourceInfo &Info = I.first->second; + assert(I.second && "should only be called once per function"); + Info = analyzeResourceUsage(MF); + } + if (!STM.isAmdHsaOS()) { - EmitProgramInfoSI(MF, KernelInfo); + EmitProgramInfoSI(MF, CurrentProgramInfo); } } else { EmitProgramInfoR600(MF); @@ -226,72 +251,87 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->SwitchSection(CommentSection); if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - if (MFI->isEntryFunction()) { - OutStreamer->emitRawComment(" Kernel info:", false); - } else { + if (!MFI->isEntryFunction()) { OutStreamer->emitRawComment(" Function info:", false); + SIFunctionResourceInfo &Info = CallGraphResourceInfo[MF.getFunction()]; + emitCommonFunctionComments( + Info.NumVGPR, + Info.getTotalNumSGPRs(MF.getSubtarget<SISubtarget>()), + Info.PrivateSegmentSize, + getFunctionCodeSize(MF)); + return false; } + OutStreamer->emitRawComment(" Kernel info:", false); + emitCommonFunctionComments(CurrentProgramInfo.NumVGPR, + CurrentProgramInfo.NumSGPR, + CurrentProgramInfo.ScratchSize, + getFunctionCodeSize(MF)); + OutStreamer->emitRawComment(" codeLenInByte = " + Twine(getFunctionCodeSize(MF)), false); - OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR), - false); - OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR), - false); - - OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode), - false); - OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode), - false); - OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize), - false); - OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) + - " bytes/workgroup (compile time only)", false); - - if (!MFI->isEntryFunction()) - return false; + OutStreamer->emitRawComment( + " NumSgprs: " + Twine(CurrentProgramInfo.NumSGPR), false); + OutStreamer->emitRawComment( + " NumVgprs: " + Twine(CurrentProgramInfo.NumVGPR), false); + + OutStreamer->emitRawComment( + " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false); + OutStreamer->emitRawComment( + " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false); + OutStreamer->emitRawComment( + " ScratchSize: " + Twine(CurrentProgramInfo.ScratchSize), false); + OutStreamer->emitRawComment( + " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) + + " bytes/workgroup (compile time only)", false); - OutStreamer->emitRawComment(" SGPRBlocks: " + - Twine(KernelInfo.SGPRBlocks), false); - OutStreamer->emitRawComment(" VGPRBlocks: " + - Twine(KernelInfo.VGPRBlocks), false); + OutStreamer->emitRawComment( + " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false); + OutStreamer->emitRawComment( + " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false); - OutStreamer->emitRawComment(" NumSGPRsForWavesPerEU: " + - Twine(KernelInfo.NumSGPRsForWavesPerEU), false); - OutStreamer->emitRawComment(" NumVGPRsForWavesPerEU: " + - Twine(KernelInfo.NumVGPRsForWavesPerEU), false); + OutStreamer->emitRawComment( + " NumSGPRsForWavesPerEU: " + + 
Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false); + OutStreamer->emitRawComment( + " NumVGPRsForWavesPerEU: " + + Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false); - OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst), - false); - OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount), - false); + OutStreamer->emitRawComment( + " ReservedVGPRFirst: " + Twine(CurrentProgramInfo.ReservedVGPRFirst), + false); + OutStreamer->emitRawComment( + " ReservedVGPRCount: " + Twine(CurrentProgramInfo.ReservedVGPRCount), + false); if (MF.getSubtarget<SISubtarget>().debuggerEmitPrologue()) { - OutStreamer->emitRawComment(" DebuggerWavefrontPrivateSegmentOffsetSGPR: s" + - Twine(KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false); - OutStreamer->emitRawComment(" DebuggerPrivateSegmentBufferSGPR: s" + - Twine(KernelInfo.DebuggerPrivateSegmentBufferSGPR), false); + OutStreamer->emitRawComment( + " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" + + Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false); + OutStreamer->emitRawComment( + " DebuggerPrivateSegmentBufferSGPR: s" + + Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false); } - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " + - Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)), - false); - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " + - Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)), - false); - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " + - Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)), - false); - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " + - Twine(G_00B84C_TGID_Y_EN(KernelInfo.ComputePGMRSrc2)), - false); - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " + - Twine(G_00B84C_TGID_Z_EN(KernelInfo.ComputePGMRSrc2)), - false); - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " + - Twine(G_00B84C_TIDIG_COMP_CNT(KernelInfo.ComputePGMRSrc2)), - false); - + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:USER_SGPR: " + + Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " + + Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:TGID_X_EN: " + + Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:TGID_Y_EN: " + + Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:TGID_Z_EN: " + + Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " + + Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)), + false); } else { R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); OutStreamer->emitRawComment( @@ -407,71 +447,117 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, return false; } -void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, - const MachineFunction &MF) const { - const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); - const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - const SIInstrInfo *TII = STM.getInstrInfo(); - const SIRegisterInfo *RI = &TII->getRegisterInfo(); +static unsigned 
getNumExtraSGPRs(const SISubtarget &ST, + bool VCCUsed, + bool FlatScrUsed) { + unsigned ExtraSGPRs = 0; + if (VCCUsed) + ExtraSGPRs = 2; + if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) { + if (FlatScrUsed) + ExtraSGPRs = 4; + } else { + if (ST.isXNACKEnabled()) + ExtraSGPRs = 4; - MCPhysReg NumVGPRReg = AMDGPU::NoRegister; - for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) { - if (MRI.isPhysRegUsed(Reg)) { - NumVGPRReg = Reg; - break; - } + if (FlatScrUsed) + ExtraSGPRs = 6; } - MCPhysReg NumSGPRReg = AMDGPU::NoRegister; - for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) { - if (MRI.isPhysRegUsed(Reg)) { - NumSGPRReg = Reg; - break; - } - } + return ExtraSGPRs; +} - // We found the maximum register index. They start at 0, so add one to get the - // number of registers. - ProgInfo.NumVGPR = NumVGPRReg == AMDGPU::NoRegister ? 0 : - RI->getHWRegIndex(NumVGPRReg) + 1; - ProgInfo.NumSGPR = NumSGPRReg == AMDGPU::NoRegister ? 0 : - RI->getHWRegIndex(NumSGPRReg) + 1; - unsigned ExtraSGPRs = 0; +int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs( + const SISubtarget &ST) const { + return NumExplicitSGPR + getNumExtraSGPRs(ST, UsesVCC, UsesFlatScratch); +} - ProgInfo.VCCUsed = MRI.isPhysRegUsed(AMDGPU::VCC_LO) || - MRI.isPhysRegUsed(AMDGPU::VCC_HI); - if (ProgInfo.VCCUsed) - ExtraSGPRs = 2; +AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( + const MachineFunction &MF) const { + SIFunctionResourceInfo Info; - ProgInfo.FlatUsed = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) || - MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI); + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); + const SIRegisterInfo &TRI = TII->getRegisterInfo(); + + Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) || + MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI); // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat - // instructions aren't used to access the scratch buffer. Inline assembly - // may need it though. + // instructions aren't used to access the scratch buffer. Inline assembly may + // need it though. // // If we only have implicit uses of flat_scr on flat instructions, it is not // really needed. - if (ProgInfo.FlatUsed && !MFI->hasFlatScratchInit() && + if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() && (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) && !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) && !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) { - ProgInfo.FlatUsed = false; + Info.UsesFlatScratch = false; } - if (STM.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) { - if (ProgInfo.FlatUsed) - ExtraSGPRs = 4; - } else { - if (STM.isXNACKEnabled()) - ExtraSGPRs = 4; + Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects(); + Info.PrivateSegmentSize = FrameInfo.getStackSize(); - if (ProgInfo.FlatUsed) - ExtraSGPRs = 6; + if (!FrameInfo.hasCalls()) { + Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) || + MRI.isPhysRegUsed(AMDGPU::VCC_HI); + + // If there are no calls, MachineRegisterInfo can tell us the used register + // count easily. 
+ + MCPhysReg HighestVGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + HighestVGPRReg = Reg; + break; + } + } + + MCPhysReg HighestSGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + HighestSGPRReg = Reg; + break; + } + } + + // We found the maximum register index. They start at 0, so add one to get the + // number of registers. + Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 : + TRI.getHWRegIndex(HighestVGPRReg) + 1; + Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 : + TRI.getHWRegIndex(HighestSGPRReg) + 1; + + return Info; } + llvm_unreachable("calls not implemented"); +} + +void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, + const MachineFunction &MF) { + SIFunctionResourceInfo Info = analyzeResourceUsage(MF); + + ProgInfo.NumVGPR = Info.NumVGPR; + ProgInfo.NumSGPR = Info.NumExplicitSGPR; + ProgInfo.ScratchSize = Info.PrivateSegmentSize; + ProgInfo.VCCUsed = Info.UsesVCC; + ProgInfo.FlatUsed = Info.UsesFlatScratch; + ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion; + + const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + const SIInstrInfo *TII = STM.getInstrInfo(); + const SIRegisterInfo *RI = &TII->getRegisterInfo(); + + unsigned ExtraSGPRs = getNumExtraSGPRs(STM, + ProgInfo.VCCUsed, + ProgInfo.FlatUsed); unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF); // Check the addressable register limit before we add ExtraSGPRs. @@ -574,9 +660,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // Make clamp modifier on NaN input returns 0. ProgInfo.DX10Clamp = STM.enableDX10Clamp(); - const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); - ProgInfo.ScratchSize = FrameInfo.getStackSize(); - unsigned LDSAlignShift; if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) { // LDS is allocated in 64 dword blocks. 
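Reading getNumExtraSGPRs above as a worked example: the branches assign rather than accumulate, so a VI+ kernel that uses both VCC and flat scratch reserves 6 extra SGPRs, not 2 + 6. A standalone restatement with the same shape, where PreVI stands in for generation < VOLCANIC_ISLANDS:

    #include <cstdio>

    // Mirrors the selection logic in getNumExtraSGPRs; later matches
    // overwrite earlier ones instead of adding to them.
    static unsigned extraSGPRs(bool PreVI, bool XNACK, bool VCC, bool FlatScr) {
      unsigned N = 0;
      if (VCC)
        N = 2; // VCC_LO/VCC_HI
      if (PreVI) {
        if (FlatScr)
          N = 4;
      } else {
        if (XNACK)
          N = 4;
        if (FlatScr)
          N = 6;
      }
      return N;
    }

    int main() {
      // VI+, XNACK off, VCC and flat scratch used, so
      // getTotalNumSGPRs = NumExplicitSGPR + 6.
      std::printf("%u\n", extraSGPRs(false, false, true, true)); // prints 6
    }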
@@ -638,6 +721,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) { switch (CallConv) { default: LLVM_FALLTHROUGH; case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1; + case CallingConv::AMDGPU_HS: return R_00B428_SPI_SHADER_PGM_RSRC1_HS; case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS; case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS; case CallingConv::AMDGPU_VS: return R_00B128_SPI_SHADER_PGM_RSRC1_VS; @@ -645,7 +729,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) { } void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, - const SIProgramInfo &KernelInfo) { + const SIProgramInfo &CurrentProgramInfo) { const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv()); @@ -653,29 +737,29 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4); - OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4); + OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc1, 4); OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4); - OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4); + OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc2, 4); OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4); - OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4); + OutStreamer->EmitIntValue(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4); // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 = // 0" comment but I don't see a corresponding field in the register spec. 
} else { OutStreamer->EmitIntValue(RsrcReg, 4); - OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) | - S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4); + OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) | + S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4); if (STM.isVGPRSpillingEnabled(*MF.getFunction())) { OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4); - OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4); + OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4); } } if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); - OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4); + OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks), 4); OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4); OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4); @@ -703,7 +787,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) { } void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, - const SIProgramInfo &KernelInfo, + const SIProgramInfo &CurrentProgramInfo, const MachineFunction &MF) const { const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); @@ -711,10 +795,13 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits()); Out.compute_pgm_resource_registers = - KernelInfo.ComputePGMRSrc1 | - (KernelInfo.ComputePGMRSrc2 << 32); + CurrentProgramInfo.ComputePGMRSrc1 | + (CurrentProgramInfo.ComputePGMRSrc2 << 32); Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64; + if (CurrentProgramInfo.DynamicCallStack) + Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK; + AMD_HSA_BITS_SET(Out.code_properties, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE, getElementByteSizeValue(STM.getMaxPrivateElementSize())); @@ -766,12 +853,12 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, // FIXME: Should use getKernArgSize Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset()); - Out.wavefront_sgpr_count = KernelInfo.NumSGPR; - Out.workitem_vgpr_count = KernelInfo.NumVGPR; - Out.workitem_private_segment_byte_size = KernelInfo.ScratchSize; - Out.workgroup_group_segment_byte_size = KernelInfo.LDSSize; - Out.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst; - Out.reserved_vgpr_count = KernelInfo.ReservedVGPRCount; + Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR; + Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR; + Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize; + Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize; + Out.reserved_vgpr_first = CurrentProgramInfo.ReservedVGPRFirst; + Out.reserved_vgpr_count = CurrentProgramInfo.ReservedVGPRCount; // These alignment values are specified in powers of two, so alignment = // 2^n. The minimum alignment is 2^4 = 16. 
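Since those fields hold exponents (alignment = 2^n with a 2^4 = 16 byte floor), turning a byte alignment into the encoded value is a clamped base-2 logarithm. A small illustrative helper; the function name and the uint8_t result are assumptions, not the amd_kernel_code_t field types:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Encode a power-of-two byte alignment as the exponent n in 2^n,
    // clamping to the documented minimum of 2^4 = 16 bytes.
    static uint8_t encodeAlignment(uint64_t AlignBytes) {
      assert(AlignBytes != 0 && (AlignBytes & (AlignBytes - 1)) == 0 &&
             "alignment must be a power of two");
      unsigned N = 0;
      while ((uint64_t(1) << N) < AlignBytes)
        ++N; // N = log2(AlignBytes)
      return uint8_t(std::max(N, 4u));
    }

    // encodeAlignment(16) == 4, encodeAlignment(256) == 8, and
    // encodeAlignment(4) clamps up to 4 (i.e. 16 bytes).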
@@ -780,9 +867,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, if (STM.debuggerEmitPrologue()) { Out.debug_wavefront_private_segment_offset_sgpr = - KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; + CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; Out.debug_private_segment_buffer_sgpr = - KernelInfo.DebuggerPrivateSegmentBufferSGPR; + CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR; } } diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 8c86dea4b885..e5adeeb465e1 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -30,9 +30,26 @@ namespace llvm { class AMDGPUTargetStreamer; class MCOperand; +class SISubtarget; class AMDGPUAsmPrinter final : public AsmPrinter { private: + // Track resource usage for callee functions. + struct SIFunctionResourceInfo { + // Track the number of explicitly used VGPRs. Special registers reserved at + // the end are tracked separately. + int32_t NumVGPR = 0; + int32_t NumExplicitSGPR = 0; + uint32_t PrivateSegmentSize = 0; + bool UsesVCC = false; + bool UsesFlatScratch = false; + bool HasDynamicallySizedStack = false; + bool HasRecursion = false; + + int32_t getTotalNumSGPRs(const SISubtarget &ST) const; + }; + + // Track resource usage for kernels / entry functions. struct SIProgramInfo { // Fields set in PGM_RSRC1 pm4 packet. uint32_t VGPRBlocks = 0; @@ -83,14 +100,23 @@ private: uint16_t DebuggerPrivateSegmentBufferSGPR = std::numeric_limits<uint16_t>::max(); + // Whether there is recursion, dynamic allocas, indirect calls or some other + // reason there may be statically unknown stack usage. + bool DynamicCallStack = false; + // Bonus information for debugging. bool VCCUsed = false; SIProgramInfo() = default; }; + SIProgramInfo CurrentProgramInfo; + DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo; + uint64_t getFunctionCodeSize(const MachineFunction &MF) const; - void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const; + SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const; + + void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF); void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo, const MachineFunction &MF) const; void findNumUsedRegistersSI(const MachineFunction &MF, @@ -101,6 +127,10 @@ private: /// can correctly setup the GPU state. 
void EmitProgramInfoR600(const MachineFunction &MF); void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo); + void emitCommonFunctionComments(uint32_t NumVGPR, + uint32_t NumSGPR, + uint32_t ScratchSize, + uint64_t CodeSize); public: explicit AMDGPUAsmPrinter(TargetMachine &TM, @@ -112,6 +142,7 @@ public: AMDGPUTargetStreamer& getTargetStreamer() const; + bool doFinalization(Module &M) override; bool runOnMachineFunction(MachineFunction &MF) override; /// \brief Wrapper for MCInstLowering.lowerOperand() for the tblgen'erated diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index f5110857da84..ccae36ced1f8 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -207,8 +207,8 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { return true; // TODO: Move into isKnownNeverNaN - if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(N)) - return BO->Flags.hasNoNaNs(); + if (N->getFlags().isDefined()) + return N->getFlags().hasNoNaNs(); return CurDAG->isKnownNeverNaN(N); } diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index e21775e61dd4..64e1b8f0d7f0 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/Support/KnownBits.h" #include "SIInstrInfo.h" using namespace llvm; @@ -895,6 +896,7 @@ CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::SPIR_KERNEL: return CC_AMDGPU_Kernel; case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: @@ -2293,11 +2295,11 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, //===----------------------------------------------------------------------===// static bool isU24(SDValue Op, SelectionDAG &DAG) { - APInt KnownZero, KnownOne; + KnownBits Known; EVT VT = Op.getValueType(); - DAG.computeKnownBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, Known); - return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24; + return (VT.getSizeInBits() - Known.Zero.countLeadingOnes()) <= 24; } static bool isI24(SDValue Op, SelectionDAG &DAG) { @@ -3358,13 +3360,12 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, OffsetVal, OffsetVal + WidthVal); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) || - TLI.SimplifyDemandedBits(BitsFrom, Demanded, - KnownZero, KnownOne, TLO)) { + TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) { DCI.CommitTargetLoweringOpt(TLO); } } @@ -3516,6 +3517,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(KILL) NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; + NODE_NAME_CASE(INIT_EXEC) + NODE_NAME_CASE(INIT_EXEC_FROM_INPUT) NODE_NAME_CASE(SENDMSG) NODE_NAME_CASE(SENDMSGHALT) NODE_NAME_CASE(INTERP_MOV) @@ -3574,14 +3577,12 @@ SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand, } void 
AMDGPUTargetLowering::computeKnownBitsForTargetNode( - const SDValue Op, APInt &KnownZero, APInt &KnownOne, + const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - unsigned BitWidth = KnownZero.getBitWidth(); - KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + Known.Zero.clearAllBits(); Known.One.clearAllBits(); // Don't know anything. - APInt KnownZero2; - APInt KnownOne2; + KnownBits Known2; unsigned Opc = Op.getOpcode(); switch (Opc) { @@ -3589,7 +3590,7 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( break; case AMDGPUISD::CARRY: case AMDGPUISD::BORROW: { - KnownZero = APInt::getHighBitsSet(32, 31); + Known.Zero = APInt::getHighBitsSet(32, 31); break; } @@ -3602,16 +3603,16 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( uint32_t Width = CWidth->getZExtValue() & 0x1f; if (Opc == AMDGPUISD::BFE_U32) - KnownZero = APInt::getHighBitsSet(32, 32 - Width); + Known.Zero = APInt::getHighBitsSet(32, 32 - Width); break; } case AMDGPUISD::FP_TO_FP16: case AMDGPUISD::FP16_ZEXT: { - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); // High bits are zero. - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 16); + Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16); break; } } diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 13cbfe267932..e1a5a2072418 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -125,8 +125,9 @@ public: if (getTargetMachine().Options.NoSignedZerosFPMath) return true; - if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op)) - return BO->Flags.hasNoSignedZeros(); + const auto Flags = Op.getNode()->getFlags(); + if (Flags.isDefined()) + return Flags.hasNoSignedZeros(); return false; } @@ -199,8 +200,7 @@ public: /// either zero or one and return them in the \p KnownZero and \p KnownOne /// bitsets. void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; @@ -370,6 +370,8 @@ enum NodeType : unsigned { BUILD_VERTICAL_VECTOR, /// Pointer to the start of the shader's constant data. 
CONST_DATA_PTR, + INIT_EXEC, + INIT_EXEC_FROM_INPUT, SENDMSG, SENDMSGHALT, INTERP_MOV, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index c1706d12a2ea..353cc5742791 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -299,6 +299,15 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp, def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>; +def AMDGPUinit_exec : SDNode<"AMDGPUISD::INIT_EXEC", + SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPHasChain, SDNPInGlue]>; + +def AMDGPUinit_exec_from_input : SDNode<"AMDGPUISD::INIT_EXEC_FROM_INPUT", + SDTypeProfile<0, 2, + [SDTCisInt<0>, SDTCisInt<1>]>, + [SDNPHasChain, SDNPInGlue]>; + def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG", SDTypeProfile<0, 1, [SDTCisInt<0>]>, [SDNPHasChain, SDNPInGlue]>; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index 27fe639e3d4b..fe7283ccf7d9 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -17,6 +17,7 @@ static bool isEntryFunctionCC(CallingConv::ID CC) { case CallingConv::AMDGPU_KERNEL: case CallingConv::SPIR_KERNEL: case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 4fb262c6277c..36dcc699d4ea 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -677,12 +677,19 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { } const Function &ContainingFunction = *I.getParent()->getParent(); + CallingConv::ID CC = ContainingFunction.getCallingConv(); // Don't promote the alloca to LDS for shader calling conventions as the work // item ID intrinsics are not supported for these calling conventions. // Furthermore not all LDS is available for some of the stages. 
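The hunk that follows replaces the old isShader() rejection with an explicit whitelist: only the two kernel entry conventions keep alloca-to-LDS promotion, which now also screens out plain callable functions rather than just shaders. The same gate as a standalone sketch (the mayPromoteToLDS helper name is made up for illustration):

#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"

static bool mayPromoteToLDS(const llvm::Function &F) {
  switch (F.getCallingConv()) {
  case llvm::CallingConv::AMDGPU_KERNEL:
  case llvm::CallingConv::SPIR_KERNEL:
    return true;  // Work-item ID intrinsics and full LDS are only usable here.
  default:
    return false; // Shaders and ordinary functions: no promotion.
  }
}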
- if (AMDGPU::isShader(ContainingFunction.getCallingConv())) + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + break; + default: + DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n"); return; + } const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction); diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0202220b8011..cd5bad04d0b3 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -309,6 +309,7 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { default: return false; case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 6edd3e923ba1..c9482c37ec80 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -432,6 +432,7 @@ static bool isArgPassedInSGPR(const Argument *A) { case CallingConv::SPIR_KERNEL: return true; case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index ea305a92fc60..630442625aa3 100644 --- a/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -422,8 +422,9 @@ void GCNScheduleDAGMILive::discoverLiveIns() { unsigned SGPRs = 0; unsigned VGPRs = 0; + auto &MI = *begin()->getParent()->getFirstNonDebugInstr(); const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); - SlotIndex SI = LIS->getInstructionIndex(*begin()).getBaseIndex(); + SlotIndex SI = LIS->getInstructionIndex(MI).getBaseIndex(); assert (SI.isValid()); DEBUG(dbgs() << "Region live-ins:"); diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Intrinsics.td b/contrib/llvm/lib/Target/AMDGPU/R600Intrinsics.td index a5310e9fd6d0..4c9e1e8a5434 100644 --- a/contrib/llvm/lib/Target/AMDGPU/R600Intrinsics.td +++ b/contrib/llvm/lib/Target/AMDGPU/R600Intrinsics.td @@ -61,7 +61,7 @@ def int_r600_ddx : TextureIntrinsicFloatInput; def int_r600_ddy : TextureIntrinsicFloatInput; def int_r600_dot4 : Intrinsic<[llvm_float_ty], - [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem] + [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable] >; } // End TargetPrefix = "r600", isTarget = 1 diff --git a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index b7e62075244b..d8cb98fe1b19 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -77,9 +77,10 @@ class SIAnnotateControlFlow : public FunctionPass { void insertElse(BranchInst *Term); - Value *handleLoopCondition(Value *Cond, PHINode *Broken, - llvm::Loop *L, BranchInst *Term, - SmallVectorImpl<WeakVH> &LoopPhiConditions); + Value * + handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L, + BranchInst *Term, + SmallVectorImpl<WeakTrackingVH> &LoopPhiConditions); void handleLoop(BranchInst *Term); @@ -212,9 +213,8 @@ void SIAnnotateControlFlow::insertElse(BranchInst 
*Term) { /// \brief Recursively handle the condition leading to a loop Value *SIAnnotateControlFlow::handleLoopCondition( - Value *Cond, PHINode *Broken, - llvm::Loop *L, BranchInst *Term, - SmallVectorImpl<WeakVH> &LoopPhiConditions) { + Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term, + SmallVectorImpl<WeakTrackingVH> &LoopPhiConditions) { // Only search through PHI nodes which are inside the loop. If we try this // with PHI nodes that are outside of the loop, we end up inserting new PHI @@ -281,7 +281,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition( NewPhi->setIncomingValue(i, PhiArg); } - LoopPhiConditions.push_back(WeakVH(Phi)); + LoopPhiConditions.push_back(WeakTrackingVH(Phi)); return Ret; } @@ -323,7 +323,7 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { BasicBlock *Target = Term->getSuccessor(1); PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front()); - SmallVector<WeakVH, 8> LoopPhiConditions; + SmallVector<WeakTrackingVH, 8> LoopPhiConditions; Value *Cond = Term->getCondition(); Term->setCondition(BoolTrue); Value *Arg = handleLoopCondition(Cond, Broken, L, Term, LoopPhiConditions); @@ -333,7 +333,7 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { Term->setCondition(CallInst::Create(Loop, Arg, "", Term)); - for (WeakVH Val : reverse(LoopPhiConditions)) { + for (WeakTrackingVH Val : reverse(LoopPhiConditions)) { if (PHINode *Cond = cast_or_null<PHINode>(Val)) eraseIfUnused(Cond); } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h index 3dd372b32866..a01330cb9171 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h @@ -302,6 +302,7 @@ enum DstUnused { #define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8) #define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128 #define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228 +#define R_00B428_SPI_SHADER_PGM_RSRC1_HS 0x00B428 #define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 #define S_00B028_VGPRS(x) (((x) & 0x3F) << 0) #define S_00B028_SGPRS(x) (((x) & 0x0F) << 6) diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index b0f0bf04a891..3cca815d8773 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -278,8 +278,7 @@ static bool phiHasBreakDef(const MachineInstr &PHI, Visited.insert(Reg); - MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg); - assert(DefInstr); + MachineInstr *DefInstr = MRI.getVRegDef(Reg); switch (DefInstr->getOpcode()) { default: break; @@ -346,7 +345,7 @@ bool searchPredecessors(const MachineBasicBlock *MBB, return false; DenseSet<const MachineBasicBlock*> Visited; - SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(), + SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(), MBB->pred_end()); while (!Worklist.empty()) { @@ -546,7 +545,13 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { const TargetRegisterClass *SrcRC, *DstRC; std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI); if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) { - MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + unsigned SrcReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + TII->moveToVALU(MI); + break; + } + + MachineInstr *DefMI = MRI.getVRegDef(SrcReg); unsigned SMovOp; int64_t Imm; // If we are just copying an immediate, we can replace the copy with diff --git 
a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ce74a7cd8b04..853c8737b464 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -68,6 +68,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetCallingConv.h" #include "llvm/Target/TargetOptions.h" @@ -1956,6 +1957,63 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( MI.eraseFromParent(); return BB; + case AMDGPU::SI_INIT_EXEC: + // This should be before all vector instructions. + BuildMI(*BB, &*BB->begin(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), + AMDGPU::EXEC) + .addImm(MI.getOperand(0).getImm()); + MI.eraseFromParent(); + return BB; + + case AMDGPU::SI_INIT_EXEC_FROM_INPUT: { + // Extract the thread count from an SGPR input and set EXEC accordingly. + // Since BFM can't shift by 64, handle that case with CMP + CMOV. + // + // S_BFE_U32 count, input, {shift, 7} + // S_BFM_B64 exec, count, 0 + // S_CMP_EQ_U32 count, 64 + // S_CMOV_B64 exec, -1 + MachineInstr *FirstMI = &*BB->begin(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + unsigned InputReg = MI.getOperand(0).getReg(); + unsigned CountReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + bool Found = false; + + // Move the COPY of the input reg to the beginning, so that we can use it. + for (auto I = BB->begin(); I != &MI; I++) { + if (I->getOpcode() != TargetOpcode::COPY || + I->getOperand(0).getReg() != InputReg) + continue; + + if (I == FirstMI) { + FirstMI = &*++BB->begin(); + } else { + I->removeFromParent(); + BB->insert(FirstMI, &*I); + } + Found = true; + break; + } + assert(Found); + + // This should be before all vector instructions. + BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFE_U32), CountReg) + .addReg(InputReg) + .addImm((MI.getOperand(1).getImm() & 0x7f) | 0x70000); + BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFM_B64), + AMDGPU::EXEC) + .addReg(CountReg) + .addImm(0); + BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_CMP_EQ_U32)) + .addReg(CountReg, RegState::Kill) + .addImm(64); + BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_CMOV_B64), + AMDGPU::EXEC) + .addImm(-1); + MI.eraseFromParent(); + return BB; + } + case AMDGPU::GET_GROUPSTATICSIZE: { DebugLoc DL = MI.getDebugLoc(); BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32)) @@ -3223,6 +3281,14 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, return DAG.getNode(NodeOp, DL, MVT::Other, Chain, Op.getOperand(2), Glue); } + case Intrinsic::amdgcn_init_exec: { + return DAG.getNode(AMDGPUISD::INIT_EXEC, DL, MVT::Other, Chain, + Op.getOperand(2)); + } + case Intrinsic::amdgcn_init_exec_from_input: { + return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain, + Op.getOperand(2), Op.getOperand(3)); + } case AMDGPUIntrinsic::SI_tbuffer_store: { SDValue Ops[] = { Chain, @@ -3455,15 +3521,15 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, } } - const SDNodeFlags *Flags = Op->getFlags(); + const SDNodeFlags Flags = Op->getFlags(); - if (Unsafe || Flags->hasAllowReciprocal()) { + if (Unsafe || Flags.hasAllowReciprocal()) { // Turn into multiply by the reciprocal. 
// x / y -> x * (1.0 / y) - SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); + SDNodeFlags NewFlags; + NewFlags.setUnsafeAlgebra(true); SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); - return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, &Flags); + return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, NewFlags); } return SDValue(); @@ -4542,10 +4608,9 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, return ISD::FMAD; const TargetOptions &Options = DAG.getTarget().Options; - if ((Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath || - (cast<BinaryWithFlagsSDNode>(N0)->Flags.hasUnsafeAlgebra() && - cast<BinaryWithFlagsSDNode>(N1)->Flags.hasUnsafeAlgebra())) && + if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || + (N0->getFlags().hasUnsafeAlgebra() && + N1->getFlags().hasUnsafeAlgebra())) && isFMAFasterThanFMulAndFAdd(VT)) { return ISD::FMA; } @@ -4706,12 +4771,12 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N, APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.ShrinkDemandedConstant(Src, Demanded, TLO) || - TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) { + TLI.SimplifyDemandedBits(Src, Demanded, Known, TLO)) { DCI.CommitTargetLoweringOpt(TLO); } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td index 3f6ddec70479..7ccb54f54e34 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -286,6 +286,19 @@ def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> { let isReMaterializable = 1; } +def SI_INIT_EXEC : SPseudoInstSI < + (outs), (ins i64imm:$src), []> { + let Defs = [EXEC]; + let usesCustomInserter = 1; + let isAsCheapAsAMove = 1; +} + +def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI < + (outs), (ins SSrc_b32:$input, i32imm:$shift), []> { + let Defs = [EXEC]; + let usesCustomInserter = 1; +} + // Return for returning shaders to a shader variant epilog. 
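The SI_INIT_EXEC_FROM_INPUT inserter shown earlier builds S_BFE_U32, S_BFM_B64, S_CMP_EQ_U32 and S_CMOV_B64 to turn a 7-bit thread count into an EXEC mask, patching the count == 64 case that BFM cannot express. Its arithmetic as a scalar C++ model, assuming counts above 64 never occur:

#include <cstdint>

uint64_t execMaskFromInput(uint32_t Input, unsigned Shift) {
  uint32_t Count = (Input >> Shift) & 0x7f; // S_BFE_U32 count, input, {shift, 7}
  if (Count == 64)                          // S_CMP_EQ_U32 count, 64
    return ~0ull;                           // S_CMOV_B64 exec, -1
  return (1ull << Count) - 1;               // S_BFM_B64 exec, count, 0
}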
def SI_RETURN_TO_EPILOG : SPseudoInstSI < (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> { @@ -399,6 +412,16 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI < } // End SubtargetPredicate = isGCN let Predicates = [isGCN] in { +def : Pat < + (AMDGPUinit_exec i64:$src), + (SI_INIT_EXEC (as_i64imm $src)) +>; + +def : Pat < + (AMDGPUinit_exec_from_input i32:$input, i32:$shift), + (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift)) +>; + def : Pat< (AMDGPUtrap timm:$trapid), (S_TRAP $trapid) diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 5a3242bed1d0..d565c84bfeda 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -503,6 +503,7 @@ unsigned getInitialPSInputAddr(const Function &F) { bool isShader(CallingConv::ID cc) { switch(cc) { case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h index 39f7988200ea..4676226acd9c 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.h +++ b/contrib/llvm/lib/Target/ARM/ARM.h @@ -23,9 +23,12 @@ namespace llvm { class ARMAsmPrinter; class ARMBaseTargetMachine; +class ARMRegisterBankInfo; +class ARMSubtarget; struct BasicBlockInfo; class Function; class FunctionPass; +class InstructionSelector; class MachineBasicBlock; class MachineFunction; class MachineInstr; @@ -43,6 +46,9 @@ FunctionPass *createThumb2ITBlockPass(); FunctionPass *createARMOptimizeBarriersPass(); FunctionPass *createThumb2SizeReductionPass( std::function<bool(const Function &)> Ftor = nullptr); +InstructionSelector * +createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, + const ARMRegisterBankInfo &RBI); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index 005b74a68af3..46fd1f70ee99 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -577,6 +577,7 @@ def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>; def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>; def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"arm1176j-s", ARMV6Itineraries, [ARMv6kz]>; def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, FeatureVFP2, diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp index 13fb30767c9f..9178c67afa6e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -245,12 +245,21 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { // that's what we should load. Size = 4; assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm"); - MRI.setType(ValVReg, LLT::scalar(32)); + + auto LoadVReg = MRI.createGenericVirtualRegister(LLT::scalar(32)); + buildLoad(LoadVReg, Addr, Size, /* Alignment */ 0, MPO); + MIRBuilder.buildTrunc(ValVReg, LoadVReg); + } else { + // If the value is not extended, a simple load will suffice. 
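For the extended case handled just above, ARMCallLowering now loads the full 32-bit stack slot into a fresh virtual register and emits an explicit G_TRUNC via buildTrunc(), instead of retyping ValVReg in place; the unextended case keeps the narrow load. The memory picture, as a plain C++ model of a sub-word argument that the caller widened to a full slot:

#include <cstdint>
#include <cstring>

uint8_t readExtendedI8Arg(const void *Slot) {
  uint32_t Wide;
  std::memcpy(&Wide, Slot, sizeof(Wide)); // the 32-bit slot load
  return static_cast<uint8_t>(Wide);      // the explicit truncation
}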
+ buildLoad(ValVReg, Addr, Size, /* Alignment */ 0, MPO); } + } + void buildLoad(unsigned Val, unsigned Addr, uint64_t Size, unsigned Alignment, + MachinePointerInfo &MPO) { auto MMO = MIRBuilder.getMF().getMachineMemOperand( - MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0); - MIRBuilder.buildLoad(ValVReg, Addr, *MMO); + MPO, MachineMemOperand::MOLoad, Size, Alignment); + MIRBuilder.buildLoad(Val, Addr, *MMO); } void assignValueToReg(unsigned ValVReg, unsigned PhysReg, @@ -345,7 +354,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, unsigned Idx = 0; for (auto &Arg : F.args()) { ArgInfo AInfo(VRegs[Idx], Arg.getType()); - setArgFlags(AInfo, Idx + 1, DL, F); + setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F); splitToValueTypes(AInfo, ArgInfos, DL, MF.getRegInfo()); Idx++; } diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index e9bc7db66fa4..56cac855620d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -3025,20 +3025,18 @@ bool ARMFastISel::fastLowerArguments() { // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments // which are passed in r0 - r3. - unsigned Idx = 1; - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I, ++Idx) { - if (Idx > 4) + for (const Argument &Arg : F->args()) { + if (Arg.getArgNo() >= 4) return false; - if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) || - F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) || - F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::InReg) || + Arg.hasAttribute(Attribute::StructRet) || + Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::ByVal)) return false; - Type *ArgTy = I->getType(); + Type *ArgTy = Arg.getType(); if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) return false; @@ -3059,10 +3057,10 @@ bool ARMFastISel::fastLowerArguments() { }; const TargetRegisterClass *RC = &ARM::rGPRRegClass; - Idx = 0; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I, ++Idx) { - unsigned SrcReg = GPRArgRegs[Idx]; + I != E; ++I) { + unsigned ArgNo = I->getArgNo(); + unsigned SrcReg = GPRArgRegs[ArgNo]; unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 
// Without this, EmitLiveInCopies may eliminate the livein if its only diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index e9df9449103c..7f9fe55a5c38 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -740,7 +740,9 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { + HandleSDNode Handle(Offset); replaceDAGValue(Offset.getOperand(1), NewMulConst); + Offset = Handle.getValue(); ShAmt = PowerOfTwo; ShOpcVal = ARM_AM::lsl; } @@ -1420,7 +1422,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { + HandleSDNode Handle(OffReg); replaceDAGValue(OffReg.getOperand(1), NewMulConst); + OffReg = Handle.getValue(); ShAmt = PowerOfTwo; } } diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 382f881f7741..9f7e60a848d9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -91,6 +91,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -11758,9 +11759,9 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D // Lastly, can we determine that the bits defined by OrCI // are zero in Y? - APInt KnownZero, KnownOne; - DAG.computeKnownBits(Y, KnownZero, KnownOne); - if ((OrCI & KnownZero) != OrCI) + KnownBits Known; + DAG.computeKnownBits(Y, Known); + if ((OrCI & Known.Zero) != OrCI) return SDValue(); // OK, we can do the combine. @@ -11898,16 +11899,16 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { } if (Res.getNode()) { - APInt KnownZero, KnownOne; - DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); + KnownBits Known; + DAG.computeKnownBits(SDValue(N,0), Known); // Capture demanded bits information that would be otherwise lost. 
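The checks below re-derive narrow result types from Known.Zero; this hunk and the AMDGPU ones earlier are the same mechanical migration from a pair of APInt masks to the KnownBits struct. Restating the AMDGPU isU24() from earlier in the new style (fitsInU24 is an illustrative name; the calls mirror the usage shown in these hunks):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/KnownBits.h"

static bool fitsInU24(llvm::SelectionDAG &DAG, llvm::SDValue Op) {
  llvm::KnownBits Known;
  DAG.computeKnownBits(Op, Known); // fills Known.Zero and Known.One together
  unsigned Bits = Op.getValueType().getSizeInBits();
  // High bits proven zero shrink the effective width of the value.
  return Bits - Known.Zero.countLeadingOnes() <= 24;
}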
- if (KnownZero == 0xfffffffe) + if (Known.Zero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i1)); - else if (KnownZero == 0xffffff00) + else if (Known.Zero == 0xffffff00) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i8)); - else if (KnownZero == 0xffff0000) + else if (Known.Zero == 0xffff0000) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i16)); } @@ -12596,13 +12597,12 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, } void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - unsigned BitWidth = KnownOne.getBitWidth(); - KnownZero = KnownOne = APInt(BitWidth, 0); + unsigned BitWidth = Known.getBitWidth(); + Known.Zero.clearAllBits(); Known.One.clearAllBits(); switch (Op.getOpcode()) { default: break; case ARMISD::ADDC: @@ -12612,17 +12612,17 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, // These nodes' second result is a boolean if (Op.getResNo() == 0) break; - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - if (KnownZero == 0 && KnownOne == 0) return; + DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1); + if (Known.Zero == 0 && Known.One == 0) return; - APInt KnownZeroRHS, KnownOneRHS; - DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); - KnownZero &= KnownZeroRHS; - KnownOne &= KnownOneRHS; + KnownBits KnownRHS; + DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1); + Known.Zero &= KnownRHS.Zero; + Known.One &= KnownRHS.One; return; } case ISD::INTRINSIC_W_CHAIN: { @@ -12634,7 +12634,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case Intrinsic::arm_ldrex: { EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); return; } } @@ -12642,14 +12642,14 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case ARMISD::BFI: { // Conservatively, we can recurse down the first operand // and just mask out all affected bits. - DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1); + DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1); // The operand to BFI is already a mask suitable for removing the bits it // sets. 
ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2)); const APInt &Mask = CI->getAPIntValue(); - KnownZero &= Mask; - KnownOne &= Mask; + Known.Zero &= Mask; + Known.One &= Mask; return; } } diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 8b54ce430ed2..76e4b60e01fb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -350,8 +350,7 @@ class InstrItineraryData; SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; - void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, - APInt &KnownOne, + void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 9d8ee5c3f9dc..28eb5fc30864 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -317,8 +317,10 @@ def UseNegativeImmediates : "NegativeImmediates">; // FIXME: Eventually this will be just "hasV6T2Ops". -def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; -def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; +let RecomputePerFunction = 1 in { + def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; + def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; +} def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; def UseMulOps : Predicate<"Subtarget->useMulOps()">; @@ -345,8 +347,10 @@ def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||" def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&" "Subtarget->useNEONForSinglePrecisionFP()">; -def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; -def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; +let RecomputePerFunction = 1 in { + def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; + def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; +} def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 1c13d51a468e..2ac3fda9f448 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -11,10 +11,10 @@ /// \todo This should be generated by TableGen. 
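The rewrite below is a step toward that \todo: the selector class moves into the .cpp, the TableGen-erated matcher is pulled in through the GET_GLOBALISEL_* marker sections, and select() now gives the generated selectImpl() the first chance before the hand-written cases, which is why the manual G_SDIV/G_UDIV handlers can be deleted. The control flow, reduced to self-contained stubs (names and bodies are placeholders, not LLVM's):

struct InstrStub { int Opcode = 0; };

static bool selectImpl(InstrStub &) { return false; }    // TableGen-erated rules
static bool selectManually(InstrStub &) { return true; } // hand-written switch

static bool select(InstrStub &I) {
  if (selectImpl(I)) // generated patterns first
    return true;
  return selectManually(I); // fall back to the explicit cases
}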
//===----------------------------------------------------------------------===// -#include "ARMInstructionSelector.h" #include "ARMRegisterBankInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" @@ -26,10 +26,68 @@ using namespace llvm; #error "You shouldn't build this" #endif -ARMInstructionSelector::ARMInstructionSelector(const ARMSubtarget &STI, +namespace { + +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + +class ARMInstructionSelector : public InstructionSelector { +public: + ARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, + const ARMRegisterBankInfo &RBI); + + bool select(MachineInstr &I) const override; + +private: + bool selectImpl(MachineInstr &I) const; + + const ARMBaseInstrInfo &TII; + const ARMBaseRegisterInfo &TRI; + const ARMBaseTargetMachine &TM; + const ARMRegisterBankInfo &RBI; + const ARMSubtarget &STI; + +#define GET_GLOBALISEL_PREDICATES_DECL +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL + +// We declare the temporaries used by selectImpl() in the class to minimize the +// cost of constructing placeholder values. +#define GET_GLOBALISEL_TEMPORARIES_DECL +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_DECL +}; +} // end anonymous namespace + +namespace llvm { +InstructionSelector * +createARMInstructionSelector(const ARMBaseTargetMachine &TM, + const ARMSubtarget &STI, + const ARMRegisterBankInfo &RBI) { + return new ARMInstructionSelector(TM, STI, RBI); +} +} + +unsigned zero_reg = 0; + +#define GET_GLOBALISEL_IMPL +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_IMPL + +ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM, + const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI) : InstructionSelector(), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) {} + TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI), +#define GET_GLOBALISEL_PREDICATES_INIT +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT +#define GET_GLOBALISEL_TEMPORARIES_INIT +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_INIT +{ +} static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, @@ -232,6 +290,9 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { return true; } + if (selectImpl(I)) + return true; + MachineInstrBuilder MIB{MF, I}; bool isSExt = false; @@ -332,16 +393,6 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { } MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); break; - case G_SDIV: - assert(TII.getSubtarget().hasDivideInARMMode() && "Unsupported operation"); - I.setDesc(TII.get(ARM::SDIV)); - MIB.add(predOps(ARMCC::AL)); - break; - case G_UDIV: - assert(TII.getSubtarget().hasDivideInARMMode() && "Unsupported operation"); - I.setDesc(TII.get(ARM::UDIV)); - MIB.add(predOps(ARMCC::AL)); - break; case G_FADD: if (!selectFAdd(MIB, TII, MRI)) return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h deleted file mode 100644 index 530141d92c2c..000000000000 --- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h +++ /dev/null @@ -1,42 +0,0 @@ -//===- ARMInstructionSelector -----------------------------------*- C++ -*-===// -// -// The LLVM 
Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This file declares the targeting of the InstructionSelector class for ARM. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H -#define LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H - -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" - -namespace llvm { - -class ARMBaseInstrInfo; -class ARMBaseRegisterInfo; -class ARMRegisterBankInfo; -class ARMSubtarget; - -class ARMInstructionSelector : public InstructionSelector { -public: - ARMInstructionSelector(const ARMSubtarget &STI, - const ARMRegisterBankInfo &RBI); - - bool select(MachineInstr &I) const override; - -private: - const ARMBaseInstrInfo &TII; - const ARMBaseRegisterInfo &TRI; - const ARMRegisterBankInfo &RBI; -}; - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index b8dadb331ecf..d09f3ecbaa28 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -12,7 +12,6 @@ #include "ARM.h" #include "ARMCallLowering.h" -#include "ARMInstructionSelector.h" #include "ARMLegalizerInfo.h" #include "ARMRegisterBankInfo.h" #include "ARMSubtarget.h" @@ -339,7 +338,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // FIXME: At this point, we can't rely on Subtarget having RBI. // It's awkward to mix passing RBI and the Subtarget; should we pass // TII/TRI as well? - GISel->InstSelector.reset(new ARMInstructionSelector(*I, *RBI)); + GISel->InstSelector.reset(createARMInstructionSelector(*this, *I, *RBI)); GISel->RegBankInfo.reset(RBI); #endif diff --git a/contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp index ab42a7aa9901..c297865db820 100644 --- a/contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -57,6 +57,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, DebugLoc DL = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc(); const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>(); const AVRInstrInfo &TII = *STI.getInstrInfo(); + bool HasFP = hasFP(MF); // Interrupt handlers re-enable interrupts in function entry. if (CallConv == CallingConv::AVR_INTR) { @@ -65,6 +66,13 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + // Save the frame pointer if we have one. + if (HasFP) { + BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr)) + .addReg(AVR::R29R28, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); + } + // Emit special prologue code to save R1, R0 and SREG in interrupt/signal // handlers before saving any other registers. if (CallConv == CallingConv::AVR_INTR || @@ -72,6 +80,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr)) .addReg(AVR::R1R0, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0) .addImm(0x3f) .setMIFlag(MachineInstr::FrameSetup); @@ -86,7 +95,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, } // Early exit if the frame pointer is not needed in this function. 
- if (!hasFP(MF)) { + if (!HasFP) { return; } @@ -165,6 +174,9 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0); } + if (hasFP(MF)) + BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R29R28); + // Early exit if there is no need to restore the frame pointer. if (!FrameSize) { return; @@ -407,12 +419,9 @@ void AVRFrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - // Spill register Y when it is used as the frame pointer. - if (hasFP(MF)) { - SavedRegs.set(AVR::R29R28); - SavedRegs.set(AVR::R29); - SavedRegs.set(AVR::R28); - } + // If we have a frame pointer, the Y register needs to be saved as well. + // We don't do that here however - the prologue and epilogue generation + // code will handle it specially. } /// The frame analyzer pass. /// diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp index 0b95d3819399..f0ab6acedad1 100644 --- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -79,6 +79,11 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm) setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand); + setOperationAction(ISD::ROTL, MVT::i8, Custom); + setOperationAction(ISD::ROTL, MVT::i16, Custom); + setOperationAction(ISD::ROTR, MVT::i8, Custom); + setOperationAction(ISD::ROTR, MVT::i16, Custom); + setOperationAction(ISD::BR_CC, MVT::i8, Custom); setOperationAction(ISD::BR_CC, MVT::i16, Custom); setOperationAction(ISD::BR_CC, MVT::i32, Custom); @@ -273,6 +278,12 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL: return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0), N->getOperand(1)); + case ISD::ROTL: + return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0), + N->getOperand(1)); + case ISD::ROTR: + return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0), + N->getOperand(1)); case ISD::SRA: return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0), N->getOperand(1)); @@ -1440,6 +1451,22 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, Opc = AVR::LSRWRd; RC = &AVR::DREGSRegClass; break; + case AVR::Rol8: + Opc = AVR::ROLRd; + RC = &AVR::GPR8RegClass; + break; + case AVR::Rol16: + Opc = AVR::ROLWRd; + RC = &AVR::DREGSRegClass; + break; + case AVR::Ror8: + Opc = AVR::RORRd; + RC = &AVR::GPR8RegClass; + break; + case AVR::Ror16: + Opc = AVR::RORWRd; + RC = &AVR::DREGSRegClass; + break; } const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -1552,6 +1579,10 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case AVR::Lsl16: case AVR::Lsr8: case AVR::Lsr16: + case AVR::Rol8: + case AVR::Rol16: + case AVR::Ror8: + case AVR::Ror16: case AVR::Asr8: case AVR::Asr16: return insertShift(MI, MBB); diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.h b/contrib/llvm/lib/Target/AVR/AVRISelLowering.h index a8cdc4e7ae23..b44c62a21ac3 100644 --- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.h +++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.h @@ -43,6 +43,8 @@ enum NodeType { ROL, ///< Bit rotate left. LSLLOOP, ///< A loop of single logical shift left instructions. LSRLOOP, ///< A loop of single logical shift right instructions. + ROLLOOP, ///< A loop of single left bit rotate instructions. + RORLOOP, ///< A loop of single right bit rotate instructions. 
ASRLOOP, ///< A loop of single arithmetic shift right instructions. /// AVR conditional branches. Operand 0 is the chain operand, operand 1 /// is the block to branch if condition is true, operand 2 is the diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td index 693d80a1c06f..1b6547ef7795 100644 --- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -64,6 +64,8 @@ def AVRasr : SDNode<"AVRISD::ASR", SDTIntUnaryOp>; // Pseudo shift nodes for non-constant shift amounts. def AVRlslLoop : SDNode<"AVRISD::LSLLOOP", SDTIntShiftOp>; def AVRlsrLoop : SDNode<"AVRISD::LSRLOOP", SDTIntShiftOp>; +def AVRrolLoop : SDNode<"AVRISD::ROLLOOP", SDTIntShiftOp>; +def AVRrorLoop : SDNode<"AVRISD::RORLOOP", SDTIntShiftOp>; def AVRasrLoop : SDNode<"AVRISD::ASRLOOP", SDTIntShiftOp>; //===----------------------------------------------------------------------===// @@ -183,33 +185,33 @@ def call_target : Operand<iPTR> // A 16-bit address (which can lead to an R_AVR_16 relocation). def imm16 : Operand<i16> { - let EncoderMethod = "encodeImm<AVR::fixup_16>"; + let EncoderMethod = "encodeImm<AVR::fixup_16, 2>"; } /// A 6-bit immediate used in the ADIW/SBIW instructions. def imm_arith6 : Operand<i16> { - let EncoderMethod = "encodeImm<AVR::fixup_6_adiw>"; + let EncoderMethod = "encodeImm<AVR::fixup_6_adiw, 0>"; } /// An 8-bit immediate inside an instruction with the same format /// as the `LDI` instruction (the `FRdK` format). def imm_ldi8 : Operand<i8> { - let EncoderMethod = "encodeImm<AVR::fixup_ldi>"; + let EncoderMethod = "encodeImm<AVR::fixup_ldi, 0>"; } /// A 5-bit port number used in SBIC and friends (the `FIOBIT` format). def imm_port5 : Operand<i8> { - let EncoderMethod = "encodeImm<AVR::fixup_port5>"; + let EncoderMethod = "encodeImm<AVR::fixup_port5, 0>"; } /// A 6-bit port number used in the `IN` instruction and friends (the /// `FIORdA` format. 
def imm_port6 : Operand<i8> { - let EncoderMethod = "encodeImm<AVR::fixup_port6>"; + let EncoderMethod = "encodeImm<AVR::fixup_port6, 0>"; } // Addressing mode pattern reg+imm6 @@ -1932,7 +1934,6 @@ def Lsr8 : ShiftPseudo< [(set i8:$dst, (AVRlsrLoop i8:$src, i8:$cnt))] >; - def Lsr16 : ShiftPseudo< (outs DREGS:$dst), (ins DREGS:$src, GPR8:$cnt), @@ -1940,6 +1941,34 @@ def Lsr16 : ShiftPseudo< [(set i16:$dst, (AVRlsrLoop i16:$src, i8:$cnt))] >; +def Rol8 : ShiftPseudo< + (outs GPR8:$dst), + (ins GPR8:$src, GPR8:$cnt), + "# Rol8 PSEUDO", + [(set i8:$dst, (AVRrolLoop i8:$src, i8:$cnt))] +>; + +def Rol16 : ShiftPseudo< + (outs DREGS:$dst), + (ins DREGS:$src, GPR8:$cnt), + "# Rol16 PSEUDO", + [(set i16:$dst, (AVRrolLoop i16:$src, i8:$cnt))] +>; + +def Ror8 : ShiftPseudo< + (outs GPR8:$dst), + (ins GPR8:$src, GPR8:$cnt), + "# Ror8 PSEUDO", + [(set i8:$dst, (AVRrorLoop i8:$src, i8:$cnt))] +>; + +def Ror16 : ShiftPseudo< + (outs DREGS:$dst), + (ins DREGS:$src, GPR8:$cnt), + "# Ror16 PSEUDO", + [(set i16:$dst, (AVRrorLoop i16:$src, i8:$cnt))] +>; + def Asr8 : ShiftPseudo< (outs GPR8:$dst), (ins GPR8:$src, GPR8:$cnt), diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp index 5cc7eaf8add3..2813e24d2ac7 100644 --- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp @@ -65,12 +65,18 @@ BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AVR::SPH); Reserved.set(AVR::SP); - // Reserve the frame pointer registers r28 and r29 if the function requires one. - if (TFI->hasFP(MF)) { - Reserved.set(AVR::R28); - Reserved.set(AVR::R29); - Reserved.set(AVR::R29R28); - } + // We tentatively reserve the frame pointer register r29:r28 because the + // function may require one, but we cannot tell until register allocation + // is complete, which can be too late. + // + // Instead we just unconditionally reserve the Y register. + // + // TODO: Write a pass to enumerate functions which reserved the Y register + // but didn't end up needing a frame pointer. In these, we can + // convert one or two of the spills inside to use the Y register.
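The Rol8/Rol16/Ror8/Ror16 pseudos defined above mirror the existing shift-loop pseudos: AVR only rotates one bit per instruction, so a variable-amount rotate expands (via insertShift()) into a counted loop of single-bit rotates. A carry-free scalar model of the 8-bit left-rotate case; note the hardware ROL/ROR actually rotate through the carry flag, a detail this value-level sketch deliberately ignores:

#include <cstdint>

uint8_t rol8(uint8_t V, uint8_t Cnt) {
  while (Cnt--) // one single-bit rotate per iteration, like the emitted loop
    V = static_cast<uint8_t>((V << 1) | (V >> 7));
  return V;
}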
+ Reserved.set(AVR::R28); + Reserved.set(AVR::R29); + Reserved.set(AVR::R29R28); return Reserved; } diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp index c3d43ebb407e..4dbbce8c205e 100644 --- a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp @@ -177,7 +177,7 @@ unsigned AVRMCCodeEmitter::encodeComplement(const MCInst &MI, unsigned OpNo, return (~0) - Imm; } -template <AVR::Fixups Fixup> +template <AVR::Fixups Fixup, unsigned Offset> unsigned AVRMCCodeEmitter::encodeImm(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { @@ -193,7 +193,7 @@ unsigned AVRMCCodeEmitter::encodeImm(const MCInst &MI, unsigned OpNo, } MCFixupKind FixupKind = static_cast<MCFixupKind>(Fixup); - Fixups.push_back(MCFixup::create(0, MO.getExpr(), FixupKind, MI.getLoc())); + Fixups.push_back(MCFixup::create(Offset, MO.getExpr(), FixupKind, MI.getLoc())); return 0; } diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h index 4cee8d904c9d..883abf8db78a 100644 --- a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h +++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h @@ -69,7 +69,8 @@ private: const MCSubtargetInfo &STI) const; /// Encodes an immediate value with a given fixup. - template <AVR::Fixups Fixup> + /// \tparam Offset The offset into the instruction for the fixup. + template <AVR::Fixups Fixup, unsigned Offset> unsigned encodeImm(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; diff --git a/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp index 9beefcdcc1d5..b98621ca4749 100644 --- a/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -17,6 +17,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" @@ -88,9 +90,9 @@ static DecodeStatus decodeMemoryOpValue(MCInst &Inst, unsigned Insn, } #include "BPFGenDisassemblerTables.inc" - static DecodeStatus readInstruction64(ArrayRef<uint8_t> Bytes, uint64_t Address, - uint64_t &Size, uint64_t &Insn) { + uint64_t &Size, uint64_t &Insn, + bool IsLittleEndian) { uint64_t Lo, Hi; if (Bytes.size() < 8) { @@ -99,8 +101,14 @@ static DecodeStatus readInstruction64(ArrayRef<uint8_t> Bytes, uint64_t Address, } Size = 8; - Hi = (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 0) | (Bytes[3] << 8); - Lo = (Bytes[4] << 0) | (Bytes[5] << 8) | (Bytes[6] << 16) | (Bytes[7] << 24); + if (IsLittleEndian) { + Hi = (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 0) | (Bytes[3] << 8); + Lo = (Bytes[4] << 0) | (Bytes[5] << 8) | (Bytes[6] << 16) | (Bytes[7] << 24); + } else { + Hi = (Bytes[0] << 24) | ((Bytes[1] & 0x0F) << 20) | ((Bytes[1] & 0xF0) << 12) | + (Bytes[2] << 8) | (Bytes[3] << 0); + Lo = (Bytes[4] << 24) | (Bytes[5] << 16) | (Bytes[6] << 8) | (Bytes[7] << 0); + } Insn = Make_64(Hi, Lo); return MCDisassembler::Success; @@ -111,10 +119,11 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t 
&Size, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const { - uint64_t Insn; + bool IsLittleEndian = getContext().getAsmInfo()->isLittleEndian(); + uint64_t Insn, Hi; DecodeStatus Result; - Result = readInstruction64(Bytes, Address, Size, Insn); + Result = readInstruction64(Bytes, Address, Size, Insn, IsLittleEndian); if (Result == MCDisassembler::Fail) return MCDisassembler::Fail; Result = decodeInstruction(DecoderTableBPF64, Instr, Insn, @@ -128,7 +137,10 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, return MCDisassembler::Fail; } Size = 16; - uint64_t Hi = (Bytes[12] << 0) | (Bytes[13] << 8) | (Bytes[14] << 16) | (Bytes[15] << 24); + if (IsLittleEndian) + Hi = (Bytes[12] << 0) | (Bytes[13] << 8) | (Bytes[14] << 16) | (Bytes[15] << 24); + else + Hi = (Bytes[12] << 24) | (Bytes[13] << 16) | (Bytes[14] << 8) | (Bytes[15] << 0); auto& Op = Instr.getOperand(1); Op.setImm(Make_64(Hi, Op.getImm())); break; diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 4bbc36a86e5b..b0b2644fffbe 100644 --- a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -307,7 +307,7 @@ public: bool iss31_1Imm() const { return true; } bool iss30_2Imm() const { return true; } bool iss29_3Imm() const { return true; } - bool iss23_2Imm() const { return CheckImmRange(23, 2, true, true, false); } + bool iss27_2Imm() const { return CheckImmRange(27, 2, true, true, false); } bool iss10_0Imm() const { return CheckImmRange(10, 0, true, false, false); } bool iss10_6Imm() const { return CheckImmRange(10, 6, true, false, false); } bool iss9_0Imm() const { return CheckImmRange(9, 0, true, false, false); } @@ -459,94 +459,16 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { DEBUG(MCB.dump_pretty(dbgs())); DEBUG(dbgs() << "--\n"); + MCB.setLoc(IDLoc); // Check the bundle for errors. const MCRegisterInfo *RI = getContext().getRegisterInfo(); - HexagonMCChecker Check(MCII, getSTI(), MCB, MCB, *RI); + HexagonMCChecker Check(getContext(), MCII, getSTI(), MCB, *RI); bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(), getContext(), MCB, &Check); - while (Check.getNextErrInfo()) { - unsigned Reg = Check.getErrRegister(); - Twine R(RI->getName(Reg)); - - uint64_t Err = Check.getError(); - if (Err != HexagonMCErrInfo::CHECK_SUCCESS) { - if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err) - return Error( - IDLoc, - "unconditional branch cannot precede another branch in packet"); - - if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err || - HexagonMCErrInfo::CHECK_ERROR_NEWV & Err) - return Error(IDLoc, "register `" + R + - "' used with `.new' " - "but not validly modified in the same packet"); - - if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err) - return Error(IDLoc, "register `" + R + "' modified more than once"); - - if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err) - return Error(IDLoc, "cannot write to read-only register `" + R + "'"); - - if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err) - return Error(IDLoc, "loop-setup and some branch instructions " - "cannot be in the same packet"); - - if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) { - Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? 
'0' : '1'); - return Error(IDLoc, - "packet marked with `:endloop" + N + "' " + - "cannot contain instructions that modify register " + - "`" + R + "'"); - } - - if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err) - return Error( - IDLoc, - "instruction cannot appear in packet with other instructions"); - - if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err) - return Error(IDLoc, "too many slots used in packet"); - - if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) { - uint64_t Erm = Check.getShuffleError(); - - if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm) - return Error(IDLoc, "invalid instruction packet"); - else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm) - return Error(IDLoc, "invalid instruction packet: too many stores"); - else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm) - return Error(IDLoc, "invalid instruction packet: too many loads"); - else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm) - return Error(IDLoc, "too many branches in packet"); - else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm) - return Error(IDLoc, "invalid instruction packet: out of slots"); - else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm) - return Error(IDLoc, "invalid instruction packet: slot error"); - else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm) - return Error(IDLoc, "v60 packet violation"); - else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm) - return Error(IDLoc, "slot 0 instruction does not allow slot 1 store"); - else - return Error(IDLoc, "unknown error in instruction packet"); - } - } - - unsigned Warn = Check.getWarning(); - if (Warn != HexagonMCErrInfo::CHECK_SUCCESS) { - if (HexagonMCErrInfo::CHECK_WARN_CURRENT & Warn) - Warning(IDLoc, "register `" + R + "' used with `.cur' " - "but not used in the same packet"); - else if (HexagonMCErrInfo::CHECK_WARN_TEMPORARY & Warn) - Warning(IDLoc, "register `" + R + "' used with `.tmp' " - "but not used in the same packet"); - } - } - if (CheckOk) { - MCB.setLoc(IDLoc); if (HexagonMCInstrInfo::bundleSize(MCB) == 0) { assert(!HexagonMCInstrInfo::isInnerLoop(MCB)); assert(!HexagonMCInstrInfo::isOuterLoop(MCB)); @@ -1370,13 +1292,13 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::A2_iconst: { Inst.setOpcode(Hexagon::A2_addi); MCOperand Reg = Inst.getOperand(0); - MCOperand S16 = Inst.getOperand(1); - HexagonMCInstrInfo::setMustNotExtend(*S16.getExpr()); - HexagonMCInstrInfo::setS23_2_reloc(*S16.getExpr()); + MCOperand S27 = Inst.getOperand(1); + HexagonMCInstrInfo::setMustNotExtend(*S27.getExpr()); + HexagonMCInstrInfo::setS27_2_reloc(*S27.getExpr()); Inst.clear(); Inst.addOperand(Reg); Inst.addOperand(MCOperand::createReg(Hexagon::R0)); - Inst.addOperand(S16); + Inst.addOperand(S27); break; } case Hexagon::M4_mpyrr_addr: diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index ae15ed0e9240..3396ddbe4fa6 100644 --- a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -191,7 +191,8 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; if (Size > HEXAGON_MAX_PACKET_SIZE) return MCDisassembler::Fail; - HexagonMCChecker Checker(*MCII, STI, MI, MI, *getContext().getRegisterInfo()); + HexagonMCChecker Checker(getContext(), *MCII, STI, MI, + *getContext().getRegisterInfo(), false); if (!Checker.check()) return MCDisassembler::Fail; return 
MCDisassembler::Success; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index c8483f7e6e76..bb5128e7500f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -298,7 +298,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MCOperand Reg = Inst.getOperand(0); MCOperand S16 = Inst.getOperand(1); HexagonMCInstrInfo::setMustNotExtend(*S16.getExpr()); - HexagonMCInstrInfo::setS23_2_reloc(*S16.getExpr()); + HexagonMCInstrInfo::setS27_2_reloc(*S16.getExpr()); Inst.clear(); Inst.addOperand(Reg); Inst.addOperand(MCOperand::createReg(Hexagon::R0)); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp index 90ccecb6629a..af0f8b265bda 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -57,12 +57,10 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, // tion). To avoid the complications with in-memory arguments, only consi- // der the initial sequence of formal parameters that are known to be // passed via registers. - unsigned AttrIdx = 0; unsigned InVirtReg, InPhysReg = 0; const Function &F = *MF.getFunction(); typedef Function::const_arg_iterator arg_iterator; for (arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { - AttrIdx++; const Argument &Arg = *I; Type *ATy = Arg.getType(); unsigned Width = 0; @@ -74,8 +72,7 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, // Module::AnyPointerSize. if (Width == 0 || Width > 64) break; - AttributeList Attrs = F.getAttributes(); - if (Attrs.hasAttribute(AttrIdx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::ByVal)) continue; InPhysReg = getNextPhysReg(InPhysReg, Width); if (!InPhysReg) @@ -83,9 +80,9 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, InVirtReg = getVirtRegFor(InPhysReg); if (!InVirtReg) continue; - if (Attrs.hasAttribute(AttrIdx, Attribute::SExt)) + if (Arg.hasAttribute(Attribute::SExt)) VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width))); - else if (Attrs.hasAttribute(AttrIdx, Attribute::ZExt)) + else if (Arg.hasAttribute(Attribute::ZExt)) VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::ZExt, Width))); } } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index 2f8fe6e087f5..c7b422e7efd0 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -38,6 +38,7 @@ class HexagonCFGOptimizer : public MachineFunctionPass { private: void InvertAndChangeJumpTarget(MachineInstr &, MachineBasicBlock *); + bool isOnFallThroughPath(MachineBasicBlock *MBB); public: static char ID; @@ -106,6 +107,14 @@ void HexagonCFGOptimizer::InvertAndChangeJumpTarget( MI.getOperand(1).setMBB(NewTarget); } +bool HexagonCFGOptimizer::isOnFallThroughPath(MachineBasicBlock *MBB) { + if (MBB->canFallThrough()) + return true; + for (MachineBasicBlock *PB : MBB->predecessors()) + if (PB->isLayoutSuccessor(MBB) && PB->canFallThrough()) + return true; + return false; +} bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(*Fn.getFunction())) @@ -182,7 +191,6 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { } if ((NumSuccs == 2) && LayoutSucc && 
(LayoutSucc->pred_size() == 1)) { - // Ensure that BB2 has one instruction -- an unconditional jump. if ((LayoutSucc->size() == 1) && IsUnconditionalJump(LayoutSucc->front().getOpcode())) { @@ -211,9 +219,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { JumpAroundTarget->moveAfter(LayoutSucc); // only move a block if it doesn't have a fall-thru. otherwise // the CFG will be incorrect. - if (!UncondTarget->canFallThrough()) { + if (!isOnFallThroughPath(UncondTarget)) UncondTarget->moveAfter(JumpAroundTarget); - } } // diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h index f8ae39a37994..331edaf5831d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h @@ -21,7 +21,6 @@ enum Type { TypeCVI_VA = 16, TypeCVI_VA_DV = 17, TypeCVI_VINLANESAT = 18, - TypeCVI_VM_CUR_LD = 19, TypeCVI_VM_LD = 20, TypeCVI_VM_NEW_ST = 21, TypeCVI_VM_ST = 22, diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td index f1d689ce12f4..b35f7ba6d2ab 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td @@ -19,7 +19,6 @@ def TypeCVI_HIST : IType<10>; def TypeCVI_VA : IType<16>; def TypeCVI_VA_DV : IType<17>; def TypeCVI_VINLANESAT : IType<18>; -def TypeCVI_VM_CUR_LD : IType<19>; def TypeCVI_VM_LD : IType<20>; def TypeCVI_VM_NEW_ST : IType<21>; def TypeCVI_VM_ST : IType<22>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td index 2bfde9acaea9..d910d4af2191 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -26685,6 +26685,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -26701,6 +26702,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26709,7 +26711,7 @@ def V6_vL32b_cur_npred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26719,6 +26721,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -26726,7 +26729,7 @@ def V6_vL32b_cur_npred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26736,6 +26739,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26744,7 +26748,7 @@ def V6_vL32b_cur_npred_pi : HInst< (outs 
VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26755,6 +26759,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26763,7 +26768,7 @@ def V6_vL32b_cur_npred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26774,6 +26779,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26783,7 +26789,7 @@ def V6_vL32b_cur_npred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26793,6 +26799,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26801,7 +26808,7 @@ def V6_vL32b_cur_npred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26811,6 +26818,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26829,6 +26837,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26846,6 +26855,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26855,7 +26865,7 @@ def V6_vL32b_cur_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -26863,6 +26873,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; 
let Constraints = "$Rx32 = $Rx32in"; @@ -26871,7 +26882,7 @@ def V6_vL32b_cur_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -26879,6 +26890,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26888,7 +26900,7 @@ def V6_vL32b_cur_pred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26897,6 +26909,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -26904,7 +26917,7 @@ def V6_vL32b_cur_pred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26913,6 +26926,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26921,7 +26935,7 @@ def V6_vL32b_cur_pred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCOPROC_VMEM>, Enc_14560494, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26931,6 +26945,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26939,7 +26954,7 @@ def V6_vL32b_cur_pred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCOPROC_VMEM>, Enc_15560488, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26949,6 +26964,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26958,7 +26974,7 @@ def V6_vL32b_cur_pred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000100; let 
Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26967,6 +26983,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26975,7 +26992,7 @@ def V6_vL32b_cur_pred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000100; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26984,6 +27001,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -27150,8 +27168,9 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_ai_128B : HInst< @@ -27167,8 +27186,9 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -27176,7 +27196,7 @@ def V6_vL32b_nt_cur_npred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27186,15 +27206,16 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_npred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27204,8 +27225,9 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -27213,7 +27235,7 @@ def V6_vL32b_nt_cur_npred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27224,8 +27246,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27233,7 +27256,7 @@ def V6_vL32b_nt_cur_npred_pi_128B 
: HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27244,8 +27267,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27254,7 +27278,7 @@ def V6_vL32b_nt_cur_npred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27264,8 +27288,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27273,7 +27298,7 @@ def V6_vL32b_nt_cur_npred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27283,8 +27308,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27302,8 +27328,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27320,8 +27347,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27330,7 +27358,7 @@ def V6_vL32b_nt_cur_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -27338,8 +27366,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27347,7 +27376,7 @@ def V6_vL32b_nt_cur_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = 
vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -27355,8 +27384,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27365,7 +27395,7 @@ def V6_vL32b_nt_cur_pred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27374,15 +27404,16 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_pred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27391,8 +27422,9 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -27400,7 +27432,7 @@ def V6_vL32b_nt_cur_pred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27410,8 +27442,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27419,7 +27452,7 @@ def V6_vL32b_nt_cur_pred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27429,8 +27462,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27439,7 +27473,7 @@ def V6_vL32b_nt_cur_pred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_3158657, 
Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCOPROC_VMEM>, Enc_3158657, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000100; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27448,8 +27482,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27457,7 +27492,7 @@ def V6_vL32b_nt_cur_pred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +CVI_VM_LD, TypeCOPROC_VMEM>, Enc_3158657, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000100; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27466,8 +27501,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -28936,8 +28972,9 @@ let Inst{31-21} = 0b00101000001; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -28954,8 +28991,9 @@ let Inst{31-21} = 0b00101000001; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -28974,8 +29012,9 @@ let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -28992,8 +29031,9 @@ let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29012,8 +29052,9 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29032,8 +29073,9 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29052,8 +29094,9 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29071,8 +29114,9 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let DecoderNamespace = 
"EXT_mmvec"; let isCodeGenOnly = 1; @@ -29090,8 +29134,9 @@ let Inst{31-21} = 0b00101001001; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29109,8 +29154,9 @@ let Inst{31-21} = 0b00101001001; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29128,8 +29174,9 @@ let Inst{31-21} = 0b00101011001; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29146,8 +29193,9 @@ let Inst{31-21} = 0b00101011001; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29166,8 +29214,9 @@ let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -29183,8 +29232,9 @@ let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29202,8 +29252,9 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29221,8 +29272,9 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29240,8 +29292,9 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29258,8 +29311,9 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29498,9 +29552,10 @@ let Inst{31-21} = 0b00101000011; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29517,9 +29572,10 @@ let Inst{31-21} = 0b00101000011; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let 
isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29538,9 +29594,10 @@ let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -29557,9 +29614,10 @@ let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29578,9 +29636,10 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29599,9 +29658,10 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29620,9 +29680,10 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29640,9 +29701,10 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29660,9 +29722,10 @@ let Inst{31-21} = 0b00101001011; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29680,9 +29743,10 @@ let Inst{31-21} = 0b00101001011; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29700,9 +29764,10 @@ let Inst{31-21} = 0b00101011011; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29719,9 +29784,10 @@ let Inst{31-21} = 0b00101011011; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = 
"V6_vS32b_ppu_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29740,9 +29806,10 @@ let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -29758,9 +29825,10 @@ let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29778,9 +29846,10 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29798,9 +29867,10 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29818,9 +29888,10 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29837,9 +29908,10 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index e5eb059b566f..861af94f1e38 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1720,8 +1720,13 @@ HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag); InFlag = Chain.getValue(1); + unsigned Flags = + static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls() + ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended + : HexagonII::MO_GDPLT; + return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT, - Hexagon::R0, HexagonII::MO_GDPLT); + Hexagon::R0, Flags); } // diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td index 39c2a6e4f5a5..709d64585c0b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td @@ -7,16 +7,6 @@ // //===----------------------------------------------------------------------===// -// Maintain list of valid subtargets for each instruction. 
-class SubTarget<bits<6> value> { - bits<6> Value = value; -} - -def HasAnySubT : SubTarget<0x3f>; // 111111 -def HasV5SubT : SubTarget<0x3e>; // 111110 -def HasV55SubT : SubTarget<0x3c>; // 111100 -def HasV60SubT : SubTarget<0x38>; // 111000 - // Addressing modes for load/store instructions class AddrModeType<bits<3> value> { bits<3> Value = value; @@ -131,12 +121,6 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, bits<2> opExtentAlign = 0; let TSFlags{34-33} = opExtentAlign; // Alignment exponent before extending. - // If an instruction is valid on a subtarget, set the corresponding - // bit from validSubTargets. - // By default, instruction is valid on all subtargets. - SubTarget validSubTargets = HasAnySubT; - let TSFlags{40-35} = validSubTargets.Value; - // Addressing mode for load/store instructions. AddrModeType addrMode = NoAddrMode; let TSFlags{43-41} = addrMode.Value; @@ -165,6 +149,9 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, bit cofMax1 = 0; let TSFlags{60} = cofMax1; + bit CVINew = 0; + let TSFlags{61} = CVINew; + // Fields used for relation models. bit isNonTemporal = 0; string isNT = ""; // set to "true" for non-temporal vector stores. diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td index c8a7faea5ed5..b913727972e5 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td @@ -15,8 +15,6 @@ // Instruction Classes Definitions + //----------------------------------------------------------------------------// -let validSubTargets = HasV60SubT in -{ class CVI_VA_Resource<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CVI_VA> @@ -131,12 +129,6 @@ class CVI_VM_TMP_LD_Resource_long<dag outs, dag ins, string asmstr, : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>, OpcodeHexagon, Requires<[HasV60T, UseHVX]>; -class CVI_VM_CUR_LD_Resource<dag outs, dag ins, string asmstr, - list<dag> pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_CUR_LD> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_CUR_LD>, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - class CVI_VM_VP_LDU_Resource<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CVI_VM_VP_LDU> @@ -190,10 +182,7 @@ class CVI_HIST_Resource<dag outs, dag ins, string asmstr, InstrItinClass itin = CVI_HIST> : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>, OpcodeHexagon, Requires<[HasV60T, UseHVX]>; -} -let validSubTargets = HasV60SubT in -{ class CVI_VA_Resource1<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CVI_VA> @@ -211,6 +200,3 @@ class CVI_HIST_Resource1<dag outs, dag ins, string asmstr, InstrItinClass itin = CVI_HIST> : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>, Requires<[HasV60T, UseHVX]>; -} - - diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index b265a883da5c..852bfb1b4f54 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -869,6 +869,9 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); unsigned KillFlag = 
getKillRegState(isKill); + bool HasAlloca = MFI.hasVarSizedObjects(); + const auto &HST = MF.getSubtarget<HexagonSubtarget>(); + const HexagonFrameLowering &HFI = *HST.getFrameLowering(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, @@ -899,24 +902,36 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? Hexagon::V6_vS32Ub_ai_128B : Hexagon::V6_vS32b_ai_128B; BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32b_ai; BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? Hexagon::PS_vstorerwu_ai : Hexagon::PS_vstorerw_ai; BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? Hexagon::PS_vstorerwu_ai_128B : Hexagon::PS_vstorerw_ai_128B; BuildMI(MBB, I, DL, get(Opc)) @@ -935,6 +950,9 @@ void HexagonInstrInfo::loadRegFromStackSlot( MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); + bool HasAlloca = MFI.hasVarSizedObjects(); + const auto &HST = MF.getSubtarget<HexagonSubtarget>(); + const HexagonFrameLowering &HFI = *HST.getFrameLowering(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, @@ -959,21 +977,33 @@ void HexagonInstrInfo::loadRegFromStackSlot( BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrq_ai), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? Hexagon::PS_vloadrwu_ai_128B : Hexagon::PS_vloadrw_ai_128B; BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? Hexagon::V6_vL32Ub_ai_128B : Hexagon::V6_vL32b_ai_128B; BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? 
Hexagon::V6_vL32Ub_ai : Hexagon::V6_vL32b_ai; BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? Hexagon::PS_vloadrwu_ai : Hexagon::PS_vloadrw_ai; BuildMI(MBB, I, DL, get(Opc), DestReg) @@ -1110,8 +1140,9 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6; MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_lo)) - .add(MI.getOperand(1)) - .addImm(MI.getOperand(2).getImm()); + .add(MI.getOperand(1)) + .addImm(MI.getOperand(2).getImm()) + .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI1New->getOperand(1).setIsKill(false); BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_hi)) .add(MI.getOperand(1)) @@ -1940,7 +1971,7 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { case Hexagon::L4_return_fnew_pnt : case Hexagon::L4_return_tnew_pt : case Hexagon::L4_return_fnew_pt : - return true; + return true; } return false; } @@ -1967,12 +1998,12 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, if (RegA == RegB) return true; - if (Hexagon::DoubleRegsRegClass.contains(RegA)) + if (TargetRegisterInfo::isPhysicalRegister(RegA)) for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs) if (RegB == *SubRegs) return true; - if (Hexagon::DoubleRegsRegClass.contains(RegB)) + if (TargetRegisterInfo::isPhysicalRegister(RegB)) for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs) if (RegA == *SubRegs) return true; @@ -2139,7 +2170,7 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI, unsigned offset) const { // This selection of jump instructions matches to that what - // AnalyzeBranch can parse, plus NVJ. + // analyzeBranch can parse, plus NVJ. if (isNewValueJump(MI)) // r9:2 return isInt<11>(offset); @@ -2666,6 +2697,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::L2_loadrh_io: case Hexagon::L2_loadruh_io: case Hexagon::S2_storerh_io: + case Hexagon::S2_storerf_io: return (Offset >= Hexagon_MEMH_OFFSET_MIN) && (Offset <= Hexagon_MEMH_OFFSET_MAX); @@ -2876,6 +2908,11 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, /// \brief Can these instructions execute at the same time in a bundle. 
bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const { + if (Second.mayStore() && First.getOpcode() == Hexagon::S2_allocframe) { + const MachineOperand &Op = Second.getOperand(0); + if (Op.isReg() && Op.isUse() && Op.getReg() == Hexagon::R29) + return true; + } if (DisableNVSchedule) return false; if (mayBeNewStore(Second)) { @@ -3000,13 +3037,9 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI, MachineBasicBlock::const_instr_iterator MII = BII; MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end(); - if (!MII->isBundle()) { + if (!(*MII).isBundle()) { const MachineInstr &J = *MII; - if (!isV60VectorInstruction(J)) - return false; - else if (isVecUsableNextPacket(J, MI)) - return false; - return true; + return producesStall(J, MI); } for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) { @@ -3034,12 +3067,14 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, } bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { - return (Opcode == Hexagon::J2_jumpt) || - (Opcode == Hexagon::J2_jumpf) || - (Opcode == Hexagon::J2_jumptnew) || - (Opcode == Hexagon::J2_jumpfnew) || - (Opcode == Hexagon::J2_jumptnewpt) || - (Opcode == Hexagon::J2_jumpfnewpt); + return Opcode == Hexagon::J2_jumpt || + Opcode == Hexagon::J2_jumptpt || + Opcode == Hexagon::J2_jumpf || + Opcode == Hexagon::J2_jumpfpt || + Opcode == Hexagon::J2_jumptnew || + Opcode == Hexagon::J2_jumpfnew || + Opcode == Hexagon::J2_jumptnewpt || + Opcode == Hexagon::J2_jumpfnewpt; } bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const { @@ -3341,9 +3376,30 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { return 0; } +// Return the regular version of the .cur instruction. +int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: llvm_unreachable("Unknown .cur type"); + case Hexagon::V6_vL32b_cur_pi: + return Hexagon::V6_vL32b_pi; + case Hexagon::V6_vL32b_cur_ai: + return Hexagon::V6_vL32b_ai; + //128B + case Hexagon::V6_vL32b_cur_pi_128B: + return Hexagon::V6_vL32b_pi_128B; + case Hexagon::V6_vL32b_cur_ai_128B: + return Hexagon::V6_vL32b_ai_128B; + } + return 0; +} + + // The diagram below shows the steps involved in the conversion of a predicated // store instruction to its .new predicated new-value form. // +// Note: It doesn't include conditional new-value stores as they can't be +// converted to .new predicate. +// // p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ] // ^ ^ // / \ (not OK. it will cause new-value store to be @@ -3564,11 +3620,11 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, } int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const { + const MachineFunction &MF = *MI.getParent()->getParent(); + const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>(); int NewOp = MI.getOpcode(); if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form NewOp = Hexagon::getPredOldOpcode(NewOp); - const MachineFunction &MF = *MI.getParent()->getParent(); - const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>(); // All Hexagon architectures have prediction bits on dot-new branches, // but only Hexagon V60+ has prediction bits on dot-old ones. Make sure // to pick the right opcode when converting back to dot-old. 
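The hunk above adds getNonDotCurOp(), the inverse of getDotCurOp(): it maps a ".cur" vector load back to its regular opcode. Combined with isDotCurInst(), this lets the packetizer demote an unconsumed .cur load generically instead of matching the single hard-coded V6_vL32b_cur_ai/V6_vL32b_ai pair (see the HexagonVLIWPacketizer.cpp hunks further down). A minimal sketch of the demotion pattern these hooks enable; usedInPacket() is a hypothetical stand-in for the packetizer's actual use scan, and the target-local header path is assumed:

#include "HexagonInstrInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Hypothetical predicate (see lead-in): does any other instruction in the
// packet read the value defined by MI?
bool usedInPacket(const MachineInstr &MI);

// Sketch: rewrite any ".cur" load whose result is never read inside the
// packet back to the ordinary load opcode returned by getNonDotCurOp().
static void demoteUnusedDotCur(ArrayRef<MachineInstr *> Packet,
                               const HexagonInstrInfo &HII) {
  for (MachineInstr *MI : Packet)
    if (HII.isDotCurInst(*MI) && !usedInPacket(*MI))
      MI->setDesc(HII.get(HII.getNonDotCurOp(*MI)));
}

The in-tree version in cleanUpDotCur() keeps this logic inside the packetizer, demotes at most one .cur instruction per packet, and logs it as "Demoted CUR".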
@@ -3596,6 +3652,21 @@ int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const { NewOp = Hexagon::getNonNVStore(NewOp); assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); } + + if (HST.hasV60TOps()) + return NewOp; + + // Subtargets prior to V60 didn't support 'taken' forms of predicated jumps. + switch (NewOp) { + case Hexagon::J2_jumpfpt: + return Hexagon::J2_jumpf; + case Hexagon::J2_jumptpt: + return Hexagon::J2_jumpt; + case Hexagon::J2_jumprfpt: + return Hexagon::J2_jumprf; + case Hexagon::J2_jumprtpt: + return Hexagon::J2_jumprt; + } return NewOp; } @@ -3947,18 +4018,6 @@ short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real); } -// Return first non-debug instruction in the basic block. -MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) - const { - for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) { - MachineInstr &MI = *MII; - if (MI.isDebugValue()) - continue; - return &MI; - } - return nullptr; -} - unsigned HexagonInstrInfo::getInstrTimingClassLatency( const InstrItineraryData *ItinData, const MachineInstr &MI) const { // Default to one cycle for no itinerary. However, an "empty" itinerary may @@ -4139,11 +4198,6 @@ unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { return IS.getUnits(); } -unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; -} - // Calculate size of the basic block without debug instructions. unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { return nonDbgMICount(BB->instr_begin(), BB->instr_end()); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index b268c7a28171..21b4f738f6e8 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -399,6 +399,7 @@ public: const MachineInstr &GB) const; int getCondOpcode(int Opc, bool sense) const; int getDotCurOp(const MachineInstr &MI) const; + int getNonDotCurOp(const MachineInstr &MI) const; int getDotNewOp(const MachineInstr &MI) const; int getDotNewPredJumpOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const; @@ -424,7 +425,6 @@ public: unsigned getSize(const MachineInstr &MI) const; uint64_t getType(const MachineInstr &MI) const; unsigned getUnits(const MachineInstr &MI) const; - unsigned getValidSubTargets(const unsigned Opcode) const; /// getInstrTimingClassLatency - Compute the instruction latency of a given /// instruction using Timing Class information, if available. 
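The HexagonLoopIdiomRecognition.cpp hunks below are a mechanical migration to LLVM's SimplifyQuery interface: SimplifyInstruction() now takes a single query object bundling the DataLayout with the optional analyses, rather than separate trailing parameters, hence the change from (&I, DL, &TLI, &DT) to (&I, {DL, &TLI, &DT}). A minimal sketch of the new calling convention (the analysis arguments are whatever the caller has on hand; the pointers may be null when an analysis is unavailable):

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Sketch: fold I to a simpler value, if possible, using the bundled query,
// and rewrite all of I's uses to that value.
static void simplifyInPlace(Instruction &I, const DataLayout &DL,
                            const TargetLibraryInfo *TLI,
                            const DominatorTree *DT) {
  if (Value *V = SimplifyInstruction(&I, {DL, TLI, DT}))
    I.replaceAllUsesWith(V);
}

The brace-initialized query absorbs the mixed reference/pointer spellings seen at the three call sites in this file ({DL, &TLI, &DT} versus {*DL, TLI, DT}) without changing behavior.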
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index 1829c5da02a6..5a5799dbe009 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -1420,7 +1420,7 @@ bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB, void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) { for (auto &I : *LoopB) - if (Value *SV = SimplifyInstruction(&I, DL, &TLI, &DT)) + if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT})) I.replaceAllUsesWith(SV); for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) { @@ -2044,7 +2044,7 @@ CleanupAndExit: SCEV::FlagNUW); Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt); if (Instruction *In = dyn_cast<Instruction>(NumBytes)) - if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT)) + if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) NumBytes = Simp; CallInst *NewCall; @@ -2156,7 +2156,7 @@ CleanupAndExit: Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty, MemmoveB->getTerminator()); if (Instruction *In = dyn_cast<Instruction>(NumWords)) - if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT)) + if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) NumWords = Simp; Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy) diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp index 7189b5a52c42..072501d8260d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -39,7 +39,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, // Populate the relocation type based on Hexagon target flags // set on an operand MCSymbolRefExpr::VariantKind RelocationType; - switch (MO.getTargetFlags()) { + switch (MO.getTargetFlags() & ~HexagonII::HMOTF_ConstExtended) { default: RelocationType = MCSymbolRefExpr::VK_None; break; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h index dc10028c0424..810abf38863d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -32,14 +32,10 @@ using namespace llvm; namespace llvm { -//===----------------------------------------------------------------------===// -// ConvergingVLIWScheduler - Implementation of the standard -// MachineSchedStrategy. -//===----------------------------------------------------------------------===// class VLIWResourceModel { /// ResourcesModel - Represents VLIW state. - /// Not limited to VLIW targets per say, but assumes + /// Not limited to VLIW targets per se, but assumes /// definition of DFA by a target. DFAPacketizer *ResourcesModel; @@ -110,6 +106,11 @@ public: void schedule() override; }; +//===----------------------------------------------------------------------===// +// ConvergingVLIWScheduler - Implementation of the standard +// MachineSchedStrategy. +//===----------------------------------------------------------------------===// + /// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics /// to balance the schedule. 
class ConvergingVLIWScheduler : public MachineSchedStrategy { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td index f87a1b8e424d..f80e0ef9e39f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td @@ -14,8 +14,8 @@ def f64Imm : Operand<f64> { let ParserMatchClass = f64ImmOperand; } def s8_0Imm64Pred : PatLeaf<(i64 imm), [{ return isInt<8>(N->getSExtValue()); }]>; def s9_0ImmOperand : AsmOperandClass { let Name = "s9_0Imm"; } def s9_0Imm : Operand<i32> { let ParserMatchClass = s9_0ImmOperand; } -def s23_2ImmOperand : AsmOperandClass { let Name = "s23_2Imm"; let RenderMethod = "addSignedImmOperands"; } -def s23_2Imm : Operand<i32> { let ParserMatchClass = s23_2ImmOperand; } +def s27_2ImmOperand : AsmOperandClass { let Name = "s27_2Imm"; let RenderMethod = "addSignedImmOperands"; } +def s27_2Imm : Operand<i32> { let ParserMatchClass = s27_2ImmOperand; } def r32_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isInt<32>(v); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td b/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td index 5a720e794562..2e8def572c4b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td @@ -14,8 +14,11 @@ let PrintMethod = "printGlobalOperand" in { let isPseudo = 1 in { let isCodeGenOnly = 0 in -def A2_iconst : Pseudo<(outs IntRegs:$Rd32), (ins s23_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">; -def DUPLEX_Pseudo : InstHexagon<(outs), (ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>; +def A2_iconst : Pseudo<(outs IntRegs:$Rd32), + (ins s27_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">; + +def DUPLEX_Pseudo : InstHexagon<(outs), + (ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>; } let isExtendable = 1, opExtendable = 1, opExtentBits = 6, @@ -321,7 +324,7 @@ def LDriw_mod : LDInst<(outs ModRegs:$dst), // Vector load let Predicates = [HasV60T, UseHVX] in -let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +let mayLoad = 1, hasSideEffects = 0 in class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CVI_VM_LD, IType type = TypeCVI_VM_LD> @@ -329,7 +332,7 @@ let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in // Vector store let Predicates = [HasV60T, UseHVX] in -let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +let mayStore = 1, hasSideEffects = 0 in class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CVI_VM_ST, IType type = TypeCVI_VM_ST> @@ -415,7 +418,7 @@ let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { // Vector load/store pseudos -let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in +let isPseudo = 1, isCodeGenOnly = 1 in class STrivv_template<RegisterClass RC> : V6_STInst<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), "", []>; @@ -429,7 +432,7 @@ def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B>, Requires<[HasV60T,UseHVXDbl]>; -let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in +let isPseudo = 1, isCodeGenOnly = 1 in class LDrivv_template<RegisterClass RC> : V6_LDInst<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), "", []>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td index 93ab2f731207..2519b7c40062 100644 
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -157,7 +157,7 @@ let Namespace = "Hexagon" in { // and isub_lo can be composed, which leads to all kinds of issues // with lane masks. def C8: Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>; - def PC: Rc<9, "pc">, DwarfRegNum<[76]>; + def PC: Rc<9, "pc", ["c9"]>, DwarfRegNum<[76]>; def UGP: Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>; def GP: Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>; def CS0: Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 3a789a5f7e0b..bf1dce67bd0a 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -356,7 +356,7 @@ void HexagonPacketizerList::cleanUpDotCur() { MachineInstr *MI = nullptr; for (auto BI : CurrentPacketMIs) { DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); - if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) { + if (HII->isDotCurInst(*BI)) { MI = BI; continue; } @@ -369,7 +369,7 @@ void HexagonPacketizerList::cleanUpDotCur() { if (!MI) return; // We did not find a use of the CUR, so de-cur it. - MI->setDesc(HII->get(Hexagon::V6_vL32b_ai)); + MI->setDesc(HII->get(HII->getNonDotCurOp(*MI))); DEBUG(dbgs() << "Demoted CUR "; MI->dump();); } @@ -1579,14 +1579,13 @@ MachineBasicBlock::iterator HexagonPacketizerList::addToPacket(MachineInstr &MI) { MachineBasicBlock::iterator MII = MI.getIterator(); MachineBasicBlock *MBB = MI.getParent(); - if (MI.isImplicitDef()) { - unsigned R = MI.getOperand(0).getReg(); - if (Hexagon::IntRegsRegClass.contains(R)) { - MCSuperRegIterator S(R, HRI, false); - MI.addOperand(MachineOperand::CreateReg(*S, true, true)); - } + + if (CurrentPacketMIs.size() == 0) + PacketStalls = false; + PacketStalls |= producesStall(MI); + + if (MI.isImplicitDef()) return MII; - } assert(ResourceTracker->canReserveResources(MI)); bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); @@ -1677,6 +1676,11 @@ static bool isDependent(const MachineInstr &ProdMI, // V60 forward scheduling. bool HexagonPacketizerList::producesStall(const MachineInstr &I) { + // If the packet already stalls, then ignore the stall from a subsequent + // instruction in the same packet. + if (PacketStalls) + return false; + // Check whether the previous packet is in a different loop. If this is the // case, there is little point in trying to avoid a stall because that would // favor the rare case (loop entry) over the common case (loop iteration). @@ -1699,6 +1703,7 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) { if (isDependent(*J, I) && !HII->isVecUsableNextPacket(*J, I)) return true; } + return false; } @@ -1721,6 +1726,16 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) { } } + // Check if the latency is greater than one between this instruction and any + // instruction in the previous packet. 
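The addToPacket hunk above caches a per-packet stall flag: it is cleared when a new packet starts and then OR-ed with producesStall for each added instruction, so only the first stalling instruction pays for the scan. The latency test that the comment above introduces (its body follows in the next hunk) has roughly this shape; stallsOnPrevPacket and the ArrayRef parameter are our framing:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/ScheduleDAG.h"

using namespace llvm;

// True if any dependence edge from an instruction of the previous packet
// into SUI carries a latency greater than one cycle.
static bool stallsOnPrevPacket(const SUnit *SUI,
                               ArrayRef<const SUnit *> PrevPacket) {
  for (const SUnit *SUJ : PrevPacket)
    for (const SDep &Pred : SUI->Preds)
      if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1)
        return true; // the consumer would wait on the producer
  return false;
}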
+ SUnit *SUI = MIToSUnit[const_cast<MachineInstr *>(&I)]; + for (auto J : OldPacketMIs) { + SUnit *SUJ = MIToSUnit[J]; + for (auto &Pred : SUI->Preds) + if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1) + return true; + } + return false; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h index 3f28dc5b79ce..adb92b6dc855 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -34,6 +34,10 @@ class HexagonPacketizerList : public VLIWPacketizerList { // Track MIs with ignored dependence. std::vector<MachineInstr*> IgnoreDepMIs; + // Set to true if the packet contains an instruction that stalls with an + // instruction from the previous packet. + bool PacketStalls = false; + protected: /// \brief A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 337af294eb86..904403543e18 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -58,6 +58,7 @@ class HexagonAsmBackend : public MCAsmBackend { RF.getContents() = Code; RF.getFixups() = Fixups; } + public: HexagonAsmBackend(const Target &T, const Triple &TT, uint8_t OSABI, StringRef CPU) : @@ -183,7 +184,11 @@ public: { "fixup_Hexagon_IE_GOT_11_X", 0, 32, 0 }, { "fixup_Hexagon_TPREL_32_6_X", 0, 32, 0 }, { "fixup_Hexagon_TPREL_16_X", 0, 32, 0 }, - { "fixup_Hexagon_TPREL_11_X", 0, 32, 0 } + { "fixup_Hexagon_TPREL_11_X", 0, 32, 0 }, + { "fixup_Hexagon_GD_PLT_B22_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_GD_PLT_B32_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_LD_PLT_B22_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_LD_PLT_B32_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel } }; if (Kind < FirstTargetFixupKind) @@ -290,6 +295,11 @@ public: case fixup_Hexagon_32_PCREL: case fixup_Hexagon_6_PCREL_X: case fixup_Hexagon_23_REG: + case fixup_Hexagon_27_REG: + case fixup_Hexagon_GD_PLT_B22_PCREL_X: + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + case fixup_Hexagon_LD_PLT_B22_PCREL_X: + case fixup_Hexagon_LD_PLT_B32_PCREL_X: // These relocations should always have a relocation recorded IsResolved = false; return; @@ -346,6 +356,8 @@ public: case fixup_Hexagon_B9_PCREL_X: case fixup_Hexagon_B7_PCREL: case fixup_Hexagon_B7_PCREL_X: + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + case fixup_Hexagon_LD_PLT_B32_PCREL_X: return 4; } } @@ -373,6 +385,8 @@ public: break; case fixup_Hexagon_B32_PCREL_X: + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + case fixup_Hexagon_LD_PLT_B32_PCREL_X: Value >>= 6; break; } @@ -711,22 +725,24 @@ public: break; } case MCFragment::FT_Relaxable: { + MCContext &Context = Asm.getContext(); auto &RF = cast<MCRelaxableFragment>(*K); auto &Inst = const_cast<MCInst &>(RF.getInst()); while (Size > 0 && HexagonMCInstrInfo::bundleSize(Inst) < 4) { - MCInst *Nop = new (Asm.getContext()) MCInst; + MCInst *Nop = new (Context) MCInst; Nop->setOpcode(Hexagon::A2_nop); Inst.addOperand(MCOperand::createInst(Nop)); Size -= 4; if (!HexagonMCChecker( - *MCII, RF.getSubtargetInfo(), Inst, Inst, - *Asm.getContext().getRegisterInfo()).check()) { + Context, *MCII, RF.getSubtargetInfo(), Inst, + *Context.getRegisterInfo(), false) + .check()) { Inst.erase(Inst.end() - 1); 
Size = 0; } } - bool Error = HexagonMCShuffle(true, *MCII, RF.getSubtargetInfo(), - Inst); + bool Error = HexagonMCShuffle(Context, true, *MCII, + RF.getSubtargetInfo(), Inst); //assert(!Error); (void)Error; ReplaceInstruction(Asm.getEmitter(), RF, Inst); diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 9c80312b790d..adb546dc2140 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -128,10 +128,6 @@ namespace HexagonII { ExtentAlignPos = 33, ExtentAlignMask = 0x3, - // Valid subtargets - validSubTargetPos = 35, - validSubTargetMask = 0x3f, - // Addressing mode for load/store instructions. AddrModePos = 41, AddrModeMask = 0x7, @@ -163,7 +159,10 @@ namespace HexagonII { PrefersSlot3Mask = 0x1, CofMax1Pos = 60, - CofMax1Mask = 0x1 + CofMax1Mask = 0x1, + + CVINewPos = 61, + CVINewMask = 0x1 }; // *** The code above must match HexagonInstrFormat*.td *** // diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index 944e235e72f2..b975e3131094 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -284,6 +284,16 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_HEX_TPREL_11_X; case fixup_Hexagon_23_REG: return ELF::R_HEX_23_REG; + case fixup_Hexagon_27_REG: + return ELF::R_HEX_27_REG; + case fixup_Hexagon_GD_PLT_B22_PCREL_X: + return ELF::R_HEX_GD_PLT_B22_PCREL_X; + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + return ELF::R_HEX_GD_PLT_B32_PCREL_X; + case fixup_Hexagon_LD_PLT_B22_PCREL_X: + return ELF::R_HEX_LD_PLT_B22_PCREL_X; + case fixup_Hexagon_LD_PLT_B32_PCREL_X: + return ELF::R_HEX_LD_PLT_B32_PCREL_X; } } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h index 4c97ebbdd346..347327669ad9 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h @@ -111,6 +111,11 @@ enum Fixups { fixup_Hexagon_TPREL_16_X, fixup_Hexagon_TPREL_11_X, fixup_Hexagon_23_REG, + fixup_Hexagon_27_REG, + fixup_Hexagon_GD_PLT_B22_PCREL_X, + fixup_Hexagon_GD_PLT_B32_PCREL_X, + fixup_Hexagon_LD_PLT_B22_PCREL_X, + fixup_Hexagon_LD_PLT_B32_PCREL_X, LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index 62b21c419f30..3bb658b84451 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -16,23 +16,27 @@ #include "HexagonBaseInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -static cl::opt<bool> RelaxNVChecks("relax-nv-checks", cl::init(false), - cl::ZeroOrMore, cl::Hidden, cl::desc("Relax checks of new-value validity")); +static cl::opt<bool> + RelaxNVChecks("relax-nv-checks", cl::init(false), cl::ZeroOrMore, + cl::Hidden, 
cl::desc("Relax checks of new-value validity")); const HexagonMCChecker::PredSense - HexagonMCChecker::Unconditional(Hexagon::NoRegister, false); + HexagonMCChecker::Unconditional(Hexagon::NoRegister, false); void HexagonMCChecker::init() { // Initialize read-only registers set. ReadOnly.insert(Hexagon::PC); + ReadOnly.insert(Hexagon::C9_8); // Figure out the loop-registers definitions. if (HexagonMCInstrInfo::isInnerLoop(MCB)) { @@ -46,13 +50,12 @@ void HexagonMCChecker::init() { if (HexagonMCInstrInfo::isBundle(MCB)) // Unfurl a bundle. - for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { MCInst const &Inst = *I.getInst(); if (HexagonMCInstrInfo::isDuplex(MCII, Inst)) { init(*Inst.getOperand(0).getInst()); init(*Inst.getOperand(1).getInst()); - } - else + } else init(Inst); } else @@ -69,20 +72,18 @@ void HexagonMCChecker::initReg(MCInst const &MCI, unsigned R, unsigned &PredReg, // Note use of new predicate register. if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) NewPreds.insert(PredReg); - } - else + } else // Note register use. Super-registers are not tracked directly, // but their components. - for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); - SRI.isValid(); - ++SRI) + for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); ++SRI) if (!MCSubRegIterator(*SRI, &RI).isValid()) // Skip super-registers used indirectly. Uses.insert(*SRI); } -void HexagonMCChecker::init(MCInst const& MCI) { - const MCInstrDesc& MCID = HexagonMCInstrInfo::getDesc(MCII, MCI); +void HexagonMCChecker::init(MCInst const &MCI) { + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MCI); unsigned PredReg = Hexagon::NoRegister; bool isTrue = false; @@ -109,10 +110,10 @@ void HexagonMCChecker::init(MCInst const& MCI) { if (Hexagon::USR_OVF == R) // Many insns change the USR implicitly, but only one or another flag. - // The instruction table models the USR.OVF flag, which can be implicitly - // modified more than once, but cannot be modified in the same packet - // with an instruction that modifies is explicitly. Deal with such situ- - // ations individually. + // The instruction table models the USR.OVF flag, which can be + // implicitly modified more than once, but cannot be modified in the + // same packet with an instruction that modifies is explicitly. Deal + // with such situations individually. SoftDefs.insert(R); else if (isPredicateRegister(R) && HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) @@ -124,8 +125,7 @@ void HexagonMCChecker::init(MCInst const& MCI) { // Figure out explicit register definitions. for (unsigned i = 0; i < MCID.getNumDefs(); ++i) { - unsigned R = MCI.getOperand(i).getReg(), - S = Hexagon::NoRegister; + unsigned R = MCI.getOperand(i).getReg(), S = Hexagon::NoRegister; // USR has subregisters (while C8 does not for technical reasons), so // reset R to USR, since we know how to handle multiple defs of USR, // taking into account its subregisters. @@ -134,9 +134,8 @@ void HexagonMCChecker::init(MCInst const& MCI) { // Note register definitions, direct ones as well as indirect side-effects. // Super-registers are not tracked directly, but their components. 
- for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); - SRI.isValid(); - ++SRI) { + for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); ++SRI) { if (MCSubRegIterator(*SRI, &RI).isValid()) // Skip super-registers defined indirectly. continue; @@ -156,22 +155,19 @@ void HexagonMCChecker::init(MCInst const& MCI) { // Only an explicit definition of P3:0 is noted as such; if a // side-effect, then note as a soft definition. SoftDefs.insert(*SRI); - else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && isPredicateRegister(*SRI)) + else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && + isPredicateRegister(*SRI)) // Some insns produce predicates too late to be used in the same packet. LatePreds.insert(*SRI); - else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_CUR_LD) - // Current loads should be used in the same packet. - // TODO: relies on the impossibility of a current and a temporary loads - // in the same packet. - CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue)); - else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD) + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == + HexagonII::TypeCVI_VM_TMP_LD) // Temporary loads should be used in the same packet, but don't commit // results, so it should be disregarded if another insn changes the same // register. // TODO: relies on the impossibility of a current and a temporary loads // in the same packet. TmpDefs.insert(*SRI); - else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI) ) + else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and // destination registers with this instruction. same for vdeal(Vx,Vy,Rx) Uses.insert(*SRI); @@ -187,25 +183,25 @@ void HexagonMCChecker::init(MCInst const& MCI) { if (HexagonMCInstrInfo::isCompound(MCII, MCI)) compoundRegisterMap(R); // Compound insns have a limited register range. - for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); - SRI.isValid(); - ++SRI) + for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); ++SRI) if (!MCSubRegIterator(*SRI, &RI).isValid()) // No super-registers defined indirectly. - NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), - HexagonMCInstrInfo::isFloat(MCII, MCI))); + NewDefs[*SRI].push_back(NewSense::Def( + PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); // For fairly unique 2-dot-new producers, example: // vdeal(V1, V9, R0) V1.new and V9.new can be used by consumers. 
if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) { unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg(); - for(MCRegAliasIterator SRI(R2, &RI, !MCSubRegIterator(R2, &RI).isValid()); - SRI.isValid(); - ++SRI) + bool HasSubRegs = MCSubRegIterator(R2, &RI).isValid(); + for (MCRegAliasIterator SRI(R2, &RI, !HasSubRegs); SRI.isValid(); ++SRI) if (!MCSubRegIterator(*SRI, &RI).isValid()) - NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), - HexagonMCInstrInfo::isFloat(MCII, MCI))); + NewDefs[*SRI].push_back(NewSense::Def( + PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); } } @@ -227,18 +223,19 @@ void HexagonMCChecker::init(MCInst const& MCI) { // Super-registers cannot use new values. if (MCID.isBranch()) NewUses[N] = NewSense::Jmp( - llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNCJ); + llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNCJ); else NewUses[N] = NewSense::Use( - PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI)); + PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI)); } } } -HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCInst &mcbdx, - MCRegisterInfo const &ri) - : MCB(mcb), MCBDX(mcbdx), RI(ri), MCII(MCII), STI(STI), - bLoadErrInfo(false) { +HexagonMCChecker::HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &mcb, + MCRegisterInfo const &ri, bool ReportErrors) + : Context(Context), MCB(mcb), RI(ri), MCII(MCII), STI(STI), + ReportErrors(ReportErrors) { init(); } @@ -247,24 +244,120 @@ bool HexagonMCChecker::check(bool FullCheck) { bool chkP = checkPredicates(); bool chkNV = checkNewValues(); bool chkR = checkRegisters(); + bool chkRRO = checkRegistersReadOnly(); + bool chkELB = checkEndloopBranches(); + checkRegisterCurDefs(); bool chkS = checkSolo(); bool chkSh = true; if (FullCheck) - chkSh = checkShuffle(); + chkSh = checkShuffle(); bool chkSl = true; if (FullCheck) - chkSl = checkSlots(); - bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl; + chkSl = checkSlots(); + bool chkAXOK = checkAXOK(); + bool chk = chkB && chkP && chkNV && chkR && chkRRO && chkELB && chkS && + chkSh && chkSl && chkAXOK; return chk; } -bool HexagonMCChecker::checkSlots() +bool HexagonMCChecker::checkEndloopBranches() { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); + if (Desc.isBranch() || Desc.isCall()) { + auto Inner = HexagonMCInstrInfo::isInnerLoop(MCB); + if (Inner || HexagonMCInstrInfo::isOuterLoop(MCB)) { + reportError(I.getLoc(), + llvm::Twine("packet marked with `:endloop") + + (Inner ? 
"0" : "1") + "' " + + "cannot contain instructions that modify register " + + "`" + llvm::Twine(RI.getName(Hexagon::PC)) + "'"); + return false; + } + } + } + return true; +} + +namespace { +bool isDuplexAGroup(unsigned Opcode) { + switch (Opcode) { + case Hexagon::SA1_addi: + case Hexagon::SA1_addrx: + case Hexagon::SA1_addsp: + case Hexagon::SA1_and1: + case Hexagon::SA1_clrf: + case Hexagon::SA1_clrfnew: + case Hexagon::SA1_clrt: + case Hexagon::SA1_clrtnew: + case Hexagon::SA1_cmpeqi: + case Hexagon::SA1_combine0i: + case Hexagon::SA1_combine1i: + case Hexagon::SA1_combine2i: + case Hexagon::SA1_combine3i: + case Hexagon::SA1_combinerz: + case Hexagon::SA1_combinezr: + case Hexagon::SA1_dec: + case Hexagon::SA1_inc: + case Hexagon::SA1_seti: + case Hexagon::SA1_setin1: + case Hexagon::SA1_sxtb: + case Hexagon::SA1_sxth: + case Hexagon::SA1_tfr: + case Hexagon::SA1_zxtb: + case Hexagon::SA1_zxth: + return true; + break; + default: + return false; + } +} + +bool isNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) { + unsigned Result = 0; + unsigned Type = HexagonMCInstrInfo::getType(MCII, ID); + if (Type == HexagonII::TypeDUPLEX) { + unsigned subInst0Opcode = ID.getOperand(0).getInst()->getOpcode(); + unsigned subInst1Opcode = ID.getOperand(1).getInst()->getOpcode(); + Result += !isDuplexAGroup(subInst0Opcode); + Result += !isDuplexAGroup(subInst1Opcode); + } else + Result += + Type != HexagonII::TypeALU32_2op && Type != HexagonII::TypeALU32_3op && + Type != HexagonII::TypeALU32_ADDI && Type != HexagonII::TypeS_2op && + Type != HexagonII::TypeS_3op && + (Type != HexagonII::TypeALU64 || HexagonMCInstrInfo::isFloat(MCII, ID)); + return Result != 0; +} +} // namespace + +bool HexagonMCChecker::checkAXOK() { + MCInst const *HasSoloAXInst = nullptr; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (HexagonMCInstrInfo::isSoloAX(MCII, I)) { + HasSoloAXInst = &I; + } + } + if (!HasSoloAXInst) + return true; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (&I != HasSoloAXInst && isNeitherAnorX(MCII, I)) { + reportError( + HasSoloAXInst->getLoc(), + llvm::Twine("Instruction can only be in a packet with ALU or " + "non-FPU XTYPE instructions")); + reportError(I.getLoc(), + llvm::Twine("Not an ALU or non-FPU XTYPE instruction")); + return false; + } + } + return true; +} -{ +bool HexagonMCChecker::checkSlots() { unsigned slotsUsed = 0; - for (auto HMI: HexagonMCInstrInfo::bundleInstructions(MCBDX)) { - MCInst const& MCI = *HMI.getInst(); + for (auto HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) { + MCInst const &MCI = *HMI.getInst(); if (HexagonMCInstrInfo::isImmext(MCI)) continue; if (HexagonMCInstrInfo::isDuplex(MCII, MCI)) @@ -274,9 +367,7 @@ bool HexagonMCChecker::checkSlots() } if (slotsUsed > HEXAGON_PACKET_SIZE) { - HexagonMCErrInfo errInfo; - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NOSLOTS); - addErrInfo(errInfo); + reportError("invalid instruction packet: out of slots"); return false; } return true; @@ -284,11 +375,9 @@ bool HexagonMCChecker::checkSlots() // Check legal use of branches. 
bool HexagonMCChecker::checkBranches() { - HexagonMCErrInfo errInfo; if (HexagonMCInstrInfo::isBundle(MCB)) { bool hasConditional = false; - unsigned Branches = 0, - Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE, + unsigned Branches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE, Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE; for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset; @@ -310,20 +399,12 @@ bool HexagonMCChecker::checkBranches() { } } - if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too? - if (HexagonMCInstrInfo::isInnerLoop(MCB) || - HexagonMCInstrInfo::isOuterLoop(MCB)) { - // Error out if there's any branch in a loop-end packet. - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_ENDLOOP, Hexagon::PC); - addErrInfo(errInfo); - return false; - } if (Branches > 1) if (!hasConditional || Conditional > Unconditional) { // Error out if more than one unconditional branch or // the conditional branch appears after the unconditional one. - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_BRANCHES); - addErrInfo(errInfo); + reportError( + "unconditional branch cannot precede another branch in packet"); return false; } } @@ -333,31 +414,28 @@ bool HexagonMCChecker::checkBranches() { // Check legal use of predicate registers. bool HexagonMCChecker::checkPredicates() { - HexagonMCErrInfo errInfo; // Check for proper use of new predicate registers. - for (const auto& I : NewPreds) { + for (const auto &I : NewPreds) { unsigned P = I; if (!Defs.count(P) || LatePreds.count(P)) { // Error out if the new predicate register is not defined, // or defined "late" // (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }"). - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWP, P); - addErrInfo(errInfo); + reportErrorNewValue(P); return false; } } // Check for proper use of auto-anded of predicate registers. - for (const auto& I : LatePreds) { + for (const auto &I : LatePreds) { unsigned P = I; if (LatePreds.count(P) > 1 || Defs.count(P)) { // Error out if predicate register defined "late" multiple times or // defined late and regularly defined // (e.g., "{ p3 = sp1loop0(...); p3 = cmp.eq(...) }". - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, P); - addErrInfo(errInfo); + reportErrorRegisters(P); return false; } } @@ -367,15 +445,12 @@ bool HexagonMCChecker::checkPredicates() { // Check legal use of new values. 
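The recurring change across this file shows in the two hunks above: the setError/addErrInfo code queue is gone, and each rule emits a located diagnostic immediately. A sketch of that reporting style, assuming an MCContext wired to a SourceMgr as the patch's own reportWarning does; diagnose is our name:

#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"

using namespace llvm;

static void diagnose(MCContext &Ctx, SMLoc Loc, const Twine &Msg,
                     bool IsError) {
  if (IsError) {
    Ctx.reportError(Loc, Msg); // counted, printed via the attached SourceMgr
    return;
  }
  // MCContext of this vintage has no reportWarning; print directly.
  if (const SourceMgr *SM = Ctx.getSourceManager())
    SM->PrintMessage(Loc, SourceMgr::DK_Warning, Msg);
}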
bool HexagonMCChecker::checkNewValues() { - HexagonMCErrInfo errInfo; - memset(&errInfo, 0, sizeof(errInfo)); - for (auto& I : NewUses) { + for (auto &I : NewUses) { unsigned R = I.first; NewSense &US = I.second; if (!hasValidNewValueDef(US, NewDefs[R])) { - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWV, R); - addErrInfo(errInfo); + reportErrorNewValue(R); return false; } } @@ -383,25 +458,61 @@ bool HexagonMCChecker::checkNewValues() { return true; } +bool HexagonMCChecker::checkRegistersReadOnly() { + for (auto I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + MCInst const &Inst = *I.getInst(); + unsigned Defs = HexagonMCInstrInfo::getDesc(MCII, Inst).getNumDefs(); + for (unsigned j = 0; j < Defs; ++j) { + MCOperand const &Operand = Inst.getOperand(j); + assert(Operand.isReg() && "Def is not a register"); + unsigned Register = Operand.getReg(); + if (ReadOnly.find(Register) != ReadOnly.end()) { + reportError(Inst.getLoc(), "Cannot write to read-only register `" + + llvm::Twine(RI.getName(Register)) + "'"); + return false; + } + } + } + return true; +} + +bool HexagonMCChecker::registerUsed(unsigned Register) { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) + for (unsigned j = HexagonMCInstrInfo::getDesc(MCII, I).getNumDefs(), + n = I.getNumOperands(); + j < n; ++j) { + MCOperand const &Operand = I.getOperand(j); + if (Operand.isReg() && Operand.getReg() == Register) + return true; + } + return false; +} + +void HexagonMCChecker::checkRegisterCurDefs() { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (HexagonMCInstrInfo::isCVINew(MCII, I) && + HexagonMCInstrInfo::getDesc(MCII, I).mayLoad()) { + unsigned Register = I.getOperand(0).getReg(); + if (!registerUsed(Register)) + reportWarning("Register `" + llvm::Twine(RI.getName(Register)) + + "' used with `.cur' " + "but not used in the same packet"); + } + } +} + // Check for legal register uses and definitions. bool HexagonMCChecker::checkRegisters() { - HexagonMCErrInfo errInfo; // Check for proper register definitions. - for (const auto& I : Defs) { + for (const auto &I : Defs) { unsigned R = I.first; - if (ReadOnly.count(R)) { - // Error out for definitions of read-only registers. - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_READONLY, R); - addErrInfo(errInfo); - return false; - } if (isLoopRegister(R) && Defs.count(R) > 1 && (HexagonMCInstrInfo::isInnerLoop(MCB) || HexagonMCInstrInfo::isOuterLoop(MCB))) { // Error out for definitions of loop registers at the end of a loop. - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_LOOP, R); - addErrInfo(errInfo); + reportError("loop-setup and some branch instructions " + "cannot be in the same packet"); return false; } if (SoftDefs.count(R)) { @@ -409,8 +520,7 @@ bool HexagonMCChecker::checkRegisters() { // (e.g., "{ usr = r0; r0 = sfadd(...) }"). unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:. unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR); - addErrInfo(errInfo); + reportErrorRegisters(BadR); return false; } if (!isPredicateRegister(R) && Defs[R].size() > 1) { @@ -423,20 +533,18 @@ bool HexagonMCChecker::checkRegisters() { // changes, conditional or not. unsigned UsrR = Hexagon::USR; unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? 
UsrR : R; - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR); - addErrInfo(errInfo); + reportErrorRegisters(BadR); return false; } // Check for multiple conditional register definitions. - for (const auto& J : PM) { + for (const auto &J : PM) { PredSense P = J; // Check for multiple uses of the same condition. if (PM.count(P) > 1) { // Error out on conditional changes based on the same predicate // (e.g., "{ if (!p0) r0 =...; if (!p0) r0 =... }"). - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R); - addErrInfo(errInfo); + reportErrorRegisters(R); return false; } // Check for the use of the complementary condition. @@ -444,44 +552,33 @@ bool HexagonMCChecker::checkRegisters() { if (PM.count(P) && PM.size() > 2) { // Error out on conditional changes based on the same predicate // multiple times - // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =... }"). - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R); - addErrInfo(errInfo); + // (e.g., "if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =..."). + reportErrorRegisters(R); return false; } } } } - // Check for use of current definitions. - for (const auto& I : CurDefs) { - unsigned R = I; - - if (!Uses.count(R)) { - // Warn on an unused current definition. - errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_CURRENT, R); - addErrInfo(errInfo); - return true; - } - } - // Check for use of temporary definitions. - for (const auto& I : TmpDefs) { + for (const auto &I : TmpDefs) { unsigned R = I; if (!Uses.count(R)) { // special case for vhist bool vHistFound = false; - for (auto const&HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) { - if(llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == HexagonII::TypeCVI_HIST) { - vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp + for (auto const &HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) { + if (llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == + HexagonII::TypeCVI_HIST) { + vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp break; } } // Warn on an unused temporary definition. if (vHistFound == false) { - errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_TEMPORARY, R); - addErrInfo(errInfo); + reportWarning("register `" + llvm::Twine(RI.getName(R)) + + "' used with `.tmp' " + "but not used in the same packet"); return true; } } @@ -492,45 +589,25 @@ bool HexagonMCChecker::checkRegisters() { // Check for legal use of solo insns. bool HexagonMCChecker::checkSolo() { - HexagonMCErrInfo errInfo; - if (HexagonMCInstrInfo::isBundle(MCB) && - HexagonMCInstrInfo::bundleSize(MCB) > 1) { - for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) { - if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) { - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SOLO); - addErrInfo(errInfo); + if (HexagonMCInstrInfo::bundleSize(MCB) > 1) + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (llvm::HexagonMCInstrInfo::isSolo(MCII, I)) { + reportError(I.getLoc(), "Instruction is marked `isSolo' and " + "cannot have other instructions in " + "the same packet"); return false; } } - } return true; } bool HexagonMCChecker::checkShuffle() { - HexagonMCErrInfo errInfo; - // Branch info is lost when duplexing. The unduplexed insns must be - // checked and only branch errors matter for this case. 
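The conditional-definition checks above key a multiset of (predicate register, sense) pairs per destination register: a duplicate entry means two writes guarded by the same condition, and a complementary pair plus any further write means the register changes more than once on some path. The core test, reduced; hasPredicateClash is our name:

#include <set>
#include <utility>

// (PredicateReg, IsTrueSense): one conditional write of a register.
typedef std::pair<unsigned, bool> PredSense;

// e.g. "{ if (!p0) r0 = ...; if (!p0) r0 = ... }" is rejected.
static bool hasPredicateClash(const std::multiset<PredSense> &PM) {
  for (const PredSense &P : PM) {
    if (PM.count(P) > 1)
      return true;                      // same condition used twice
    PredSense Comp(P.first, !P.second); // the complementary condition
    if (PM.count(Comp) && PM.size() > 2)
      return true;                      // p and !p plus a third write
  }
  return false;
}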
- HexagonMCShuffler MCS(true, MCII, STI, MCB); - if (!MCS.check()) { - if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) { - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE); - errInfo.setShuffleError(MCS.getError()); - addErrInfo(errInfo); - return false; - } - } - HexagonMCShuffler MCSDX(true, MCII, STI, MCBDX); - if (!MCSDX.check()) { - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE); - errInfo.setShuffleError(MCSDX.getError()); - addErrInfo(errInfo); - return false; - } - return true; + HexagonMCShuffler MCSDX(Context, ReportErrors, MCII, STI, MCB); + return MCSDX.check(); } -void HexagonMCChecker::compoundRegisterMap(unsigned& Register) { +void HexagonMCChecker::compoundRegisterMap(unsigned &Register) { switch (Register) { default: break; @@ -562,7 +639,7 @@ void HexagonMCChecker::compoundRegisterMap(unsigned& Register) { } bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use, - const NewSenseList &Defs) const { + const NewSenseList &Defs) const { bool Strict = !RelaxNVChecks; for (unsigned i = 0, n = Defs.size(); i < n; ++i) { @@ -590,3 +667,30 @@ bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use, return false; } +void HexagonMCChecker::reportErrorRegisters(unsigned Register) { + reportError("register `" + llvm::Twine(RI.getName(Register)) + + "' modified more than once"); +} + +void HexagonMCChecker::reportErrorNewValue(unsigned Register) { + reportError("register `" + llvm::Twine(RI.getName(Register)) + + "' used with `.new' " + "but not validly modified in the same packet"); +} + +void HexagonMCChecker::reportError(llvm::Twine const &Msg) { + reportError(MCB.getLoc(), Msg); +} + +void HexagonMCChecker::reportError(SMLoc Loc, llvm::Twine const &Msg) { + if (ReportErrors) + Context.reportError(Loc, Msg); +} + +void HexagonMCChecker::reportWarning(llvm::Twine const &Msg) { + if (ReportErrors) { + auto SM = Context.getSourceManager(); + if (SM) + SM->PrintMessage(MCB.getLoc(), SourceMgr::DK_Warning, Msg); + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h index c3b3d4c14c88..027f78b4899c 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -24,59 +24,14 @@ using namespace llvm; namespace llvm { class MCOperandInfo; -typedef struct { - unsigned Error, Warning, ShuffleError; - unsigned Register; -} ErrInfo_T; - -class HexagonMCErrInfo { -public: - enum { - CHECK_SUCCESS = 0, - // Errors. - CHECK_ERROR_BRANCHES = 0x00001, - CHECK_ERROR_NEWP = 0x00002, - CHECK_ERROR_NEWV = 0x00004, - CHECK_ERROR_REGISTERS = 0x00008, - CHECK_ERROR_READONLY = 0x00010, - CHECK_ERROR_LOOP = 0x00020, - CHECK_ERROR_ENDLOOP = 0x00040, - CHECK_ERROR_SOLO = 0x00080, - CHECK_ERROR_SHUFFLE = 0x00100, - CHECK_ERROR_NOSLOTS = 0x00200, - CHECK_ERROR_UNKNOWN = 0x00400, - // Warnings. - CHECK_WARN_CURRENT = 0x10000, - CHECK_WARN_TEMPORARY = 0x20000 - }; - ErrInfo_T s; - - void reset() { - s.Error = CHECK_SUCCESS; - s.Warning = CHECK_SUCCESS; - s.ShuffleError = HexagonShuffler::SHUFFLE_SUCCESS; - s.Register = Hexagon::NoRegister; - }; - HexagonMCErrInfo() { - reset(); - }; - - void setError(unsigned e, unsigned r = Hexagon::NoRegister) - { s.Error = e; s.Register = r; }; - void setWarning(unsigned w, unsigned r = Hexagon::NoRegister) - { s.Warning = w; s.Register = r; }; - void setShuffleError(unsigned e) { s.ShuffleError = e; }; -}; - /// Check for a valid bundle. 
class HexagonMCChecker { - /// Insn bundle. - MCInst& MCB; - MCInst& MCBDX; - const MCRegisterInfo& RI; + MCContext &Context; + MCInst &MCB; + const MCRegisterInfo &RI; MCInstrInfo const &MCII; MCSubtargetInfo const &STI; - bool bLoadErrInfo; + bool ReportErrors; /// Set of definitions: register #, if predicated, if predicated true. typedef std::pair<unsigned, bool> PredSense; @@ -99,23 +54,23 @@ class HexagonMCChecker { bool IsFloat, IsNVJ, Cond; // The special-case "constructors": static NewSense Jmp(bool isNVJ) { - NewSense NS = { /*PredReg=*/ 0, /*IsFloat=*/ false, /*IsNVJ=*/ isNVJ, - /*Cond=*/ false }; + NewSense NS = {/*PredReg=*/0, /*IsFloat=*/false, /*IsNVJ=*/isNVJ, + /*Cond=*/false}; return NS; } static NewSense Use(unsigned PR, bool True) { - NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ false, /*IsNVJ=*/ false, - /*Cond=*/ True }; + NewSense NS = {/*PredReg=*/PR, /*IsFloat=*/false, /*IsNVJ=*/false, + /*Cond=*/True}; return NS; } static NewSense Def(unsigned PR, bool True, bool Float) { - NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ Float, /*IsNVJ=*/ false, - /*Cond=*/ True }; + NewSense NS = {/*PredReg=*/PR, /*IsFloat=*/Float, /*IsNVJ=*/false, + /*Cond=*/True}; return NS; } }; /// Set of definitions that produce new register: - typedef llvm::SmallVector<NewSense,2> NewSenseList; + typedef llvm::SmallVector<NewSense, 2> NewSenseList; typedef llvm::DenseMap<unsigned, NewSenseList>::iterator NewDefsIterator; llvm::DenseMap<unsigned, NewSenseList> NewDefs; @@ -123,10 +78,6 @@ class HexagonMCChecker { typedef std::set<unsigned>::iterator SoftDefsIterator; std::set<unsigned> SoftDefs; - /// Set of current definitions committed to the register file. - typedef std::set<unsigned>::iterator CurDefsIterator; - std::set<unsigned> CurDefs; - /// Set of temporary definitions not committed to the register file. typedef std::set<unsigned>::iterator TmpDefsIterator; std::set<unsigned> TmpDefs; @@ -151,69 +102,51 @@ class HexagonMCChecker { typedef std::set<unsigned>::iterator ReadOnlyIterator; std::set<unsigned> ReadOnly; - std::queue<ErrInfo_T> ErrInfoQ; - HexagonMCErrInfo CrntErrInfo; - - void getErrInfo() { - if (bLoadErrInfo == true) { - if (ErrInfoQ.empty()) { - CrntErrInfo.reset(); - } else { - CrntErrInfo.s = ErrInfoQ.front(); - ErrInfoQ.pop(); - } - } - bLoadErrInfo = false; - } - void init(); - void init(MCInst const&); + void init(MCInst const &); void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue); + bool registerUsed(unsigned Register); + // Checks performed. 
bool checkBranches(); bool checkPredicates(); bool checkNewValues(); bool checkRegisters(); + bool checkRegistersReadOnly(); + bool checkEndloopBranches(); + void checkRegisterCurDefs(); bool checkSolo(); bool checkShuffle(); bool checkSlots(); - bool checkSize(); + bool checkAXOK(); - static void compoundRegisterMap(unsigned&); + static void compoundRegisterMap(unsigned &); bool isPredicateRegister(unsigned R) const { - return (Hexagon::P0 == R || Hexagon::P1 == R || - Hexagon::P2 == R || Hexagon::P3 == R); + return (Hexagon::P0 == R || Hexagon::P1 == R || Hexagon::P2 == R || + Hexagon::P3 == R); }; bool isLoopRegister(unsigned R) const { - return (Hexagon::SA0 == R || Hexagon::LC0 == R || - Hexagon::SA1 == R || Hexagon::LC1 == R); + return (Hexagon::SA0 == R || Hexagon::LC0 == R || Hexagon::SA1 == R || + Hexagon::LC1 == R); }; - bool hasValidNewValueDef(const NewSense &Use, - const NewSenseList &Defs) const; + bool hasValidNewValueDef(const NewSense &Use, const NewSenseList &Defs) const; - public: - explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx, - const MCRegisterInfo& ri); +public: + explicit HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &mcb, + const MCRegisterInfo &ri, bool ReportErrors = true); bool check(bool FullCheck = true); - - /// add a new error/warning - void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); }; - - /// Return the error code for the last operation in the insn bundle. - unsigned getError() { getErrInfo(); return CrntErrInfo.s.Error; }; - unsigned getWarning() { getErrInfo(); return CrntErrInfo.s.Warning; }; - unsigned getShuffleError() { getErrInfo(); return CrntErrInfo.s.ShuffleError; }; - unsigned getErrRegister() { getErrInfo(); return CrntErrInfo.s.Register; }; - bool getNextErrInfo() { - bLoadErrInfo = true; - return (ErrInfoQ.empty()) ? 
false : (getErrInfo(), true); - } + void reportErrorRegisters(unsigned Register); + void reportErrorNewValue(unsigned Register); + void reportError(SMLoc Loc, llvm::Twine const &Msg); + void reportError(llvm::Twine const &Msg); + void reportWarning(llvm::Twine const &Msg); }; -} +} // namespace llvm #endif // HEXAGONMCCHECKER_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index c0956520de73..dfb5f4cc8260 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -199,6 +199,11 @@ Hexagon::Fixups HexagonMCCodeEmitter::getFixupNoBits( return Hexagon::fixup_Hexagon_IE_GOT_32_6_X; case MCSymbolRefExpr::VK_Hexagon_PCREL: return Hexagon::fixup_Hexagon_B32_PCREL_X; + case MCSymbolRefExpr::VK_Hexagon_GD_PLT: + return Hexagon::fixup_Hexagon_GD_PLT_B32_PCREL_X; + case MCSymbolRefExpr::VK_Hexagon_LD_PLT: + return Hexagon::fixup_Hexagon_LD_PLT_B32_PCREL_X; + case MCSymbolRefExpr::VK_None: { auto Insts = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); for (auto I = Insts.begin(), N = Insts.end(); I != N; ++I) { @@ -318,6 +323,8 @@ namespace { case fixup_Hexagon_PLT_B22_PCREL: case fixup_Hexagon_GD_PLT_B22_PCREL: case fixup_Hexagon_LD_PLT_B22_PCREL: + case fixup_Hexagon_GD_PLT_B22_PCREL_X: + case fixup_Hexagon_LD_PLT_B22_PCREL_X: case fixup_Hexagon_6_PCREL_X: return true; default: @@ -414,10 +421,12 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, case 22: switch (kind) { case MCSymbolRefExpr::VK_Hexagon_GD_PLT: - FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL; + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X + : Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL; break; case MCSymbolRefExpr::VK_Hexagon_LD_PLT: - FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL; + FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X + : Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL; break; case MCSymbolRefExpr::VK_None: FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_B22_PCREL_X @@ -467,8 +476,8 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, } else switch (kind) { case MCSymbolRefExpr::VK_None: { - if (HexagonMCInstrInfo::s23_2_reloc(*MO.getExpr())) - FixupKind = Hexagon::fixup_Hexagon_23_REG; + if (HexagonMCInstrInfo::s27_2_reloc(*MO.getExpr())) + FixupKind = Hexagon::fixup_Hexagon_27_REG; else if (MCID.mayStore() || MCID.mayLoad()) { for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; @@ -593,6 +602,12 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, case MCSymbolRefExpr::VK_Hexagon_LD_GOT: FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X; break; + case MCSymbolRefExpr::VK_Hexagon_GD_PLT: + FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X; + break; + case MCSymbolRefExpr::VK_Hexagon_LD_PLT: + FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X; + break; case MCSymbolRefExpr::VK_None: FixupKind = Hexagon::fixup_Hexagon_11_X; break; diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp index ffa980ca6563..127c97e342dc 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -406,7 +406,7 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo co if (MCI.size() < 2) return; - bool StartedValid = llvm::HexagonMCShuffle(false, MCII, STI, MCI); + bool StartedValid = llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI); // Create a vector, needed to keep the order of jump instructions. MCInst CheckList(MCI); @@ -420,8 +420,9 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo co // Need to update the bundle. 
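The code-emitter hunks above pair each PLT branch fixup with an "_X" variant selected when the operand carries a constant extender. The selection pattern isolated; pickPLTFixup is our name, and GD/LD stand for the TLS general-dynamic and local-dynamic models:

#include "MCTargetDesc/HexagonFixupKinds.h"

// 22-bit PC-relative PLT branches: an extended instruction encoding needs
// the matching extended relocation added by this patch.
static Hexagon::Fixups pickPLTFixup(bool Extended, bool GlobalDynamic) {
  if (GlobalDynamic)
    return Extended ? Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X
                    : Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL;
  return Extended ? Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X
                  : Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL;
}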
MCI = CheckList; - if (StartedValid && !llvm::HexagonMCShuffle(false, MCII, STI, MCI)) { - DEBUG(dbgs() << "Found ERROR\n"); + if (StartedValid && + !llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI)) { + DEBUG(dbgs() << "Found ERROR\n"); MCI = OriginalBundle; } } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp index 14300edc7e1b..9fbe299d7d52 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp @@ -94,9 +94,9 @@ void HexagonMCExpr::setMustNotExtend(bool Val) { } bool HexagonMCExpr::mustNotExtend() const { return MustNotExtend; } -bool HexagonMCExpr::s23_2_reloc() const { return S23_2_reloc; } -void HexagonMCExpr::setS23_2_reloc(bool Val) { - S23_2_reloc = Val; +bool HexagonMCExpr::s27_2_reloc() const { return S27_2_reloc; } +void HexagonMCExpr::setS27_2_reloc(bool Val) { + S27_2_reloc = Val; } bool HexagonMCExpr::classof(MCExpr const *E) { @@ -104,7 +104,7 @@ bool HexagonMCExpr::classof(MCExpr const *E) { } HexagonMCExpr::HexagonMCExpr(MCExpr const *Expr) - : Expr(Expr), MustNotExtend(false), MustExtend(false), S23_2_reloc(false), + : Expr(Expr), MustNotExtend(false), MustExtend(false), S27_2_reloc(false), SignMismatch(false) {} void HexagonMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h index bca40cfaf6f4..acfd996ccf82 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h @@ -29,8 +29,8 @@ public: bool mustExtend() const; void setMustNotExtend(bool Val = true); bool mustNotExtend() const; - void setS23_2_reloc(bool Val = true); - bool s23_2_reloc() const; + void setS27_2_reloc(bool Val = true); + bool s27_2_reloc() const; void setSignMismatch(bool Val = true); bool signMismatch() const; @@ -39,7 +39,7 @@ private: MCExpr const *Expr; bool MustNotExtend; bool MustExtend; - bool S23_2_reloc; + bool S27_2_reloc; bool SignMismatch; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 553ffba508a1..5fe638a9996b 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -22,6 +22,49 @@ #include "llvm/MC/MCSubtargetInfo.h" namespace llvm { + +Hexagon::PacketIterator::PacketIterator(MCInstrInfo const &MCII, + MCInst const &Inst) + : MCII(MCII), BundleCurrent(Inst.begin() + + HexagonMCInstrInfo::bundleInstructionsOffset), + BundleEnd(Inst.end()), DuplexCurrent(Inst.end()), DuplexEnd(Inst.end()) {} + +Hexagon::PacketIterator::PacketIterator(MCInstrInfo const &MCII, + MCInst const &Inst, std::nullptr_t) + : MCII(MCII), BundleCurrent(Inst.end()), BundleEnd(Inst.end()), + DuplexCurrent(Inst.end()), DuplexEnd(Inst.end()) {} + +Hexagon::PacketIterator &Hexagon::PacketIterator::operator++() { + if (DuplexCurrent != DuplexEnd) { + ++DuplexCurrent; + if (DuplexCurrent == DuplexEnd) { + DuplexCurrent = BundleEnd; + DuplexEnd = BundleEnd; + } + return *this; + } + ++BundleCurrent; + if (BundleCurrent != BundleEnd) { + MCInst const &Inst = *BundleCurrent->getInst(); + if (HexagonMCInstrInfo::isDuplex(MCII, Inst)) { + DuplexCurrent = Inst.begin(); + DuplexEnd = 
Inst.end(); + } + } + return *this; +} + +MCInst const &Hexagon::PacketIterator::operator*() const { + if (DuplexCurrent != DuplexEnd) + return *DuplexCurrent->getInst(); + return *BundleCurrent->getInst(); +} + +bool Hexagon::PacketIterator::operator==(PacketIterator const &Other) const { + return BundleCurrent == Other.BundleCurrent && BundleEnd == Other.BundleEnd && + DuplexCurrent == Other.DuplexCurrent && DuplexEnd == Other.DuplexEnd; +} + void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value, MCContext &Context) { MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context))); @@ -41,6 +84,14 @@ void HexagonMCInstrInfo::addConstExtender(MCContext &Context, MCB.addOperand(MCOperand::createInst(XMCI)); } +iterator_range<Hexagon::PacketIterator> +HexagonMCInstrInfo::bundleInstructions(MCInstrInfo const &MCII, + MCInst const &MCI) { + assert(isBundle(MCI)); + return make_range(Hexagon::PacketIterator(MCII, MCI), + Hexagon::PacketIterator(MCII, MCI, nullptr)); +} + iterator_range<MCInst::const_iterator> HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) { assert(isBundle(MCI)); @@ -66,7 +117,7 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, // instructions when possible. if (!HexagonDisableCompound) HexagonMCInstrInfo::tryCompound(MCII, STI, Context, MCB); - HexagonMCShuffle(false, MCII, STI, MCB); + HexagonMCShuffle(Context, false, MCII, STI, MCB); // Examine the packet and convert pairs of instructions to duplex // instructions when possible. MCInst InstBundlePreDuplex = MCInst(MCB); @@ -74,7 +125,7 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, SmallVector<DuplexCandidate, 8> possibleDuplexes; possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, STI, MCB); - HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes); + HexagonMCShuffle(Context, MCII, STI, MCB, possibleDuplexes); } // Examines packet and pad the packet, if needed, when an // end-loop is in the bundle. @@ -87,7 +138,7 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, CheckOk = Check ? 
Check->check(true) : true; if (!CheckOk) return false; - HexagonMCShuffle(true, MCII, STI, MCB); + HexagonMCShuffle(Context, true, MCII, STI, MCB); return true; } @@ -292,7 +343,7 @@ int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII, } StringRef HexagonMCInstrInfo::getName(MCInstrInfo const &MCII, - MCInst const &MCI) { + MCInst const &MCI) { return MCII.getName(MCI.getOpcode()); } @@ -339,25 +390,6 @@ unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII, return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); } -int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII, - MCInst const &MCI) { - const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; - - HexagonII::SubTarget Target = static_cast<HexagonII::SubTarget>( - (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask); - - switch (Target) { - default: - return Hexagon::ArchV4; - case HexagonII::HasV5SubT: - return Hexagon::ArchV5; - case HexagonII::HasV55SubT: - return Hexagon::ArchV55; - case HexagonII::HasV60SubT: - return Hexagon::ArchV60; - } -} - /// Return the slots this instruction can execute out of unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, @@ -397,9 +429,8 @@ bool HexagonMCInstrInfo::hasDuplex(MCInstrInfo const &MCII, MCInst const &MCI) { if (!HexagonMCInstrInfo::isBundle(MCI)) return false; - for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) { - auto MI = I.getInst(); - if (HexagonMCInstrInfo::isDuplex(MCII, *MI)) + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCI)) { + if (HexagonMCInstrInfo::isDuplex(MCII, I)) return true; } @@ -410,13 +441,12 @@ bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) { return extenderForIndex(MCB, Index) != nullptr; } -bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) { +bool HexagonMCInstrInfo::hasImmExt( MCInst const &MCI) { if (!HexagonMCInstrInfo::isBundle(MCI)) return false; for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) { - auto MI = I.getInst(); - if (isImmext(*MI)) + if (isImmext(*I.getInst())) return true; } @@ -505,6 +535,11 @@ bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII, return (getType(MCII, MCI) == HexagonII::TypeCJ); } +bool HexagonMCInstrInfo::isCVINew(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::CVINewPos) & HexagonII::CVINewMask); +} + bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) { return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) || (Reg >= Hexagon::D8 && Reg <= Hexagon::D11)); @@ -732,16 +767,16 @@ bool HexagonMCInstrInfo::mustNotExtend(MCExpr const &Expr) { HexagonMCExpr const &HExpr = cast<HexagonMCExpr>(Expr); return HExpr.mustNotExtend(); } -void HexagonMCInstrInfo::setS23_2_reloc(MCExpr const &Expr, bool Val) { +void HexagonMCInstrInfo::setS27_2_reloc(MCExpr const &Expr, bool Val) { HexagonMCExpr &HExpr = const_cast<HexagonMCExpr &>(*llvm::cast<HexagonMCExpr>(&Expr)); - HExpr.setS23_2_reloc(Val); + HExpr.setS27_2_reloc(Val); } -bool HexagonMCInstrInfo::s23_2_reloc(MCExpr const &Expr) { +bool HexagonMCInstrInfo::s27_2_reloc(MCExpr const &Expr) { HexagonMCExpr const *HExpr = llvm::dyn_cast<HexagonMCExpr>(&Expr); if (!HExpr) return false; - return HExpr->s23_2_reloc(); + return HExpr->s27_2_reloc(); } void HexagonMCInstrInfo::padEndloop(MCInst &MCB, MCContext &Context) { @@ -813,4 +848,4 @@ unsigned HexagonMCInstrInfo::SubregisterBit(unsigned Consumer, return 0x1; 
return 0; } -} +} // namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index 2e989adb5ccb..ca44c3a11ba7 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -31,6 +31,25 @@ public: DuplexCandidate(unsigned i, unsigned j, unsigned iClass) : packetIndexI(i), packetIndexJ(j), iClass(iClass) {} }; +namespace Hexagon { +class PacketIterator { + MCInstrInfo const &MCII; + MCInst::const_iterator BundleCurrent; + MCInst::const_iterator BundleEnd; + MCInst::const_iterator DuplexCurrent; + MCInst::const_iterator DuplexEnd; + +public: + PacketIterator(MCInstrInfo const &MCII, MCInst const &Inst); + PacketIterator(MCInstrInfo const &MCII, MCInst const &Inst, std::nullptr_t); + PacketIterator &operator++(); + MCInst const &operator*() const; + bool operator==(PacketIterator const &Other) const; + bool operator!=(PacketIterator const &Other) const { + return !(*this == Other); + } +}; +} // namespace Hexagon namespace HexagonMCInstrInfo { size_t const innerLoopOffset = 0; int64_t const innerLoopMask = 1 << innerLoopOffset; @@ -54,6 +73,8 @@ void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, MCInst const &MCI); // Returns a iterator range of instructions in this bundle +iterator_range<Hexagon::PacketIterator> +bundleInstructions(MCInstrInfo const &MCII, MCInst const &MCI); iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI); // Returns the number of instructions in the bundle @@ -131,7 +152,6 @@ MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI); unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI); MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII, MCInst const &MCI); -int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI); // Return the Hexagon ISA class for the insn. unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI); @@ -180,6 +200,7 @@ bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI); // Return whether the instruction needs to be constant extended. 
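The Hexagon::PacketIterator declared above (defined in the HexagonMCInstrInfo.cpp hunks earlier) flattens a bundle so a duplex yields its two sub-instructions rather than the wrapping MCInst. Typical use, mirroring the rewritten checker loops in this patch; countBranches is our example:

#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/MC/MCInstrInfo.h"

using namespace llvm;

// Walk every real instruction in a packet, duplex halves included, with no
// manual isDuplex() unwrapping at the call site.
static unsigned countBranches(MCInstrInfo const &MCII, MCInst const &MCB) {
  unsigned Branches = 0;
  for (MCInst const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB))
    if (HexagonMCInstrInfo::getDesc(MCII, I).isBranch())
      ++Branches;
  return Branches;
}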
bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI); +bool isCVINew(MCInstrInfo const &MCII, MCInst const &MCI); // Is this double register suitable for use in a duplex subinst bool isDblRegForSubInst(unsigned Reg); @@ -262,14 +283,14 @@ bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI); // Replace the instructions inside MCB, represented by Candidate void replaceDuplex(MCContext &Context, MCInst &MCI, DuplexCandidate Candidate); -bool s23_2_reloc(MCExpr const &Expr); +bool s27_2_reloc(MCExpr const &Expr); // Marks a bundle as endloop0 void setInnerLoop(MCInst &MCI); void setMemReorderDisabled(MCInst &MCI); void setMemStoreReorderEnabled(MCInst &MCI); void setMustExtend(MCExpr const &Expr, bool Val = true); void setMustNotExtend(MCExpr const &Expr, bool Val = true); -void setS23_2_reloc(MCExpr const &Expr, bool Val = true); +void setS27_2_reloc(MCExpr const &Expr, bool Val = true); // Marks a bundle as endloop1 void setOuterLoop(MCInst &MCI); @@ -282,7 +303,7 @@ unsigned SubregisterBit(unsigned Consumer, unsigned Producer, // Attempt to find and replace compound pairs void tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCContext &Context, MCInst &MCI); -} -} +} // namespace HexagonMCInstrInfo +} // namespace llvm #endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp index 529a5fd5ed82..aece36790486 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -45,6 +45,7 @@ void HexagonMCShuffler::init(MCInst &MCB) { } } + Loc = MCB.getLoc(); BundleFlags = MCB.getOperand(0).getImm(); } @@ -68,12 +69,14 @@ void HexagonMCShuffler::init(MCInst &MCB, MCInst const &AddMI, append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, AddMI)); } + Loc = MCB.getLoc(); BundleFlags = MCB.getOperand(0).getImm(); } void HexagonMCShuffler::copyTo(MCInst &MCB) { MCB.clear(); MCB.addOperand(MCOperand::createImm(BundleFlags)); + MCB.setLoc(Loc); // Copy the results into the bundle. for (HexagonShuffler::iterator I = begin(); I != end(); ++I) { @@ -89,15 +92,16 @@ bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) { if (shuffle()) { // Copy the results into the bundle. copyTo(MCB); - } else - DEBUG(MCB.dump()); - - return (!getError()); + return true; + } + DEBUG(MCB.dump()); + return false; } -bool llvm::HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII, - MCSubtargetInfo const &STI, MCInst &MCB) { - HexagonMCShuffler MCS(true, MCII, STI, MCB); +bool llvm::HexagonMCShuffle(MCContext &Context, bool Fatal, + MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB) { + HexagonMCShuffler MCS(Context, Fatal, MCII, STI, MCB); if (DisableShuffle) // Ignore if user chose so. @@ -117,52 +121,16 @@ bool llvm::HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII, return false; } - // Reorder the bundle and copy the result. - if (!MCS.reshuffleTo(MCB)) { - // Unless there is any error, which should not happen at this point. 
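Besides threading MCContext through, the shuffler hunks above begin preserving the bundle's SMLoc across init and copyTo, so diagnostics emitted after reshuffling still point at the original packet. The pattern in isolation; rebuildBundle is our name:

#include "llvm/MC/MCInst.h"

using namespace llvm;

// Rebuild a bundle in place without losing its source location.
static void rebuildBundle(MCInst &MCB, int64_t BundleFlags) {
  SMLoc Loc = MCB.getLoc(); // captured in init() by the patch
  MCB.clear();
  MCB.addOperand(MCOperand::createImm(BundleFlags));
  MCB.setLoc(Loc);          // restored in copyTo()
  // ... re-append the shuffled instructions here ...
}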
- unsigned shuffleError = MCS.getError(); - - if (!Fatal && (shuffleError != HexagonShuffler::SHUFFLE_SUCCESS)) - return false; - if (shuffleError != HexagonShuffler::SHUFFLE_SUCCESS) { - errs() << "\nFailing packet:\n"; - for (const auto& I : HexagonMCInstrInfo::bundleInstructions(MCB)) { - MCInst *MI = const_cast<MCInst *>(I.getInst()); - errs() << HexagonMCInstrInfo::getName(MCII, *MI) << ' ' << HexagonMCInstrInfo::getDesc(MCII, *MI).getOpcode() << '\n'; - } - errs() << '\n'; - } - - switch (shuffleError) { - default: - llvm_unreachable("unknown error"); - case HexagonShuffler::SHUFFLE_ERROR_INVALID: - llvm_unreachable("invalid packet"); - case HexagonShuffler::SHUFFLE_ERROR_STORES: - llvm_unreachable("too many stores"); - case HexagonShuffler::SHUFFLE_ERROR_LOADS: - llvm_unreachable("too many loads"); - case HexagonShuffler::SHUFFLE_ERROR_BRANCHES: - llvm_unreachable("too many branches"); - case HexagonShuffler::SHUFFLE_ERROR_NOSLOTS: - llvm_unreachable("no suitable slot"); - case HexagonShuffler::SHUFFLE_ERROR_SLOTS: - llvm_unreachable("over-subscribed slots"); - case HexagonShuffler::SHUFFLE_SUCCESS: // Single instruction case. - return true; - } - } - - return true; + return MCS.reshuffleTo(MCB); } -unsigned -llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, - MCContext &Context, MCInst &MCB, +bool +llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &MCB, SmallVector<DuplexCandidate, 8> possibleDuplexes) { if (DisableShuffle) - return HexagonShuffler::SHUFFLE_SUCCESS; + return false; if (!HexagonMCInstrInfo::bundleSize(MCB)) { // There once was a bundle: @@ -172,46 +140,44 @@ llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, // After the IMPLICIT_DEFs were removed by the asm printer, the bundle // became empty. 
DEBUG(dbgs() << "Skipping empty bundle"); - return HexagonShuffler::SHUFFLE_SUCCESS; + return false; } else if (!HexagonMCInstrInfo::isBundle(MCB)) { DEBUG(dbgs() << "Skipping stand-alone insn"); - return HexagonShuffler::SHUFFLE_SUCCESS; + return false; } bool doneShuffling = false; - unsigned shuffleError; while (possibleDuplexes.size() > 0 && (!doneShuffling)) { // case of Duplex Found DuplexCandidate duplexToTry = possibleDuplexes.pop_back_val(); MCInst Attempt(MCB); HexagonMCInstrInfo::replaceDuplex(Context, Attempt, duplexToTry); - HexagonMCShuffler MCS(true, MCII, STI, Attempt); // copy packet to the shuffler + HexagonMCShuffler MCS(Context, false, MCII, STI, Attempt); // copy packet to the shuffler if (MCS.size() == 1) { // case of one duplex // copy the created duplex in the shuffler to the bundle MCS.copyTo(MCB); - return HexagonShuffler::SHUFFLE_SUCCESS; + return false; } // try shuffle with this duplex doneShuffling = MCS.reshuffleTo(MCB); - shuffleError = MCS.getError(); if (doneShuffling) break; } if (doneShuffling == false) { - HexagonMCShuffler MCS(true, MCII, STI, MCB); + HexagonMCShuffler MCS(Context, false, MCII, STI, MCB); doneShuffling = MCS.reshuffleTo(MCB); // shuffle - shuffleError = MCS.getError(); } if (!doneShuffling) - return shuffleError; + return true; - return HexagonShuffler::SHUFFLE_SUCCESS; + return false; } -bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, - MCInst &MCB, MCInst const &AddMI, int fixupCount) { +bool llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &MCB, + MCInst const &AddMI, int fixupCount) { if (!HexagonMCInstrInfo::isBundle(MCB)) return false; @@ -246,16 +212,6 @@ bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, if (bhasDuplex && bundleSize >= maxBundleSize) return false; - HexagonMCShuffler MCS(MCII, STI, MCB, AddMI, false); - if (!MCS.reshuffleTo(MCB)) { - unsigned shuffleError = MCS.getError(); - switch (shuffleError) { - default: - return false; - case HexagonShuffler::SHUFFLE_SUCCESS: // single instruction case - return true; - } - } - - return true; + HexagonMCShuffler MCS(Context, false, MCII, STI, MCB, AddMI, false); + return MCS.reshuffleTo(MCB); } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h index 14bbfda4c914..dbe85b434dc4 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h @@ -18,24 +18,19 @@ #include "MCTargetDesc/HexagonShuffler.h" namespace llvm { - class MCInst; - // Insn bundle shuffler. 
class HexagonMCShuffler : public HexagonShuffler { - bool immext_present; - bool duplex_present; - public: - HexagonMCShuffler(bool Fatal, MCInstrInfo const &MCII, + HexagonMCShuffler(MCContext &Context, bool Fatal, MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &MCB) - : HexagonShuffler(MCII, STI) { + : HexagonShuffler(Context, Fatal, MCII, STI) { init(MCB); }; - HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, - MCInst &MCB, MCInst const &AddMI, - bool InsertAtFront) - : HexagonShuffler(MCII, STI) { + HexagonMCShuffler(MCContext &Context, bool Fatal, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &MCB, + MCInst const &AddMI, bool InsertAtFront) + : HexagonShuffler(Context, Fatal, MCII, STI) { init(MCB, AddMI, InsertAtFront); }; @@ -44,22 +39,20 @@ public: // Reorder and copy result to another. bool reshuffleTo(MCInst &MCB); - bool immextPresent() const { return immext_present; }; - bool duplexPresent() const { return duplex_present; }; - private: void init(MCInst &MCB); void init(MCInst &MCB, MCInst const &AddMI, bool InsertAtFront); }; // Invocation of the shuffler. -bool HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII, +bool HexagonMCShuffle(MCContext &Context, bool Fatal, MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &); -bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, - MCInst &, MCInst const &, int); -unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, - MCContext &Context, MCInst &, - SmallVector<DuplexCandidate, 8>); -} +bool HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &, MCInst const &, + int); +bool HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &, + SmallVector<DuplexCandidate, 8>); +} // namespace llvm #endif // HEXAGONMCSHUFFLER_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 853f76213d38..a5afa1daeb9e 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -14,17 +14,18 @@ #define DEBUG_TYPE "hexagon-shuffle" -#include <algorithm> -#include <utility> +#include "HexagonShuffler.h" #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "HexagonShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/MC/MCContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <utility> using namespace llvm; @@ -38,7 +39,7 @@ class HexagonBid { unsigned Bid; public: - HexagonBid() : Bid(0){} + HexagonBid() : Bid(0) {} HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; } // Check if the insn priority is overflowed. @@ -87,7 +88,7 @@ unsigned HexagonResource::setWeight(unsigned s) { // Calculate relative weight of the insn for the given slot, weighing it the // heavier the more restrictive the insn is and the lowest the slots that the // insn may be executed in. 
- if (Key == 0 || Units == 0 || (SlotWeight*s >= 32)) + if (Key == 0 || Units == 0 || (SlotWeight * s >= 32)) return Weight = 0; unsigned Ctpop = countPopulation(Units); @@ -106,14 +107,12 @@ void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) { (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2); (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1); (*TUL)[HexagonII::TypeCVI_VINLANESAT] = - (CPU == "hexagonv60" || CPU == "hexagonv61" || CPU == "hexagonv61v1") ? - UnitsAndLanes(CVI_SHIFT, 1) : - UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (CPU == "hexagonv60" || CPU == "hexagonv61" || CPU == "hexagonv61v1") + ? UnitsAndLanes(CVI_SHIFT, 1) + : UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VM_LD] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0); - (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] = - UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1); (*TUL)[HexagonII::TypeCVI_VM_ST] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); @@ -154,18 +153,19 @@ typedef SmallVector<struct CVIUnits, 8> HVXInstsT; static unsigned makeAllBits(unsigned startBit, unsigned Lanes) { - for (unsigned i = 1 ; i < Lanes ; ++i) + for (unsigned i = 1; i < Lanes; ++i) startBit = (startBit << 1) | startBit; return startBit; } -static bool checkHVXPipes(const HVXInstsT& hvxInsts, unsigned startIdx, unsigned usedUnits) +static bool checkHVXPipes(const HVXInstsT &hvxInsts, unsigned startIdx, + unsigned usedUnits) { if (startIdx < hvxInsts.size()) { if (!hvxInsts[startIdx].Units) return checkHVXPipes(hvxInsts, startIdx + 1, usedUnits); - for (unsigned b = 0x1 ; b <= 0x8 ; b <<= 1) { + for (unsigned b = 0x1; b <= 0x8; b <<= 1) { if ((hvxInsts[startIdx].Units & b) == 0) continue; unsigned allBits = makeAllBits(b, hvxInsts[startIdx].Lanes); @@ -179,9 +179,10 @@ static bool checkHVXPipes(const HVXInstsT& hvxInsts, unsigned startIdx, unsigned return true; } -HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, +HexagonShuffler::HexagonShuffler(MCContext &Context, bool ReportErrors, + MCInstrInfo const &MCII, MCSubtargetInfo const &STI) - : MCII(MCII), STI(STI) { + : Context(Context), MCII(MCII), STI(STI), ReportErrors(ReportErrors) { reset(); HexagonCVIResource::SetupTUL(&TUL, STI.getCPU()); } @@ -189,7 +190,6 @@ HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, void HexagonShuffler::reset() { Packet.clear(); BundleFlags = 0; - Error = SHUFFLE_SUCCESS; } void HexagonShuffler::append(MCInst const &ID, MCInst const *Extender, @@ -202,91 +202,31 @@ void HexagonShuffler::append(MCInst const &ID, MCInst const *Extender, static struct { unsigned first; unsigned second; -} jumpSlots[] = { {8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1} }; -#define MAX_JUMP_SLOTS (sizeof(jumpSlots)/sizeof(jumpSlots[0])) - -namespace { -bool isDuplexAGroup(unsigned Opcode) { - switch (Opcode) { - case Hexagon::SA1_addi: - case Hexagon::SA1_addrx: - case Hexagon::SA1_addsp: - case Hexagon::SA1_and1: - case Hexagon::SA1_clrf: - case Hexagon::SA1_clrfnew: - case Hexagon::SA1_clrt: - case Hexagon::SA1_clrtnew: - case Hexagon::SA1_cmpeqi: - case Hexagon::SA1_combine0i: - case Hexagon::SA1_combine1i: - case Hexagon::SA1_combine2i: - case Hexagon::SA1_combine3i: - case Hexagon::SA1_combinerz: - case Hexagon::SA1_combinezr: - case Hexagon::SA1_dec: - case Hexagon::SA1_inc: - case 
Hexagon::SA1_seti: - case Hexagon::SA1_setin1: - case Hexagon::SA1_sxtb: - case Hexagon::SA1_sxth: - case Hexagon::SA1_tfr: - case Hexagon::SA1_zxtb: - case Hexagon::SA1_zxth: - return true; - break; - default: - return false; - } -} - -unsigned countNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) { - unsigned Result = 0; - unsigned Type = HexagonMCInstrInfo::getType(MCII, ID); - if (Type == HexagonII::TypeDUPLEX) { - unsigned subInst0Opcode = ID.getOperand(0).getInst()->getOpcode(); - unsigned subInst1Opcode = ID.getOperand(1).getInst()->getOpcode(); - Result += !isDuplexAGroup(subInst0Opcode); - Result += !isDuplexAGroup(subInst1Opcode); - } else - Result += Type != HexagonII::TypeALU32_2op && - Type != HexagonII::TypeALU32_3op && - Type != HexagonII::TypeALU32_ADDI && - Type != HexagonII::TypeS_2op && - Type != HexagonII::TypeS_3op && - Type != HexagonII::TypeALU64 && - (Type != HexagonII::TypeM || - HexagonMCInstrInfo::isFloat(MCII, ID)); - return Result; -} -} +} jumpSlots[] = {{8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}}; +#define MAX_JUMP_SLOTS (sizeof(jumpSlots) / sizeof(jumpSlots[0])) /// Check that the packet is legal and enforce relative insn order. bool HexagonShuffler::check() { // Descriptive slot masks. const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2, - slotThree = 0x8, //slotFirstJump = 0x8, + slotThree = 0x8, // slotFirstJump = 0x8, slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1; // Highest slots for branches and stores used to keep their original order. - //unsigned slotJump = slotFirstJump; + // unsigned slotJump = slotFirstJump; unsigned slotLoadStore = slotFirstLoadStore; // Number of branches, solo branches, indirect branches. unsigned jumps = 0, jump1 = 0; // Number of memory operations, loads, solo loads, stores, solo stores, single // stores. unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; - // Number of HVX loads, HVX stores. - unsigned CVIloads = 0, CVIstores = 0; - // Number of duplex insns, solo insns. - unsigned duplex = 0, solo = 0; - // Number of insns restricting other insns in the packet to A and X types, - // which is neither A or X types. - unsigned onlyAX = 0, neitherAnorX = 0; + // Number of duplex insns + unsigned duplex = 0; // Number of insns restricting other insns in slot #1 to A type. unsigned onlyAin1 = 0; // Number of insns restricting any insn in slot #1, except A2_nop. 
unsigned onlyNo1 = 0; - unsigned xtypeFloat = 0; unsigned pSlot3Cnt = 0; + unsigned nvstores = 0; unsigned memops = 0; unsigned deallocs = 0; iterator slot3ISJ = end(); @@ -297,13 +237,8 @@ bool HexagonShuffler::check() { for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { MCInst const &ID = ISJ->getDesc(); - if (HexagonMCInstrInfo::isSolo(MCII, ID)) - solo++; - else if (HexagonMCInstrInfo::isSoloAX(MCII, ID)) - onlyAX++; - else if (HexagonMCInstrInfo::isSoloAin1(MCII, ID)) - onlyAin1++; - neitherAnorX += countNeitherAnorX(MCII, ID); + if (HexagonMCInstrInfo::isSoloAin1(MCII, ID)) + ++onlyAin1; if (HexagonMCInstrInfo::prefersSlot3(MCII, ID)) { ++pSlot3Cnt; slot3ISJ = ISJ; @@ -316,8 +251,6 @@ bool HexagonShuffler::check() { case HexagonII::TypeS_2op: case HexagonII::TypeS_3op: case HexagonII::TypeALU64: - if (HexagonMCInstrInfo::isFloat(MCII, ID)) - ++xtypeFloat; break; case HexagonII::TypeJ: ++jumps; @@ -327,14 +260,11 @@ bool HexagonShuffler::check() { ++onlyNo1; case HexagonII::TypeCVI_VM_LD: case HexagonII::TypeCVI_VM_TMP_LD: - case HexagonII::TypeCVI_VM_CUR_LD: - ++CVIloads; case HexagonII::TypeLD: ++loads; ++memory; if (ISJ->Core.getUnits() == slotSingleLoad || - HexagonMCInstrInfo::getType(MCII, ID) == - HexagonII::TypeCVI_VM_VP_LDU) + HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_VP_LDU) ++load0; if (HexagonMCInstrInfo::getDesc(MCII, ID).isReturn()) { ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. @@ -345,7 +275,6 @@ bool HexagonShuffler::check() { ++onlyNo1; case HexagonII::TypeCVI_VM_ST: case HexagonII::TypeCVI_VM_NEW_ST: - ++CVIstores; case HexagonII::TypeST: ++stores; ++memory; @@ -368,18 +297,19 @@ bool HexagonShuffler::check() { } break; case HexagonII::TypeV2LDST: - if(HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) { + if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) { ++loads; ++memory; if (ISJ->Core.getUnits() == slotSingleLoad || - HexagonMCInstrInfo::getType(MCII,ID) == + HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_VP_LDU) ++load0; - } - else { + } else { assert(HexagonMCInstrInfo::getDesc(MCII, ID).mayStore()); ++memory; ++stores; + if (HexagonMCInstrInfo::isNewValue(MCII, ID)) + ++nvstores; } break; case HexagonII::TypeCR: @@ -406,30 +336,37 @@ bool HexagonShuffler::check() { ++jumps; foundBranches.push_back(ISJ); } + if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isReturn()) { + ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + foundBranches.push_back(ISJ); + } + if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isReturn()) { + ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + foundBranches.push_back(ISJ); + } break; } } } // Check if the packet is legal. - if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) || - (duplex > 1 || (duplex && memory)) || (solo && size() > 1) || - (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) { - Error = SHUFFLE_ERROR_INVALID; + if ((load0 > 1 || store0 > 1) || + (duplex > 1 || (duplex && memory))) { + reportError(llvm::Twine("invalid instruction packet")); return false; } if (jump1 && jumps > 1) { // Error if single branch with another branch. 
- Error = SHUFFLE_ERROR_BRANCHES; + reportError(llvm::Twine("too many branches in packet")); return false; } - if (memops && stores > 1) { - Error = SHUFFLE_ERROR_STORE_LOAD_CONFLICT; + if ((nvstores || memops) && stores > 1) { + reportError(llvm::Twine("slot 0 instruction does not allow slot 1 store")); return false; } if (deallocs && stores) { - Error = SHUFFLE_ERROR_STORE_LOAD_CONFLICT; + reportError(llvm::Twine("slot 0 instruction does not allow slot 1 store")); return false; } @@ -441,7 +378,6 @@ if (!ISJ->Core.getUnits()) { // Error if insn may not be executed in any slot. - Error = SHUFFLE_ERROR_UNKNOWN; return false; } @@ -472,7 +408,8 @@ else if (stores > 1) { if (slotLoadStore < slotLastLoadStore) { // Error if no more slots available for stores. - Error = SHUFFLE_ERROR_STORES; + reportError( + llvm::Twine("invalid instruction packet: too many stores")); return false; } // Pin the store to the highest slot available to it. @@ -483,7 +420,7 @@ } if (store1 && stores > 1) { // Error if a single store with another store. - Error = SHUFFLE_ERROR_STORES; + reportError(llvm::Twine("invalid instruction packet: too many stores")); return false; } } @@ -494,7 +431,7 @@ if (!ISJ->Core.getUnits()) { // Error if insn may not be executed in any slot. - Error = SHUFFLE_ERROR_NOSLOTS; + reportError(llvm::Twine("invalid instruction packet: out of slots")); return false; } } @@ -503,12 +440,12 @@ bool validateSlots = true; if (jumps > 1) { if (foundBranches.size() > 2) { - Error = SHUFFLE_ERROR_BRANCHES; + reportError(llvm::Twine("too many branches in packet")); return false; } // try all possible choices - for (unsigned int i = 0 ; i < MAX_JUMP_SLOTS ; ++i) { + for (unsigned int i = 0; i < MAX_JUMP_SLOTS; ++i) { // validate first jump with this slot rule if (!(jumpSlots[i].first & foundBranches[0]->Core.getUnits())) continue; @@ -535,18 +472,18 @@ if (!bFail) { validateSlots = false; // all good, no need to re-do auction break; - } - else + } else // restore original values Packet = PacketSave; } if (validateSlots == true) { - Error = SHUFFLE_ERROR_NOSLOTS; + reportError(llvm::Twine("invalid instruction packet: out of slots")); return false; } } - if (jumps <= 1 && bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) { + if (jumps <= 1 && bOnlySlot3 == false && pSlot3Cnt == 1 && + slot3ISJ != end()) { validateSlots = true; // save off slot mask of instruction marked with A_PREFER_SLOT3 // and then pin it to slot #3 @@ -582,7 +519,7 @@ for (iterator I = begin(); I != end(); ++I) if (!AuctionCore.bid(I->Core.getUnits())) { - Error = SHUFFLE_ERROR_SLOTS; + reportError(llvm::Twine("invalid instruction packet: slot error")); return false; } } @@ -605,12 +542,11 @@ startIdx = usedUnits = 0x0; if (checkHVXPipes(hvxInsts, startIdx, usedUnits) == false) { // too many pipes used to be valid - Error = SHUFFLE_ERROR_SLOTS; + reportError(llvm::Twine("invalid instruction packet: slot error")); return false; } } - Error = SHUFFLE_SUCCESS; return true; } @@ -618,12 +554,13 @@ bool HexagonShuffler::shuffle() { if (size() > HEXAGON_PACKET_SIZE) { // Ignore a packet with more than what a packet can hold // or with compound or duplex insns for now.
- Error = SHUFFLE_ERROR_INVALID; + reportError(llvm::Twine("invalid instruction packet")); return false; } // Check and prepare packet. - if (size() > 1 && check()) + bool Ok = true; + if (size() > 1 && (Ok = check())) // Reorder the handles for each slot. for (unsigned nSlot = 0, emptySlots = 0; nSlot < HEXAGON_PACKET_SIZE; ++nSlot) { @@ -659,5 +596,10 @@ bool HexagonShuffler::shuffle() { dbgs() << '\n'); DEBUG(dbgs() << '\n'); - return (!getError()); + return Ok; +} + +void HexagonShuffler::reportError(llvm::Twine const &Msg) { + if (ReportErrors) + Context.reportError(Loc, Msg); } diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h index 36e8fa19d467..10a959008f44 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -45,8 +45,7 @@ public: // Check if the resources are in ascending slot order. static bool lessUnits(const HexagonResource &A, const HexagonResource &B) { - return (countPopulation(A.getUnits()) < - countPopulation(B.getUnits())); + return (countPopulation(A.getUnits()) < countPopulation(B.getUnits())); }; // Check if the resources are in ascending weight order. static bool lessWeight(const HexagonResource &A, const HexagonResource &B) { @@ -107,7 +106,7 @@ public: HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T, MCInstrInfo const &MCII, MCInst const *id, MCInst const *Extender, unsigned s) - : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {} + : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {}; MCInst const &getDesc() const { return *ID; }; @@ -136,33 +135,21 @@ class HexagonShuffler { HexagonPacket Packet; HexagonPacket PacketSave; - // Shuffling error code. - unsigned Error; - HexagonCVIResource::TypeUnitsAndLanes TUL; protected: + MCContext &Context; int64_t BundleFlags; MCInstrInfo const &MCII; MCSubtargetInfo const &STI; + SMLoc Loc; + bool ReportErrors; public: typedef HexagonPacket::iterator iterator; - enum { - SHUFFLE_SUCCESS = 0, ///< Successful operation. - SHUFFLE_ERROR_INVALID, ///< Invalid bundle. - SHUFFLE_ERROR_STORES, ///< No free slots for store insns. - SHUFFLE_ERROR_LOADS, ///< No free slots for load insns. - SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns. - SHUFFLE_ERROR_NOSLOTS, ///< No free slots for other insns. - SHUFFLE_ERROR_SLOTS, ///< Over-subscribed slots. - SHUFFLE_ERROR_ERRATA2, ///< Errata violation (v60). - SHUFFLE_ERROR_STORE_LOAD_CONFLICT, ///< store/load conflict - SHUFFLE_ERROR_UNKNOWN ///< Unknown error. - }; - - explicit HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI); + HexagonShuffler(MCContext &Context, bool ReportErrors, + MCInstrInfo const &MCII, MCSubtargetInfo const &STI); // Reset to initial state. void reset(); @@ -180,9 +167,8 @@ public: void append(MCInst const &ID, MCInst const *Extender, unsigned S); // Return the error code for the last check or shuffling of the bundle. 
- void setError(unsigned Err) { Error = Err; }; - unsigned getError() const { return (Error); }; + void reportError(llvm::Twine const &Msg); }; -} +} // namespace llvm #endif // HEXAGONSHUFFLER_H diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp index b0532f933b16..726b7af73b0a 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp +++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp @@ -759,8 +759,13 @@ void Liveness::computeLiveIns() { // all related shadows as a single use cluster. RegisterRef S(RS.first, P.second); NodeList Ds = getAllReachingDefs(S, PUA, true, false, NoRegs); - for (NodeAddr<DefNode*> D : Ds) - LOX[S.Reg].insert({D.Id, S.Mask}); + for (NodeAddr<DefNode*> D : Ds) { + // Calculate the mask corresponding to the visited def. + RegisterAggr TA(PRI); + TA.insert(D.Addr->getRegRef(DFG)).intersect(S); + LaneBitmask TM = TA.makeRegRef().Mask; + LOX[S.Reg].insert({D.Id, TM}); + } } } diff --git a/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp b/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp index 12a2571c28d9..fe54589f8b0d 100644 --- a/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp @@ -264,12 +264,6 @@ LanaiRegisterInfo::getFrameRegister(const MachineFunction & /*MF*/) const { unsigned LanaiRegisterInfo::getBaseRegister() const { return Lanai::R14; } -bool LanaiRegisterInfo::canRealignStack(const MachineFunction &MF) const { - if (!TargetRegisterInfo::canRealignStack(MF)) - return false; - return true; -} - unsigned LanaiRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("no exception support"); return 0; diff --git a/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h b/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h index c6e459076ebc..d88a19193854 100644 --- a/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h +++ b/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h @@ -41,8 +41,6 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo { unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; - bool canRealignStack(const MachineFunction &MF) const override; - // Debug information queries. 
unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const override; diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 3304449efb91..1e2eb7dbec3e 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -366,6 +366,7 @@ getFixupKindInfo(MCFixupKind Kind) const { { "fixup_MICROMIPS_TLS_LDM", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_HI16", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_LO16", 0, 16, 0 }, + { "fixup_MICROMIPS_GOTTPREL", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_HI16", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 }, { "fixup_Mips_SUB", 0, 64, 0 }, @@ -437,6 +438,7 @@ getFixupKindInfo(MCFixupKind Kind) const { { "fixup_MICROMIPS_TLS_LDM", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_HI16", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_LO16", 16, 16, 0 }, + { "fixup_MICROMIPS_GOTTPREL", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_HI16", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_LO16", 16, 16, 0 }, { "fixup_Mips_SUB", 0, 64, 0 }, diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 324fd3c6fe14..1a1c613cfce0 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -374,6 +374,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_MICROMIPS_TLS_DTPREL_HI16; case Mips::fixup_MICROMIPS_TLS_DTPREL_LO16: return ELF::R_MICROMIPS_TLS_DTPREL_LO16; + case Mips::fixup_MICROMIPS_GOTTPREL: + return ELF::R_MICROMIPS_TLS_GOTTPREL; case Mips::fixup_MICROMIPS_TLS_TPREL_HI16: return ELF::R_MICROMIPS_TLS_TPREL_HI16; case Mips::fixup_MICROMIPS_TLS_TPREL_LO16: diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index 149296212eca..6148a1b622c8 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -203,6 +203,9 @@ namespace Mips { // resulting in - R_MICROMIPS_TLS_DTPREL_LO16 fixup_MICROMIPS_TLS_DTPREL_LO16, + // resulting in - R_MICROMIPS_TLS_GOTTPREL. + fixup_MICROMIPS_GOTTPREL, + // resulting in - R_MICROMIPS_TLS_TPREL_HI16 fixup_MICROMIPS_TLS_TPREL_HI16, diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 5685f0426e9b..a35eb2a8e03a 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -669,7 +669,8 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups, : Mips::fixup_Mips_DTPREL_LO; break; case MipsMCExpr::MEK_GOTTPREL: - FixupKind = Mips::fixup_Mips_GOTTPREL; + FixupKind = isMicroMips(STI) ? Mips::fixup_MICROMIPS_GOTTPREL + : Mips::fixup_Mips_GOTTPREL; break; case MipsMCExpr::MEK_GOT: FixupKind = isMicroMips(STI) ? 
Mips::fixup_MICROMIPS_GOT16 diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp b/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp new file mode 100644 index 000000000000..4593fc92ca6f --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp @@ -0,0 +1,335 @@ +//=== MicroMipsSizeReduction.cpp - MicroMips size reduction pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +///\file +/// This pass is used to reduce the size of instructions where applicable. +/// +/// TODO: Implement microMIPS64 support. +/// TODO: Implement support for reducing into lwp/swp instruction. +//===----------------------------------------------------------------------===// +#include "Mips.h" +#include "MipsInstrInfo.h" +#include "MipsSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "micromips-reduce-size" + +STATISTIC(NumReduced, "Number of 32-bit instructions reduced to 16-bit ones"); + +namespace { + +/// Order of operands to transfer +// TODO: Will be extended when additional optimizations are added +enum OperandTransfer { + OT_NA, ///< Not applicable + OT_OperandsAll, ///< Transfer all operands +}; + +/// Reduction type +// TODO: Will be extended when additional optimizations are added +enum ReduceType { + RT_OneInstr ///< Reduce one instruction into a smaller instruction +}; + +// Information about immediate field restrictions +struct ImmField { + ImmField() : ImmFieldOperand(-1), Shift(0), LBound(0), HBound(0) {} + ImmField(uint8_t Shift, int16_t LBound, int16_t HBound, + int8_t ImmFieldOperand) + : ImmFieldOperand(ImmFieldOperand), Shift(Shift), LBound(LBound), + HBound(HBound) {} + int8_t ImmFieldOperand; // Immediate operand, -1 if it does not exist + uint8_t Shift; // Shift value + int16_t LBound; // Low bound of the immediate operand + int16_t HBound; // High bound of the immediate operand +}; + +/// Information about operands +// TODO: Will be extended when additional optimizations are added +struct OpInfo { + OpInfo(enum OperandTransfer TransferOperands) + : TransferOperands(TransferOperands) {} + OpInfo() : TransferOperands(OT_NA) {} + + enum OperandTransfer + TransferOperands; ///< Operands to transfer to the new instruction +}; + +// Information about opcodes +struct OpCodes { + OpCodes(unsigned WideOpc, unsigned NarrowOpc) + : WideOpc(WideOpc), NarrowOpc(NarrowOpc) {} + + unsigned WideOpc; ///< Wide opcode + unsigned NarrowOpc; ///< Narrow opcode +}; + +/// ReduceTable - A static table with information on mapping from wide +/// opcodes to narrow +struct ReduceEntry { + + enum ReduceType eRType; ///< Reduction type + bool (*ReduceFunction)( + MachineInstr *MI, + const ReduceEntry &Entry); ///< Pointer to reduce function + struct OpCodes Ops; ///< All relevant OpCodes + struct OpInfo OpInf; ///< Characteristics of operands + struct ImmField Imm; ///< Characteristics of immediate field + + ReduceEntry(enum ReduceType RType, struct OpCodes Op, + bool (*F)(MachineInstr *MI, const ReduceEntry &Entry), + struct OpInfo OpInf, struct ImmField Imm) + : eRType(RType), ReduceFunction(F), Ops(Op), OpInf(OpInf), Imm(Imm) {} + + unsigned NarrowOpc() const { return Ops.NarrowOpc; } + unsigned WideOpc() const { return Ops.WideOpc; } + 
int16_t LBound() const { return Imm.LBound; } + int16_t HBound() const { return Imm.HBound; } + uint8_t Shift() const { return Imm.Shift; } + int8_t ImmField() const { return Imm.ImmFieldOperand; } + enum OperandTransfer TransferOperands() const { + return OpInf.TransferOperands; + } + enum ReduceType RType() const { return eRType; } + + // operator used by std::equal_range + bool operator<(const unsigned int r) const { return (WideOpc() < r); } + + // operator used by std::equal_range + friend bool operator<(const unsigned int r, const struct ReduceEntry &re) { + return (r < re.WideOpc()); + } +}; + +class MicroMipsSizeReduce : public MachineFunctionPass { +public: + static char ID; + MicroMipsSizeReduce(); + + static const MipsInstrInfo *MipsII; + const MipsSubtarget *Subtarget; + + bool runOnMachineFunction(MachineFunction &MF) override; + + llvm::StringRef getPassName() const override { + return "microMIPS instruction size reduction pass"; + } + +private: + /// Reduces width of instructions in the specified basic block. + bool ReduceMBB(MachineBasicBlock &MBB); + + /// Attempts to reduce MI, returns true on success. + bool ReduceMI(const MachineBasicBlock::instr_iterator &MII); + + // Attempts to reduce LW/SW instruction into LWSP/SWSP, + // returns true on success. + static bool ReduceXWtoXWSP(MachineInstr *MI, const ReduceEntry &Entry); + + // Attempts to reduce arithmetic instructions, returns true on success + static bool ReduceArithmeticInstructions(MachineInstr *MI, + const ReduceEntry &Entry); + + // Changes opcode of an instruction + static bool ReplaceInstruction(MachineInstr *MI, const ReduceEntry &Entry); + + // Table with transformation rules for each instruction + static llvm::SmallVector<ReduceEntry, 16> ReduceTable; +}; + +char MicroMipsSizeReduce::ID = 0; +const MipsInstrInfo *MicroMipsSizeReduce::MipsII; + +// This table must be sorted by WideOpc as a main criterion and +// ReduceType as a sub-criterion (when wide opcodes are the same) +llvm::SmallVector<ReduceEntry, 16> MicroMipsSizeReduce::ReduceTable = { + + // ReduceType, OpCodes, ReduceFunction, + // OpInfo(TransferOperands), + // ImmField(Shift, LBound, HBound, ImmFieldPosition) + {RT_OneInstr, OpCodes(Mips::ADDu, Mips::ADDU16_MM), + ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), + ImmField(0, 0, 0, -1)}, + {RT_OneInstr, OpCodes(Mips::ADDu_MM, Mips::ADDU16_MM), + ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), + ImmField(0, 0, 0, -1)}, + {RT_OneInstr, OpCodes(Mips::LW, Mips::LWSP_MM), ReduceXWtoXWSP, + OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, + {RT_OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceXWtoXWSP, + OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, + {RT_OneInstr, OpCodes(Mips::SUBu, Mips::SUBU16_MM), + ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), + ImmField(0, 0, 0, -1)}, + {RT_OneInstr, OpCodes(Mips::SUBu_MM, Mips::SUBU16_MM), + ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), + ImmField(0, 0, 0, -1)}, + {RT_OneInstr, OpCodes(Mips::SW, Mips::SWSP_MM), ReduceXWtoXWSP, + OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, + {RT_OneInstr, OpCodes(Mips::SW_MM, Mips::SWSP_MM), ReduceXWtoXWSP, + OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, +}; +} + +// Returns true if the machine operand MO is register SP +static bool IsSP(const MachineOperand &MO) { + if (MO.isReg() && ((MO.getReg() == Mips::SP))) + return true; + return false; +} + +// Returns true if the machine operand MO is register $16, $17, or $2-$7. 
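ReduceMI, further down in this file, finds candidate rows with std::equal_range; that works only because the table stays sorted by WideOpc and the two asymmetric operator< overloads above let an entry compare against a bare opcode. A self-contained sketch of the same lookup pattern, with illustrative names not taken from the patch:

  #include <algorithm>
  #include <utility>
  #include <vector>

  struct Entry {
    unsigned WideOpc;
    bool operator<(unsigned R) const { return WideOpc < R; }
    friend bool operator<(unsigned R, const Entry &E) { return R < E.WideOpc; }
  };

  // Returns the [first, last) slice whose WideOpc equals Opcode;
  // Table must be sorted by WideOpc for equal_range to be valid.
  std::pair<std::vector<Entry>::const_iterator,
            std::vector<Entry>::const_iterator>
  candidatesFor(const std::vector<Entry> &Table, unsigned Opcode) {
    return std::equal_range(Table.begin(), Table.end(), Opcode);
  }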
+static bool isMMThreeBitGPRegister(const MachineOperand &MO) { + if (MO.isReg() && Mips::GPRMM16RegClass.contains(MO.getReg())) + return true; + return false; +} + +// Returns true if the operand Op is an immediate value +// and writes the immediate value into variable Imm +static bool GetImm(MachineInstr *MI, unsigned Op, int64_t &Imm) { + + if (!MI->getOperand(Op).isImm()) + return false; + Imm = MI->getOperand(Op).getImm(); + return true; +} + +// Returns true if the variable Value has the number of least-significant zero +// bits equal to Shift and if the shifted value is between the bounds +static bool InRange(int64_t Value, unsigned short Shift, int LBound, + int HBound) { + int64_t Value2 = Value >> Shift; + if ((Value2 << Shift) == Value && (Value2 >= LBound) && (Value2 < HBound)) + return true; + return false; +} + +// Returns true if immediate operand is in range +static bool ImmInRange(MachineInstr *MI, const ReduceEntry &Entry) { + + int64_t offset; + + if (!GetImm(MI, Entry.ImmField(), offset)) + return false; + + if (!InRange(offset, Entry.Shift(), Entry.LBound(), Entry.HBound())) + return false; + + return true; +} + +MicroMipsSizeReduce::MicroMipsSizeReduce() : MachineFunctionPass(ID) {} + +bool MicroMipsSizeReduce::ReduceMI( + const MachineBasicBlock::instr_iterator &MII) { + + MachineInstr *MI = &*MII; + unsigned Opcode = MI->getOpcode(); + + // Search the table. + llvm::SmallVector<ReduceEntry, 16>::const_iterator Start = + std::begin(ReduceTable); + llvm::SmallVector<ReduceEntry, 16>::const_iterator End = + std::end(ReduceTable); + + std::pair<llvm::SmallVector<ReduceEntry, 16>::const_iterator, + llvm::SmallVector<ReduceEntry, 16>::const_iterator> + Range = std::equal_range(Start, End, Opcode); + + if (Range.first == Range.second) + return false; + + for (llvm::SmallVector<ReduceEntry, 16>::const_iterator Entry = Range.first; + Entry != Range.second; ++Entry) + if (((*Entry).ReduceFunction)(&(*MII), *Entry)) + return true; + + return false; +} + +bool MicroMipsSizeReduce::ReduceXWtoXWSP(MachineInstr *MI, + const ReduceEntry &Entry) { + + if (!ImmInRange(MI, Entry)) + return false; + + if (!IsSP(MI->getOperand(1))) + return false; + + return ReplaceInstruction(MI, Entry); +} + +bool MicroMipsSizeReduce::ReduceArithmeticInstructions( + MachineInstr *MI, const ReduceEntry &Entry) { + + if (!isMMThreeBitGPRegister(MI->getOperand(0)) || + !isMMThreeBitGPRegister(MI->getOperand(1)) || + !isMMThreeBitGPRegister(MI->getOperand(2))) + return false; + + return ReplaceInstruction(MI, Entry); +} + +bool MicroMipsSizeReduce::ReduceMBB(MachineBasicBlock &MBB) { + bool Modified = false; + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), + E = MBB.instr_end(); + MachineBasicBlock::instr_iterator NextMII; + + // Iterate through the instructions in the basic block + for (; MII != E; MII = NextMII) { + NextMII = std::next(MII); + MachineInstr *MI = &*MII; + + // Don't reduce bundled instructions or pseudo operations + if (MI->isBundle() || MI->isTransient()) + continue; + + // Try to reduce 32-bit instruction into 16-bit instruction + Modified |= ReduceMI(MII); + } + + return Modified; +} + +bool MicroMipsSizeReduce::ReplaceInstruction(MachineInstr *MI, + const ReduceEntry &Entry) { + + MI->setDesc(MipsII->get(Entry.NarrowOpc())); + DEBUG(dbgs() << "Converted into 16-bit: " << *MI); + ++NumReduced; + return true; +} + +bool MicroMipsSizeReduce::runOnMachineFunction(MachineFunction &MF) { + + Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget()); + + // TODO: Add 
support for other subtargets: + // microMIPS32r6 and microMIPS64r6 + if (!Subtarget->inMicroMipsMode() || !Subtarget->hasMips32r2()) + return false; + + MipsII = static_cast<const MipsInstrInfo *>(Subtarget->getInstrInfo()); + + bool Modified = false; + MachineFunction::iterator I = MF.begin(), E = MF.end(); + + for (; I != E; ++I) + Modified |= ReduceMBB(*I); + return Modified; +} + +/// Returns an instance of the MicroMips size reduction pass. +FunctionPass *llvm::createMicroMipsSizeReductionPass() { + return new MicroMipsSizeReduce(); +} diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h index d9faf3325cac..7553f3972f5d 100644 --- a/contrib/llvm/lib/Target/Mips/Mips.h +++ b/contrib/llvm/lib/Target/Mips/Mips.h @@ -32,6 +32,7 @@ namespace llvm { FunctionPass *createMipsHazardSchedule(); FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); FunctionPass *createMipsConstantIslandPass(); + FunctionPass *createMicroMipsSizeReductionPass(); } // end namespace llvm; #endif diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp index a71b161b24cc..5a394fe02f16 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp +++ b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp @@ -490,15 +490,14 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV, // remove the use-soft-float attribute // static void removeUseSoftFloat(Function &F) { - AttributeList A; + AttrBuilder B; DEBUG(errs() << "removing -use-soft-float\n"); - A = A.addAttribute(F.getContext(), AttributeList::FunctionIndex, - "use-soft-float", "false"); - F.removeAttributes(AttributeList::FunctionIndex, A); + B.addAttribute("use-soft-float", "false"); + F.removeAttributes(AttributeList::FunctionIndex, B); if (F.hasFnAttribute("use-soft-float")) { DEBUG(errs() << "still has -use-soft-float\n"); } - F.addAttributes(AttributeList::FunctionIndex, A); + F.addAttributes(AttributeList::FunctionIndex, B); } diff --git a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp index a5c7bf7699ea..21c99da0922d 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -1263,7 +1263,8 @@ bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, MipsCCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(CLI.Ins, RetCC_Mips, CLI.RetTy, - CLI.Symbol->getName().data()); + CLI.Symbol ? CLI.Symbol->getName().data() + : nullptr); // Only handle a single return value. if (RVLocs.size() != 1) @@ -1326,11 +1327,10 @@ bool MipsFastISel::fastLowerArguments() { // Only handle simple cases. i.e. All arguments are directly mapped to // registers of the appropriate type. SmallVector<AllocatedReg, 4> Allocation; - unsigned Idx = 1; for (const auto &FormalArg : F->args()) { - if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) || - F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || - F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) { + if (FormalArg.hasAttribute(Attribute::InReg) || + FormalArg.hasAttribute(Attribute::StructRet) || + FormalArg.hasAttribute(Attribute::ByVal)) { DEBUG(dbgs() << ".. gave up (inreg, structret, byval)\n"); return false; } @@ -1342,7 +1342,8 @@ bool MipsFastISel::fastLowerArguments() { } EVT ArgVT = TLI.getValueType(DL, ArgTy); - DEBUG(dbgs() << ".. " << (Idx - 1) << ": " << ArgVT.getEVTString() << "\n"); + DEBUG(dbgs() << ".. 
" << FormalArg.getArgNo() << ": " + << ArgVT.getEVTString() << "\n"); if (!ArgVT.isSimple()) { DEBUG(dbgs() << ".. .. gave up (not a simple type)\n"); return false; @@ -1352,8 +1353,8 @@ bool MipsFastISel::fastLowerArguments() { case MVT::i1: case MVT::i8: case MVT::i16: - if (!F->getAttributes().hasAttribute(Idx, Attribute::SExt) && - !F->getAttributes().hasAttribute(Idx, Attribute::ZExt)) { + if (!FormalArg.hasAttribute(Attribute::SExt) && + !FormalArg.hasAttribute(Attribute::ZExt)) { // It must be any extend, this shouldn't happen for clang-generated IR // so just fall back on SelectionDAG. DEBUG(dbgs() << ".. .. gave up (i8/i16 arg is not extended)\n"); @@ -1374,7 +1375,7 @@ bool MipsFastISel::fastLowerArguments() { break; case MVT::i32: - if (F->getAttributes().hasAttribute(Idx, Attribute::ZExt)) { + if (FormalArg.hasAttribute(Attribute::ZExt)) { // The O32 ABI does not permit a zero-extended i32. DEBUG(dbgs() << ".. .. gave up (i32 arg is zero extended)\n"); return false; @@ -1437,23 +1438,20 @@ bool MipsFastISel::fastLowerArguments() { DEBUG(dbgs() << ".. .. gave up (unknown type)\n"); return false; } - - ++Idx; } - Idx = 0; for (const auto &FormalArg : F->args()) { - unsigned SrcReg = Allocation[Idx].Reg; - unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, Allocation[Idx].RC); + unsigned ArgNo = FormalArg.getArgNo(); + unsigned SrcReg = Allocation[ArgNo].Reg; + unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, Allocation[ArgNo].RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. // Without this, EmitLiveInCopies may eliminate the livein if its only // use is a bitcast (which isn't turned into an instruction). - unsigned ResultReg = createResultReg(Allocation[Idx].RC); + unsigned ResultReg = createResultReg(Allocation[ArgNo].RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(DstReg, getKillRegState(true)); updateValueMap(&FormalArg, ResultReg); - ++Idx; } // Calculate the size of the incoming arguments area. diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp index a45a9c4b41c3..29a38fd35c1f 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -260,6 +260,7 @@ TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() { // print out the code after the passes. void MipsPassConfig::addPreEmitPass() { MipsTargetMachine &TM = getMipsTargetMachine(); + addPass(createMicroMipsSizeReductionPass()); // The delay slot filler pass can potientially create forbidden slot (FS) // hazards for MIPSR6 which the hazard schedule pass (HSP) will fix. 
Any diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index ba28cd83278b..58cb7793d040 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -1555,7 +1555,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Just print .param .align <a> .b8 .param[size]; // <a> = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex + 1); + unsigned align = PAL.getParamAlignment(paramIndex); if (align == 0) align = DL.getABITypeAlignment(Ty); @@ -1641,7 +1641,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Just print .param .align <a> .b8 .param[size]; // <a> = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex + 1); + unsigned align = PAL.getParamAlignment(paramIndex); if (align == 0) align = DL.getABITypeAlignment(ETy); // Work around a bug in ptxas. When PTX code takes address of diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp index 5b626cbcd5ba..e858b37e1843 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -164,7 +164,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) { // Set the alignment to alignment of the byval parameter. This is because, // later load/stores assume that alignment, and we are going to replace // the use of the byval parameter with this alloca instruction. - AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1)); + AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo())); Arg->replaceAllUsesWith(AllocA); Value *ArgInParam = new AddrSpaceCastInst( diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 125c00295f88..1b0402bf003d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -49,6 +49,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -542,12 +543,12 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { SDValue Op1 = N->getOperand(1); SDLoc dl(N); - APInt LKZ, LKO, RKZ, RKO; - CurDAG->computeKnownBits(Op0, LKZ, LKO); - CurDAG->computeKnownBits(Op1, RKZ, RKO); + KnownBits LKnown, RKnown; + CurDAG->computeKnownBits(Op0, LKnown); + CurDAG->computeKnownBits(Op1, RKnown); - unsigned TargetMask = LKZ.getZExtValue(); - unsigned InsertMask = RKZ.getZExtValue(); + unsigned TargetMask = LKnown.Zero.getZExtValue(); + unsigned InsertMask = RKnown.Zero.getZExtValue(); if ((TargetMask | InsertMask) == 0xFFFFFFFF) { unsigned Op0Opc = Op0.getOpcode(); @@ -590,9 +591,9 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { // The AND mask might not be a constant, and we need to make sure that // if we're going to fold the masking with the insert, all bits not // known to be zero in the mask are known to be one.
- APInt MKZ, MKO; - CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO); - bool CanFoldMask = InsertMask == MKO.getZExtValue(); + KnownBits MKnown; + CurDAG->computeKnownBits(Op1.getOperand(1), MKnown); + bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); unsigned SHOpc = Op1.getOperand(0).getOpcode(); if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && @@ -2772,12 +2773,12 @@ void PPCDAGToDAGISel::Select(SDNode *N) { short Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { - APInt LHSKnownZero, LHSKnownOne; - CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne); + KnownBits LHSKnown; + CurDAG->computeKnownBits(N->getOperand(0), LHSKnown); // If this is equivalent to an add, then we can fold it with the // FrameIndex calculation. - if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { + if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); return; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 4659a2ea8032..483e9b171d57 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -79,6 +79,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -1847,17 +1848,14 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are provably // disjoint. - APInt LHSKnownZero, LHSKnownOne; - APInt RHSKnownZero, RHSKnownOne; - DAG.computeKnownBits(N.getOperand(0), - LHSKnownZero, LHSKnownOne); - - if (LHSKnownZero.getBoolValue()) { - DAG.computeKnownBits(N.getOperand(1), - RHSKnownZero, RHSKnownOne); + KnownBits LHSKnown, RHSKnown; + DAG.computeKnownBits(N.getOperand(0), LHSKnown); + + if (LHSKnown.Zero.getBoolValue()) { + DAG.computeKnownBits(N.getOperand(1), RHSKnown); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. - if (~(LHSKnownZero | RHSKnownZero) == 0) { + if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) { Base = N.getOperand(0); Index = N.getOperand(1); return true; @@ -1953,10 +1951,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. - APInt LHSKnownZero, LHSKnownOne; - DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); + KnownBits LHSKnown; + DAG.computeKnownBits(N.getOperand(0), LHSKnown); - if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { + if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. 
if (FrameIndexSDNode *FI = @@ -6466,7 +6464,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETNE: std::swap(TV, FV); case ISD::SETEQ: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); @@ -6476,25 +6474,25 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); @@ -10318,17 +10316,16 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, } else { // This is neither a signed nor an unsigned comparison, just make sure // that the high bits are equal. - APInt Op1Zero, Op1One; - APInt Op2Zero, Op2One; - DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); - DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); + KnownBits Op1Known, Op2Known; + DAG.computeKnownBits(N->getOperand(0), Op1Known); + DAG.computeKnownBits(N->getOperand(1), Op2Known); // We don't really care about what is known about the first bit (if // anything), so clear it in all masks prior to comparing them. 
- Op1Zero.clearBit(0); Op1One.clearBit(0); - Op2Zero.clearBit(0); Op2One.clearBit(0); + Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0); + Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0); - if (Op1Zero != Op2Zero || Op1One != Op2One) + if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) return SDValue(); } } @@ -11216,6 +11213,14 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, } MVT VecTy = N->getValueType(0).getSimpleVT(); + + // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is + // aligned and the type is a vector with elements up to 4 bytes + if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) + && VecTy.getScalarSizeInBits() <= 32 ) { + return SDValue(); + } + SDValue LoadOps[] = { Chain, Base }; SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, DAG.getVTList(MVT::v2f64, MVT::Other), @@ -11280,6 +11285,13 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, SDValue Src = N->getOperand(SrcOpnd); MVT VecTy = Src.getValueType().getSimpleVT(); + // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is + // aligned and the type is a vector with elements up to 4 bytes + if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) + && VecTy.getScalarSizeInBits() <= 32 ) { + return SDValue(); + } + // All stores are done as v2f64 and possible bit cast. if (VecTy != MVT::v2f64) { Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); @@ -12015,18 +12027,17 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, //===----------------------------------------------------------------------===// void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); + Known.Zero.clearAllBits(); Known.One.clearAllBits(); switch (Op.getOpcode()) { default: break; case PPCISD::LBRX: { // lhbrx is known to have the top bits cleared out. if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) - KnownZero = 0xFFFF0000; + Known.Zero = 0xFFFF0000; break; } case ISD::INTRINSIC_WO_CHAIN: { @@ -12048,7 +12059,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case Intrinsic::ppc_altivec_vcmpgtuh_p: case Intrinsic::ppc_altivec_vcmpgtuw_p: case Intrinsic::ppc_altivec_vcmpgtud_p: - KnownZero = ~1U; // All bits but the low one are known to be zero. + Known.Zero = ~1U; // All bits but the low one are known to be zero.
break; } } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 6113eb58f421..32661099b79d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -606,8 +606,7 @@ namespace llvm { SelectionDAG &DAG) const override; void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; @@ -1018,6 +1017,14 @@ namespace llvm { SDValue combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const; + + bool supportsModuloShift(ISD::NodeType Inst, + EVT ReturnType) const override { + assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) && + "Expect a shift instruction"); + assert(isOperationLegal(Inst, ReturnType)); + return ReturnType.isVector(); + } }; namespace PPC { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 13603732397a..967557452f24 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -138,7 +138,7 @@ let Uses = [RM] in { def LXVW4X : XX1Form<31, 780, (outs vsrc:$XT), (ins memrr:$src), "lxvw4x $XT, $src", IIC_LdStLFD, - [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>; + []>; } // mayLoad // Store indexed instructions @@ -160,7 +160,7 @@ let Uses = [RM] in { def STXVW4X : XX1Form<31, 908, (outs), (ins vsrc:$XT, memrr:$dst), "stxvw4x $XT, $dst", IIC_LdStSTFD, - [(store v4i32:$XT, xoaddr:$dst)]>; + []>; } } // mayStore @@ -1041,8 +1041,6 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in { // Stores. def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVW4X $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), @@ -1053,8 +1051,12 @@ let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in { def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVW4X $rS, xoaddr:$dst)>; } // Permutes. 
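Much of the churn in this import is the mechanical migration from paired APInt KnownZero/KnownOne out-parameters to the llvm::KnownBits struct visible in the hunks above. A self-contained sketch of the struct's invariant and of the intersection rule used for select-like nodes (compare the Sparc hunk below); the 8-bit constants are invented for illustration:

#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

void knownBitsDemo() {
  KnownBits Known(8), Known2(8);
  Known.Zero = APInt(8, 0xF0); // top nibble proven zero
  Known.One  = APInt(8, 0x01); // bit 0 proven one
  // Invariant: no bit may be claimed to be both zero and one.
  assert((Known.Zero & Known.One) == 0);

  Known2.Zero = APInt(8, 0xC0);
  Known2.One  = APInt(8, 0x03);
  // For a select, a bit is known only if it is known, with the same
  // value, on both arms; intersect the masks.
  Known.One  &= Known2.One;  // 0x01
  Known.Zero &= Known2.Zero; // 0xC0
}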
@@ -1890,8 +1892,8 @@ let Predicates = [IsLittleEndian, HasVSX] in def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; let Predicates = [IsLittleEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (LE) diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp index f120a98e9457..c44e371856a5 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; @@ -1875,25 +1876,24 @@ EVT SparcTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, /// combiner. void SparcTargetLowering::computeKnownBitsForTargetNode (const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - APInt KnownZero2, KnownOne2; - KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); + KnownBits Known2; + Known.Zero.clearAllBits(); Known.One.clearAllBits(); switch (Op.getOpcode()) { default: break; case SPISD::SELECT_ICC: case SPISD::SELECT_XCC: case SPISD::SELECT_FCC: - DAG.computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - DAG.computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1); + DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1); // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; } } diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h index 90d03984060c..cc6386bccbb1 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -66,8 +66,7 @@ namespace llvm { /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 920b6e430e8f..cd2f708458bf 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -711,9 +712,9 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, // The inner check covers all cases but is more expensive. 
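The test that follows can be read as a coverage condition: the OR may be folded into a bit-field insertion only when every result bit is accounted for, either kept by the AND, written by the insertion, or provably zero in the input. A self-contained sketch with invented 8-bit masks:

#include <cstdint>

bool orAsInsertionDemo() {
  uint64_t Used = 0xFF;       // all bits of the produced value
  uint64_t AndMask = 0xF0;    // bits that survive the AND
  uint64_t InsertMask = 0x0C; // bits written by the insertion
  uint64_t KnownZero = 0x03;  // bits computeKnownBits proved zero
  // Bits neither kept nor written must already be zero.
  return Used == (AndMask | InsertMask | KnownZero); // true here
}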
uint64_t Used = allOnes(Op.getValueSizeInBits()); if (Used != (AndMask | InsertMask)) { - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(Op.getOperand(0), KnownZero, KnownOne); - if (Used != (AndMask | InsertMask | KnownZero.getZExtValue())) + KnownBits Known; + CurDAG->computeKnownBits(Op.getOperand(0), Known); + if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue())) return false; } @@ -770,9 +771,9 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { // If some bits of Input are already known zeros, those bits will have // been removed from the mask. See if adding them back in makes the // mask suitable. - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(Input, KnownZero, KnownOne); - Mask |= KnownZero.getZExtValue(); + KnownBits Known; + CurDAG->computeKnownBits(Input, Known); + Mask |= Known.Zero.getZExtValue(); if (!refineRxSBGMask(RxSBG, Mask)) return false; } @@ -794,9 +795,9 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { // If some bits of Input are already known ones, those bits will have // been removed from the mask. See if adding them back in makes the // mask suitable. - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(Input, KnownZero, KnownOne); - Mask &= ~KnownOne.getZExtValue(); + KnownBits Known; + CurDAG->computeKnownBits(Input, Known); + Mask &= ~Known.One.getZExtValue(); if (!refineRxSBGMask(RxSBG, Mask)) return false; } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index f2fd581f7847..6989aabb8c6a 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -20,8 +20,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Support/CommandLine.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/KnownBits.h" #include <cctype> using namespace llvm; @@ -3066,14 +3067,14 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { // Get the known-zero masks for each operand. SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) }; - APInt KnownZero[2], KnownOne[2]; - DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]); - DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]); + KnownBits Known[2]; + DAG.computeKnownBits(Ops[0], Known[0]); + DAG.computeKnownBits(Ops[1], Known[1]); // See if the upper 32 bits of one operand and the lower 32 bits of the // other are known zero. They are the low and high operands respectively. - uint64_t Masks[] = { KnownZero[0].getZExtValue(), - KnownZero[1].getZExtValue() }; + uint64_t Masks[] = { Known[0].Zero.getZExtValue(), + Known[1].Zero.getZExtValue() }; unsigned High, Low; if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) High = 1, Low = 0; @@ -3158,9 +3159,9 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, } // Get the known-zero mask for the operand. 
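The expandRxSBG hunks above use known bits to repair a mask: bits the input provably has clear may be OR'ed back into an AND mask without changing the result, which can turn an unencodable mask into a contiguous one. A tiny worked example with invented masks:

#include <cstdint>

uint64_t maskRepairDemo() {
  uint64_t Mask = 0xF0F0;      // not a single contiguous run of ones
  uint64_t KnownZero = 0x0F00; // these input bits are always zero
  // x & 0xF0F0 == x & 0xFFF0 for any such x, and 0xFFF0 is contiguous,
  // so a rotate-and-mask instruction can encode it.
  return Mask | KnownZero;     // 0xFFF0
}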
- APInt KnownZero, KnownOne; - DAG.computeKnownBits(Op, KnownZero, KnownOne); - unsigned NumSignificantBits = (~KnownZero).getActiveBits(); + KnownBits Known; + DAG.computeKnownBits(Op, Known); + unsigned NumSignificantBits = (~Known.Zero).getActiveBits(); if (NumSignificantBits == 0) return DAG.getConstant(0, DL, VT); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index e74c9a80515d..f56b238f91e6 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -530,9 +530,10 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) { if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0))) OpTy = CI->getOperand(0)->getType(); else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0))) - if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0))) - if (isa<CmpInst>(LogicI->getOperand(1))) - OpTy = CI0->getOperand(0)->getType(); + if (LogicI->getNumOperands() == 2) + if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0))) + if (isa<CmpInst>(LogicI->getOperand(1))) + OpTy = CI0->getOperand(0)->getType(); if (OpTy != nullptr) { if (VF == 1) { @@ -676,7 +677,6 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondT const Instruction *I) { if (ValTy->isVectorTy()) { assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type."); - assert (CondTy == nullptr || CondTy->isVectorTy()); unsigned VF = ValTy->getVectorNumElements(); // Called with a compare instruction. diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp index 50272fda56de..91cc97e38b3d 100644 --- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp +++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp @@ -44,7 +44,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx, const TargetMachine &TM) { Ctx = &ctx; // `Initialize` can be called more than once. - if (Mang != nullptr) delete Mang; + delete Mang; Mang = new Mangler(); InitMCObjectFileInfo(TM.getTargetTriple(), TM.isPositionIndependent(), TM.getCodeModel(), *Ctx); diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index a67137f867e7..257f1d110aa2 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Function.h" // To access function attributes. #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 25d77bb1f234..365b327190ec 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -26,18 +26,18 @@ // offset for an add that needs wrapping. def regPlusImm : PatFrag<(ops node:$addr, node:$off), (add node:$addr, node:$off), - [{ return N->getFlags()->hasNoUnsignedWrap(); }]>; + [{ return N->getFlags().hasNoUnsignedWrap(); }]>; // Treat an 'or' node as an 'add' if the or'ed bits are known to be zero. 
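The or_is_add PatFrag defined next encodes the underlying arithmetic fact: OR and ADD agree whenever no bit position can generate a carry, that is, whenever every bit is known zero in at least one operand (the (~Known0.Zero & ~Known1.Zero) == 0 test). A scalar illustration with arbitrary values:

#include <cassert>
#include <cstdint>

void orIsAddDemo() {
  uint32_t X = 0xFF00u, Y = 0x00FFu; // bit-disjoint operands
  assert((X | Y) == (X + Y));        // no carries, so OR equals ADD
}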
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); - APInt KnownZero0, KnownOne0; - CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0); - APInt KnownZero1, KnownOne1; - CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0); - return (~KnownZero0 & ~KnownZero1) == 0; + KnownBits Known0; + CurDAG->computeKnownBits(N->getOperand(0), Known0, 0); + KnownBits Known1; + CurDAG->computeKnownBits(N->getOperand(1), Known1, 0); + return (~Known0.Zero & ~Known1.Zero) == 0; }]>; // GlobalAddresses are conceptually unsigned values, so we can also fold them @@ -47,7 +47,7 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ def regPlusGA : PatFrag<(ops node:$addr, node:$off), (add node:$addr, node:$off), [{ - return N->getFlags()->hasNoUnsignedWrap(); + return N->getFlags().hasNoUnsignedWrap(); }]>; // We don't need a regPlusES because external symbols never have constant diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td index d2f650cf8f47..784c3a6557ff 100644 --- a/contrib/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm/lib/Target/X86/X86.td @@ -170,6 +170,8 @@ def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", [FeatureSSE2]>; def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true", "Enable TBM instructions">; +def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true", + "Enable LWP instructions">; def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", "Support MOVBE instruction">; def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true", @@ -691,6 +693,7 @@ def : Proc<"bdver1", [ FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, + FeatureLWP, FeatureSlowSHLD, FeatureLAHFSAHF ]>; @@ -713,6 +716,7 @@ def : Proc<"bdver2", [ FeatureXSAVE, FeatureBMI, FeatureTBM, + FeatureLWP, FeatureFMA, FeatureSlowSHLD, FeatureLAHFSAHF @@ -737,6 +741,7 @@ def : Proc<"bdver3", [ FeatureXSAVE, FeatureBMI, FeatureTBM, + FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, @@ -763,6 +768,7 @@ def : Proc<"bdver4", [ FeatureBMI, FeatureBMI2, FeatureTBM, + FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index b8477810b4c9..ebd179e786da 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -3074,16 +3074,13 @@ bool X86FastISel::fastLowerArguments() { // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. unsigned GPRCnt = 0; unsigned FPRCnt = 0; - unsigned Idx = 0; for (auto const &Arg : F->args()) { - // The first argument is at index 1. 
- ++Idx; - if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || - F->getAttributes().hasAttribute(Idx, Attribute::InReg) || - F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) || - F->getAttributes().hasAttribute(Idx, Attribute::Nest)) + if (Arg.hasAttribute(Attribute::ByVal) || + Arg.hasAttribute(Attribute::InReg) || + Arg.hasAttribute(Attribute::StructRet) || + Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::Nest)) return false; Type *ArgTy = Arg.getType(); @@ -3184,6 +3181,15 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CC); + const CallInst *CI = + CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr; + const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr; + + // Functions with no_caller_saved_registers need special handling. + if ((CI && CI->hasFnAttr("no_caller_saved_registers")) || + (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers"))) + return false; + // Handle only C, fastcc, and webkit_js calling conventions for now. switch (CC) { default: return false; diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 2d788bf0cf99..12a10bf3072f 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -419,6 +420,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { case ISD::ADD: case ISD::ADDC: case ISD::ADDE: + case ISD::ADDCARRY: case ISD::AND: case ISD::OR: case ISD::XOR: { @@ -1070,9 +1072,9 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, } APInt MaskedHighBits = APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ); - APInt KnownZero, KnownOne; - DAG.computeKnownBits(X, KnownZero, KnownOne); - if (MaskedHighBits != KnownZero) return true; + KnownBits Known; + DAG.computeKnownBits(X, Known); + if (MaskedHighBits != Known.Zero) return true; // We've identified a pattern that can be transformed into a single shift // and an addressing mode. Make it so.
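The X86FastISel hunk above also shows the new per-argument attribute API: instead of indexing the function's attribute list (where argument attributes historically started at index 1, hence the deleted ++Idx), each llvm::Argument is queried directly. A sketch of the pattern; hasUnhandledArgAttr is an invented helper name:

#include "llvm/IR/Function.h"
using namespace llvm;

// True if any formal argument carries an ABI attribute the fast path
// does not handle; mirrors the loop in fastLowerArguments above.
static bool hasUnhandledArgAttr(const Function &F) {
  for (const Argument &Arg : F.args())
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::Nest))
      return true; // old form: F.getAttributes().hasAttribute(Idx, ...)
  return false;
}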
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index ada46643a5fe..83542aaa013b 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -52,6 +52,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" @@ -784,30 +785,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SMIN, MVT::v8i16, Legal); setOperationAction(ISD::UMIN, MVT::v16i8, Legal); - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - setOperationAction(ISD::SETCC, MVT::v16i8, Custom); - setOperationAction(ISD::SETCC, MVT::v8i16, Custom); - setOperationAction(ISD::SETCC, MVT::v4i32, Custom); - - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); - setOperationAction(ISD::CTPOP, MVT::v16i8, Custom); - setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); - setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); - setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); - - setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); - setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); - setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); - setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); + for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTTZ, VT, Custom); + } - // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); @@ -882,18 +871,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom); - for (auto VT : { MVT::v8i16, MVT::v16i8 }) { - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - } - - // In the customized shift lowering, the legal cases in AVX2 will be - // recognized. - for (auto VT : { MVT::v4i32, MVT::v2i64 }) { - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); + // In the customized shift lowering, the legal v4i32/v2i64 cases + // in AVX2 will be recognized. + for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); } } @@ -935,13 +918,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // SSE41 brings specific instructions for doing vector sign extend even in // cases where we don't have SRA. 
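For readers unfamiliar with the *_EXTEND_VECTOR_INREG nodes regrouped in the next hunk: they widen the low elements of a vector in place, which is what the SSE4.1 PMOVSX/PMOVZX family implements. A scalar model of the v4i32 to v2i64 sign-extending case, with made-up values:

#include <cstdint>

void extendVectorInRegDemo() {
  int32_t Src[4] = {-1, 2, -3, 4}; // a v4i32
  int64_t Dst[2];                  // the v2i64 result
  for (int I = 0; I < 2; ++I)      // only the low elements survive
    Dst[I] = static_cast<int64_t>(Src[I]); // sign-extend, as PMOVSXDQ does
}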
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Legal); - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Legal); - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Legal); - - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v2i64, Legal); - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v4i32, Legal); - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v8i16, Legal); + for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); + } for (MVT VT : MVT::integer_vector_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom); @@ -950,19 +930,14 @@ } // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X - setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal); - - setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal); + for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { + setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal); + } // i8 vectors are custom because the source register and source // memory operand types are not the same width. @@ -1026,36 +1001,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, for (MVT VT : MVT::fp_vector_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal); - for (auto VT : { MVT::v32i8, MVT::v16i16 }) { + // In the customized shift lowering, the legal v8i32/v4i64 cases + // in AVX2 will be recognized.
+ for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); } - setOperationAction(ISD::SETCC, MVT::v32i8, Custom); - setOperationAction(ISD::SETCC, MVT::v16i16, Custom); - setOperationAction(ISD::SETCC, MVT::v8i32, Custom); - setOperationAction(ISD::SETCC, MVT::v4i64, Custom); - setOperationAction(ISD::SELECT, MVT::v4f64, Custom); setOperationAction(ISD::SELECT, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v8f32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom); + for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { + setOperationAction(ISD::SIGN_EXTEND, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); + } + setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom); for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { + setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTTZ, VT, Custom); setOperationAction(ISD::CTLZ, VT, Custom); @@ -1103,27 +1073,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X - setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal); - - setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal); - } - - // In the customized shift lowering, the legal cases in AVX2 will be - // recognized. 
- for (auto VT : { MVT::v8i32, MVT::v4i64 }) { - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); + for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { + setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal); + } } for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, @@ -1272,19 +1229,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::VSELECT, MVT::v8i1, Expand); setOperationAction(ISD::VSELECT, MVT::v16i1, Expand); if (Subtarget.hasDQI()) { - setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); - + for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) { + setOperationAction(ISD::SINT_TO_FP, VT, Legal); + setOperationAction(ISD::UINT_TO_FP, VT, Legal); + setOperationAction(ISD::FP_TO_SINT, VT, Legal); + setOperationAction(ISD::FP_TO_UINT, VT, Legal); + } if (Subtarget.hasVLX()) { // Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion. 
setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); @@ -1334,11 +1284,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); for (auto VT : { MVT::v16f32, MVT::v8f64 }) { - setOperationAction(ISD::FFLOOR, VT, Legal); - setOperationAction(ISD::FCEIL, VT, Legal); - setOperationAction(ISD::FTRUNC, VT, Legal); - setOperationAction(ISD::FRINT, VT, Legal); - setOperationAction(ISD::FNEARBYINT, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FRINT, VT, Legal); + setOperationAction(ISD::FNEARBYINT, VT, Legal); } setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom); @@ -1357,7 +1307,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SETCC, MVT::v16i1, Custom); setOperationAction(ISD::SETCC, MVT::v8i1, Custom); - setOperationAction(ISD::MUL, MVT::v8i64, Custom); + setOperationAction(ISD::MUL, MVT::v8i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom); @@ -1372,15 +1322,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v16i1, Custom); setOperationAction(ISD::SELECT, MVT::v8i1, Custom); - setOperationAction(ISD::SMAX, MVT::v16i32, Legal); - setOperationAction(ISD::SMAX, MVT::v8i64, Legal); - setOperationAction(ISD::UMAX, MVT::v16i32, Legal); - setOperationAction(ISD::UMAX, MVT::v8i64, Legal); - setOperationAction(ISD::SMIN, MVT::v16i32, Legal); - setOperationAction(ISD::SMIN, MVT::v8i64, Legal); - setOperationAction(ISD::UMIN, MVT::v16i32, Legal); - setOperationAction(ISD::UMIN, MVT::v8i64, Legal); - setOperationAction(ISD::ADD, MVT::v8i1, Custom); setOperationAction(ISD::ADD, MVT::v16i1, Custom); setOperationAction(ISD::SUB, MVT::v8i1, Custom); @@ -1391,12 +1332,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v16i32, Legal); for (auto VT : { MVT::v16i32, MVT::v8i64 }) { - setOperationAction(ISD::ABS, VT, Legal); - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::CTTZ, VT, Custom); + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTTZ, VT, Custom); } // Need to promote to 64-bit even though we have 32-bit masked instructions @@ -1540,15 +1485,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::VSELECT, MVT::v64i1, Expand); setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); - setOperationAction(ISD::SMAX, MVT::v64i8, Legal); - setOperationAction(ISD::SMAX, MVT::v32i16, Legal); - setOperationAction(ISD::UMAX, MVT::v64i8, Legal); - setOperationAction(ISD::UMAX, MVT::v32i16, Legal); - setOperationAction(ISD::SMIN, MVT::v64i8, Legal); - setOperationAction(ISD::SMIN, MVT::v32i16, Legal); - setOperationAction(ISD::UMIN, MVT::v64i8, Legal); - setOperationAction(ISD::UMIN, 
MVT::v32i16, Legal); - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); @@ -1579,6 +1515,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MSTORE, VT, Legal); setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTTZ, VT, Custom); + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); setOperationPromotedToType(ISD::AND, VT, MVT::v8i64); setOperationPromotedToType(ISD::OR, VT, MVT::v8i64); @@ -1652,6 +1592,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::USUBO, VT, Custom); setOperationAction(ISD::SMULO, VT, Custom); setOperationAction(ISD::UMULO, VT, Custom); + + // Support carry in as value rather than glue. + setOperationAction(ISD::ADDCARRY, VT, Custom); + setOperationAction(ISD::SUBCARRY, VT, Custom); } if (!Subtarget.is64Bit()) { @@ -2236,6 +2180,12 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + // In some cases we need to disable registers from the default CSR list. + // For example, when they are used for argument passing. + bool ShouldDisableCalleeSavedRegister = + CallConv == CallingConv::X86_RegCall || + MF.getFunction()->hasFnAttribute("no_caller_saved_registers"); + if (CallConv == CallingConv::X86_INTR && !Outs.empty()) report_fatal_error("X86 interrupts may not return any value"); @@ -2257,7 +2207,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(VA.isRegLoc() && "Can only return in registers!"); // Add the register to the CalleeSaveDisableRegs list. - if (CallConv == CallingConv::X86_RegCall) + if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg()); SDValue ValToCopy = OutVals[OutsIndex]; @@ -2336,7 +2286,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, "Expecting two registers after Pass64BitArgInRegs"); // Add the second register to the CalleeSaveDisableRegs list. - if (CallConv == CallingConv::X86_RegCall) + if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg()); } else { RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy)); @@ -2396,7 +2346,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); // Add the returned register to the CalleeSaveDisableRegs list. - if (CallConv == CallingConv::X86_RegCall) + if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(RetValReg); } @@ -2596,7 +2546,7 @@ SDValue X86TargetLowering::LowerCallResult( // In some calling conventions we need to remove the used registers // from the register mask. 
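The loop just below clears individual registers out of the call's register-mask operand. A mask is a packed array of 32-bit words with one bit per physical register, so register R lives at word R/32, bit R%32. A sketch of the bit manipulation; clobberReg is an invented name:

#include <cstdint>

// Mark Reg as clobbered by clearing its "preserved" bit. The real code
// also walks MCSubRegIterator so every sub-register is cleared too.
static void clobberReg(uint32_t *RegMask, unsigned Reg) {
  RegMask[Reg / 32] &= ~(1u << (Reg % 32));
}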
- if (RegMask && CallConv == CallingConv::X86_RegCall) { + if (RegMask) { for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); @@ -3293,7 +3243,8 @@ SDValue X86TargetLowering::LowerFormalArguments( } } - if (CallConv == CallingConv::X86_RegCall) { + if (CallConv == CallingConv::X86_RegCall || + Fn->hasFnAttribute("no_caller_saved_registers")) { const MachineRegisterInfo &MRI = MF.getRegInfo(); for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end())) MF.getRegInfo().disableCalleeSavedRegister(Pair.first); @@ -3385,6 +3336,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsSibcall = false; X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + const CallInst *CI = + CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr; + const Function *Fn = CI ? CI->getCalledFunction() : nullptr; + bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) || + (Fn && Fn->hasFnAttribute("no_caller_saved_registers")); if (CallConv == CallingConv::X86_INTR) report_fatal_error("X86 interrupts may not be called directly"); @@ -3797,7 +3753,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. - const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv); + // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we + // set X86_INTR calling convention because it has the same CSR mask + // (same preserved registers). + const uint32_t *Mask = RegInfo->getCallPreservedMask( + MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv); assert(Mask && "Missing call preserved mask for calling convention"); // If this is an invoke in a 32-bit function using a funclet-based @@ -3820,7 +3780,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // In some calling conventions we need to remove the used physical registers // from the reg mask. - if (CallConv == CallingConv::X86_RegCall) { + if (CallConv == CallingConv::X86_RegCall || HasNCSR) { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // Allocate a new Reg Mask and copy Mask. @@ -4839,14 +4799,10 @@ static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) { return false; // The index should be aligned on a vecWidth-bit boundary. - uint64_t Index = - cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); - + uint64_t Index = N->getConstantOperandVal(1); MVT VT = N->getSimpleValueType(0); unsigned ElSize = VT.getScalarSizeInBits(); - bool Result = (Index * ElSize) % vecWidth == 0; - - return Result; + return (Index * ElSize) % vecWidth == 0; } /// Return true if the specified INSERT_SUBVECTOR @@ -4856,15 +4812,12 @@ static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) { assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width"); if (!isa<ConstantSDNode>(N->getOperand(2).getNode())) return false; - // The index should be aligned on a vecWidth-bit boundary. - uint64_t Index = - cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); + // The index should be aligned on a vecWidth-bit boundary. 
+ uint64_t Index = N->getConstantOperandVal(2); MVT VT = N->getSimpleValueType(0); unsigned ElSize = VT.getScalarSizeInBits(); - bool Result = (Index * ElSize) % vecWidth == 0; - - return Result; + return (Index * ElSize) % vecWidth == 0; } bool X86::isVINSERT128Index(SDNode *N) { @@ -4888,13 +4841,9 @@ static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) { assert(isa<ConstantSDNode>(N->getOperand(1).getNode()) && "Illegal extract subvector for VEXTRACT"); - uint64_t Index = - cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); - + uint64_t Index = N->getConstantOperandVal(1); MVT VecVT = N->getOperand(0).getSimpleValueType(); - MVT ElVT = VecVT.getVectorElementType(); - - unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits(); + unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits(); return Index / NumElemsPerChunk; } @@ -4903,13 +4852,9 @@ static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) { assert(isa<ConstantSDNode>(N->getOperand(2).getNode()) && "Illegal insert subvector for VINSERT"); - uint64_t Index = - cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); - + uint64_t Index = N->getConstantOperandVal(2); MVT VecVT = N->getSimpleValueType(0); - MVT ElVT = VecVT.getVectorElementType(); - - unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits(); + unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits(); return Index / NumElemsPerChunk; } @@ -4942,9 +4887,9 @@ bool X86::isZeroNode(SDValue Elt) { return isNullConstant(Elt) || isNullFPConstant(Elt); } -// Build a vector of constants +// Build a vector of constants. // Use an UNDEF node if MaskElt == -1. -// Spilt 64-bit constants in the 32-bit mode. +// Split 64-bit constants in the 32-bit mode. static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG, const SDLoc &dl, bool IsMask = false) { @@ -5428,8 +5373,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, unsigned BitOffset) { if (!Cst) return false; - unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits(); if (isa<UndefValue>(Cst)) { + unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits(); Undefs.setBits(BitOffset, BitOffset + CstSizeInBits); return true; } @@ -6641,18 +6586,16 @@ static bool isUseOfShuffle(SDNode *N) { return false; } -/// Attempt to use the vbroadcast instruction to generate a splat value for the -/// following cases: -/// 1. A splat BUILD_VECTOR which uses: -/// a. A single scalar load, or a constant. -/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>). -/// 2. A splat shuffle which uses a scalar_to_vector node which comes from -/// a scalar load, or a constant. +/// Attempt to use the vbroadcast instruction to generate a splat value +/// from a splat BUILD_VECTOR which uses: +/// a. A single scalar load, or a constant. +/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>). /// /// The VBROADCAST node is returned when a pattern is found, /// or SDValue() otherwise. -static SDValue LowerVectorBroadcast(BuildVectorSDNode *BVOp, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { // VBROADCAST requires AVX. // TODO: Splats could be generated for non-AVX CPUs using SSE // instructions, but there's less potential gain for only 128-bit vectors. 
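The index-helper cleanups above lean on SDNode::getConstantOperandVal, which folds a three-step idiom into one call. Equivalent forms, sketched under the assumption that N is an SDNode whose operand 1 is a ConstantSDNode:

// Before:
// uint64_t Index =
//     cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
// After (asserts that the operand really is a constant):
uint64_t Index = N->getConstantOperandVal(1);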
@@ -7605,7 +7548,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return AddSub; if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG)) return HorizontalOp; - if (SDValue Broadcast = LowerVectorBroadcast(BV, Subtarget, DAG)) + if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG)) return Broadcast; if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG)) return BitOp; @@ -9843,7 +9786,6 @@ static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, /// For convenience, this code also bundles all of the subtarget feature set /// filtering. While a little annoying to re-dispatch on type here, there isn't /// a convenient way to factor it out. -/// FIXME: This is very similar to LowerVectorBroadcast - can we merge them? static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, @@ -16337,11 +16279,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, case ISD::ADD: case ISD::SUB: case ISD::MUL: - case ISD::SHL: { - const auto *BinNode = cast<BinaryWithFlagsSDNode>(Op.getNode()); - if (BinNode->Flags.hasNoSignedWrap()) + case ISD::SHL: + if (Op.getNode()->getFlags().hasNoSignedWrap()) break; - } default: NeedOF = true; break; @@ -16799,9 +16739,9 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, unsigned BitWidth = Op0.getValueSizeInBits(); unsigned AndBitWidth = And.getValueSizeInBits(); if (BitWidth > AndBitWidth) { - APInt Zeros, Ones; - DAG.computeKnownBits(Op0, Zeros, Ones); - if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth) + KnownBits Known; + DAG.computeKnownBits(Op0, Known); + if (Known.Zero.countLeadingOnes() < BitWidth - AndBitWidth) return SDValue(); } LHS = Op1; @@ -19120,8 +19060,7 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, if (Op.getOpcode() == X86ISD::FSETCCM || Op.getOpcode() == X86ISD::FSETCCM_RND) return DAG.getNode(ISD::AND, dl, VT, Op, IMask); - if (Op.getOpcode() == X86ISD::VFPCLASS || - Op.getOpcode() == X86ISD::VFPCLASSS) + if (Op.getOpcode() == X86ISD::VFPCLASSS) return DAG.getNode(ISD::OR, dl, VT, Op, IMask); if (PreservedSrc.isUndef()) @@ -20360,16 +20299,17 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo); + const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { - if (IntNo == llvm::Intrinsic::x86_seh_ehregnode) + switch (IntNo) { + case llvm::Intrinsic::x86_seh_ehregnode: return MarkEHRegistrationNode(Op, DAG); - if (IntNo == llvm::Intrinsic::x86_seh_ehguard) + case llvm::Intrinsic::x86_seh_ehguard: return MarkEHGuard(Op, DAG); - if (IntNo == llvm::Intrinsic::x86_flags_read_u32 || - IntNo == llvm::Intrinsic::x86_flags_read_u64 || - IntNo == llvm::Intrinsic::x86_flags_write_u32 || - IntNo == llvm::Intrinsic::x86_flags_write_u64) { + case llvm::Intrinsic::x86_flags_read_u32: + case llvm::Intrinsic::x86_flags_read_u64: + case llvm::Intrinsic::x86_flags_write_u32: + case llvm::Intrinsic::x86_flags_write_u64: { // We need a frame pointer because this will get lowered to a PUSH/POP // sequence. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); @@ -20378,6 +20318,20 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, // during ExpandISelPseudos in EmitInstrWithCustomInserter. 
return SDValue(); } + case Intrinsic::x86_lwpins32: + case Intrinsic::x86_lwpins64: { + SDLoc dl(Op); + SDValue Chain = Op->getOperand(0); + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + SDValue LwpIns = + DAG.getNode(X86ISD::LWPINS, dl, VTs, Chain, Op->getOperand(2), + Op->getOperand(3), Op->getOperand(4)); + SDValue SetCC = getSETCC(X86::COND_B, LwpIns.getValue(0), dl, DAG); + SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, SetCC); + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, + LwpIns.getValue(1)); + } + } return SDValue(); } @@ -23351,6 +23305,35 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } +static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { + SDNode *N = Op.getNode(); + MVT VT = N->getSimpleValueType(0); + + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + SDLoc DL(N); + + // Set the carry flag. + SDValue Carry = Op.getOperand(2); + EVT CarryVT = Carry.getValueType(); + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); + Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), + Carry, DAG.getConstant(NegOne, DL, CarryVT)); + + unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB; + SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0), + Op.getOperand(1), Carry.getValue(1)); + + SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG); + if (N->getValueType(1) == MVT::i1) + SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); + + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); +} + static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit()); @@ -23862,6 +23845,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::ADDCARRY: + case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::ADD: case ISD::SUB: return LowerADD_SUB(Op, DAG); case ISD::SMAX: @@ -24522,6 +24507,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND"; case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND"; case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND"; + case X86ISD::LWPINS: return "X86ISD::LWPINS"; } return nullptr; } @@ -26667,12 +26653,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, //===----------------------------------------------------------------------===// void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); unsigned Opc = Op.getOpcode(); EVT VT = Op.getValueType(); assert((Opc >= ISD::BUILTIN_OP_END || @@ -26682,7 +26667,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. 
+ Known.Zero.clearAllBits(); Known.One.clearAllBits(); switch (Opc) { default: break; case X86ISD::ADD: @@ -26701,33 +26686,33 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, break; LLVM_FALLTHROUGH; case X86ISD::SETCC: - KnownZero.setBits(1, BitWidth); + Known.Zero.setBitsFrom(1); break; case X86ISD::MOVMSK: { unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements(); - KnownZero.setBits(NumLoBits, BitWidth); + Known.Zero.setBitsFrom(NumLoBits); break; } case X86ISD::VSHLI: case X86ISD::VSRLI: { if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { if (ShiftImm->getAPIntValue().uge(VT.getScalarSizeInBits())) { - KnownZero = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); break; } - DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1); + DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1); unsigned ShAmt = ShiftImm->getZExtValue(); if (Opc == X86ISD::VSHLI) { - KnownZero = KnownZero << ShAmt; - KnownOne = KnownOne << ShAmt; + Known.Zero <<= ShAmt; + Known.One <<= ShAmt; // Low bits are known zero. - KnownZero.setLowBits(ShAmt); + Known.Zero.setLowBits(ShAmt); } else { - KnownZero.lshrInPlace(ShAmt); - KnownOne.lshrInPlace(ShAmt); + Known.Zero.lshrInPlace(ShAmt); + Known.One.lshrInPlace(ShAmt); // High bits are known zero. - KnownZero.setHighBits(ShAmt); + Known.Zero.setHighBits(ShAmt); } } break; @@ -26741,12 +26726,12 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, unsigned InBitWidth = SrcVT.getScalarSizeInBits(); assert(InNumElts >= NumElts && "Illegal VZEXT input"); - KnownZero = KnownOne = APInt(InBitWidth, 0); + Known = KnownBits(InBitWidth); APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts); - DAG.computeKnownBits(N0, KnownZero, KnownOne, DemandedSrcElts, Depth + 1); - KnownOne = KnownOne.zext(BitWidth); - KnownZero = KnownZero.zext(BitWidth); - KnownZero.setBits(InBitWidth, BitWidth); + DAG.computeKnownBits(N0, Known, DemandedSrcElts, Depth + 1); + Known.One = Known.One.zext(BitWidth); + Known.Zero = Known.Zero.zext(BitWidth); + Known.Zero.setBitsFrom(InBitWidth); break; } } @@ -30206,12 +30191,11 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); APInt DemandedMask(APInt::getSignMask(BitWidth)); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) || - TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, - TLO)) { + TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO)) { // If we changed the computation somewhere in the DAG, this change will // affect all users of Cond. Make sure it is fine and update all the nodes // so that we do not use the generic VSELECT anymore. 
Otherwise, we may @@ -31056,8 +31040,7 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) { N0.getOperand(1).getOpcode() == ISD::Constant) { SDValue N00 = N0.getOperand(0); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - const APInt &ShAmt = N1C->getAPIntValue(); - Mask = Mask.shl(ShAmt); + Mask <<= N1C->getAPIntValue(); bool MaskOK = false; // We can handle cases concerning bit-widening nodes containing setcc_c if // we carefully interrogate the mask to make sure we are semantics @@ -31267,9 +31250,9 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, unsigned ShiftImm = ShiftVal.getZExtValue(); for (APInt &Elt : EltBits) { if (X86ISD::VSHLI == Opcode) - Elt = Elt.shl(ShiftImm); + Elt <<= ShiftImm; else if (X86ISD::VSRAI == Opcode) - Elt = Elt.ashr(ShiftImm); + Elt.ashrInPlace(ShiftImm); else Elt.lshrInPlace(ShiftImm); } @@ -33481,7 +33464,7 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, // use of a constant by performing (-0 - A*B) instead. // FIXME: Check rounding control flags as well once it becomes available. if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) && - Arg->getFlags()->hasNoSignedZeros() && Subtarget.hasAnyFMA()) { + Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) { SDValue Zero = DAG.getConstantFP(0.0, DL, VT); SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0), Arg.getOperand(1), Zero); @@ -33775,12 +33758,12 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG, if (Op1.hasOneUse()) { unsigned BitWidth = Op1.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.ShrinkDemandedConstant(Op1, DemandedMask, TLO) || - TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) + TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO)) DCI.CommitTargetLoweringOpt(TLO); } return SDValue(); @@ -33842,8 +33825,8 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG, return SDValue(); bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND; - bool NSW = Add->getFlags()->hasNoSignedWrap(); - bool NUW = Add->getFlags()->hasNoUnsignedWrap(); + bool NSW = Add->getFlags().hasNoSignedWrap(); + bool NUW = Add->getFlags().hasNoUnsignedWrap(); // We need an 'add nsw' feeding into the 'sext' or 'add nuw' feeding // into the 'zext' @@ -33883,7 +33866,7 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG, SDNodeFlags Flags; Flags.setNoSignedWrap(NSW); Flags.setNoUnsignedWrap(NUW); - return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, &Flags); + return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags); } /// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) -> @@ -34486,6 +34469,34 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Optimize RES, EFLAGS = X86ISD::ADD LHS, RHS +static SDValue combineX86ADD(SDNode *N, SelectionDAG &DAG, + X86TargetLowering::DAGCombinerInfo &DCI) { + // When legalizing carry, we create carries via add X, -1. + // If that comes from an actual carry (via a setcc), we use the + // carry directly.
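Both LowerADDSUBCARRY above and this combine revolve around one arithmetic fact: adding all-ones to a boolean b produces a carry-out exactly when b is 1, so add b, -1 materializes b into the carry flag, and the combine below recognizes and undoes that when b already came from a COND_B setcc. A self-contained scalar demonstration:

#include <cassert>
#include <cstdint>

void carryMaterializeDemo() {
  for (uint8_t B = 0; B <= 1; ++B) {
    unsigned Sum = unsigned(B) + 0xFFu; // add B, -1 at 8-bit width
    bool CarryOut = Sum > 0xFFu;        // the flag the X86 nodes consume
    assert(CarryOut == (B == 1));
  }
}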
+ if (isAllOnesConstant(N->getOperand(1)) && N->hasAnyUseOfValue(1)) { + SDValue Carry = N->getOperand(0); + while (Carry.getOpcode() == ISD::TRUNCATE || + Carry.getOpcode() == ISD::ZERO_EXTEND || + Carry.getOpcode() == ISD::SIGN_EXTEND || + Carry.getOpcode() == ISD::ANY_EXTEND || + (Carry.getOpcode() == ISD::AND && + isOneConstant(Carry.getOperand(1)))) + Carry = Carry.getOperand(0); + + if (Carry.getOpcode() == ISD::SETCC || + Carry.getOpcode() == X86ISD::SETCC || + Carry.getOpcode() == X86ISD::SETCC_CARRY) { + auto *Cond = cast<ConstantSDNode>(Carry.getOperand(0)); + if (Cond->getZExtValue() == X86::COND_B) + return DCI.CombineTo(N, SDValue(N, 0), Carry.getOperand(1)); + } + } + + return SDValue(); +} + // Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS static SDValue combineADC(SDNode *N, SelectionDAG &DAG, X86TargetLowering::DAGCombinerInfo &DCI) { @@ -34740,8 +34751,8 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; - if (Flags->hasVectorReduction()) { + const SDNodeFlags Flags = N->getFlags(); + if (Flags.hasVectorReduction()) { if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget)) return Sad; if (SDValue MAdd = combineLoopMAddPattern(N, DAG, Subtarget)) @@ -35047,6 +35058,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget); case ISD::ADD: return combineAdd(N, DAG, Subtarget); case ISD::SUB: return combineSub(N, DAG, Subtarget); + case X86ISD::ADD: return combineX86ADD(N, DAG, DCI); + case X86ISD::ADC: return combineADC(N, DAG, DCI); case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget); case ISD::SHL: @@ -35171,14 +35183,21 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { /// know that the code that lowers COPY of EFLAGS has to use the stack, and if /// we don't adjust the stack we clobber the first frame index. /// See X86InstrInfo::copyPhysReg. -bool X86TargetLowering::hasCopyImplyingStackAdjustment( - MachineFunction *MF) const { - const MachineRegisterInfo &MRI = MF->getRegInfo(); - +static bool hasCopyImplyingStackAdjustment(const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); return any_of(MRI.reg_instructions(X86::EFLAGS), [](const MachineInstr &RI) { return RI.isCopy(); }); } +void X86TargetLowering::finalizeLowering(MachineFunction &MF) const { + if (hasCopyImplyingStackAdjustment(MF)) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setHasCopyImplyingStackAdjustment(true); + } + + TargetLoweringBase::finalizeLowering(MF); +} + /// This method queries the target whether it is beneficial for dag combiner to /// promote the specified node. If true, it should return the desired promotion /// type by reference. diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index 190a88335000..18106c2eb394 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -559,6 +559,9 @@ namespace llvm { // Conversions between float and half-float. CVTPS2PH, CVTPH2PS, + // LWP insert record. + LWPINS, + // Compare and swap. LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, LCMPXCHG8_DAG, @@ -773,10 +776,6 @@ namespace llvm { /// and some i16 instructions are slow.
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; - /// Return true if the MachineFunction contains a COPY which would imply - /// HasOpaqueSPAdjustment. - bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const override; - MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; @@ -828,8 +827,7 @@ namespace llvm { /// Determine which of the bits specified in Mask are known to be either /// zero or one and return them in the KnownZero/KnownOne bitsets. void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; @@ -1066,6 +1064,9 @@ namespace llvm { ArrayRef<ShuffleVectorInst *> Shuffles, ArrayRef<unsigned> Indices, unsigned Factor) const override; + + void finalizeLowering(MachineFunction &MF) const override; + protected: std::pair<const TargetRegisterClass *, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td index e592c2b3c0aa..3dc673e3c35a 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1271,11 +1271,11 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); - APInt KnownZero0, KnownOne0; - CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0); - APInt KnownZero1, KnownOne1; - CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0); - return (~KnownZero0 & ~KnownZero1) == 0; + KnownBits Known0; + CurDAG->computeKnownBits(N->getOperand(0), Known0, 0); + KnownBits Known1; + CurDAG->computeKnownBits(N->getOperand(1), Known1, 0); + return (~Known0.Zero & ~Known1.Zero) == 0; }]>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index 26444dd1f619..888daa275265 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -821,6 +821,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHLQrr, X86::VPSHLQmr, 0 }, { X86::VPSHLWrr, X86::VPSHLWmr, 0 }, + // LWP foldable instructions + { X86::LWPINS32rri, X86::LWPINS32rmi, 0 }, + { X86::LWPINS64rri, X86::LWPINS64rmi, 0 }, + { X86::LWPVAL32rri, X86::LWPVAL32rmi, 0 }, + { X86::LWPVAL64rri, X86::LWPVAL64rmi, 0 }, + // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions { X86::BEXTR32rr, X86::BEXTR32rm, 0 }, { X86::BEXTR64rr, X86::BEXTR64rm, 0 }, diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index c3def461afdc..cdf7ce19cdc8 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -283,6 +283,11 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA, def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def X86lwpins : SDNode<"X86ISD::LWPINS", + SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. 
// @@ -836,6 +841,7 @@ def HasFMA : Predicate<"Subtarget->hasFMA()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def HasXOP : Predicate<"Subtarget->hasXOP()">; def HasTBM : Predicate<"Subtarget->hasTBM()">; +def HasLWP : Predicate<"Subtarget->hasLWP()">; def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; def HasF16C : Predicate<"Subtarget->hasF16C()">; @@ -877,7 +883,9 @@ def In32BitMode : Predicate<"Subtarget->is32Bit()">, def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||" - "Subtarget->getFrameLowering()->hasFP(*MF)">; + "Subtarget->getFrameLowering()->hasFP(*MF)"> { + let RecomputePerFunction = 1; +} def IsPS4 : Predicate<"Subtarget->isTargetPS4()">; def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; @@ -887,9 +895,9 @@ def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">; def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||" "TM.getCodeModel() == CodeModel::Kernel">; def IsNotPIC : Predicate<"!TM.isPositionIndependent()">; -def OptForSize : Predicate<"OptForSize">; -def OptForMinSize : Predicate<"OptForMinSize">; -def OptForSpeed : Predicate<"!OptForSize">; +def OptForSize : Predicate<"Subtarget->getOptForSize()">; +def OptForMinSize : Predicate<"Subtarget->getOptForMinSize()">; +def OptForSpeed : Predicate<"!Subtarget->getOptForSize()">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; @@ -2442,6 +2450,59 @@ defm TZMSK : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m>; } // HasTBM, EFLAGS //===----------------------------------------------------------------------===// +// Lightweight Profiling Instructions + +let Predicates = [HasLWP] in { + +def LLWPCB : I<0x12, MRM0r, (outs), (ins GR32:$src), "llwpcb\t$src", + [(int_x86_llwpcb GR32:$src)], IIC_LWP>, + XOP, XOP9, Requires<[Not64BitMode]>; +def SLWPCB : I<0x12, MRM1r, (outs GR32:$dst), (ins), "slwpcb\t$dst", + [(set GR32:$dst, (int_x86_slwpcb))], IIC_LWP>, + XOP, XOP9, Requires<[Not64BitMode]>; + +def LLWPCB64 : I<0x12, MRM0r, (outs), (ins GR64:$src), "llwpcb\t$src", + [(int_x86_llwpcb GR64:$src)], IIC_LWP>, + XOP, XOP9, VEX_W, Requires<[In64BitMode]>; +def SLWPCB64 : I<0x12, MRM1r, (outs GR64:$dst), (ins), "slwpcb\t$dst", + [(set GR64:$dst, (int_x86_slwpcb))], IIC_LWP>, + XOP, XOP9, VEX_W, Requires<[In64BitMode]>; + +multiclass lwpins_intr<RegisterClass RC> { + def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), + "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", + [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, imm:$cntl))]>, + XOP_4V, XOPA; + let mayLoad = 1 in + def rmi : Ii32<0x12, MRM0m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), + "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", + [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), imm:$cntl))]>, + XOP_4V, XOPA; +} + +let Defs = [EFLAGS] in { + defm LWPINS32 : lwpins_intr<GR32>; + defm LWPINS64 : lwpins_intr<GR64>, VEX_W; +} // EFLAGS + +multiclass lwpval_intr<RegisterClass RC, Intrinsic Int> { + def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), + "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", + [(Int RC:$src0, GR32:$src1, imm:$cntl)], IIC_LWP>, + XOP_4V, XOPA; + let mayLoad = 1 in + def 
rmi : Ii32<0x12, MRM1m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), + "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", + [(Int RC:$src0, (loadi32 addr:$src1), imm:$cntl)], IIC_LWP>, + XOP_4V, XOPA; +} + +defm LWPVAL32 : lwpval_intr<GR32, int_x86_lwpval32>; +defm LWPVAL64 : lwpval_intr<GR64, int_x86_lwpval64>, VEX_W; + +} // HasLWP + +//===----------------------------------------------------------------------===// // MONITORX/MWAITX Instructions // let SchedRW = [ WriteSystem ] in { diff --git a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp index d0f1b7091da9..38f7bc0af5c7 100644 --- a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -48,7 +48,6 @@ public: X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI); - void beginFunction(const MachineFunction &MF) override; bool select(MachineInstr &I) const override; private: @@ -56,11 +55,9 @@ private: /// the patterns that don't require complex C++. bool selectImpl(MachineInstr &I) const; - // TODO: remove after selectImpl support pattern with a predicate. + // TODO: remove after supported by Tablegen-erated instruction selection. unsigned getFAddOp(LLT &Ty, const RegisterBank &RB) const; unsigned getFSubOp(LLT &Ty, const RegisterBank &RB) const; - unsigned getAddOp(LLT &Ty, const RegisterBank &RB) const; - unsigned getSubOp(LLT &Ty, const RegisterBank &RB) const; unsigned getLoadStoreOp(LLT &Ty, const RegisterBank &RB, unsigned Opc, uint64_t Alignment) const; @@ -80,12 +77,10 @@ private: const X86InstrInfo &TII; const X86RegisterInfo &TRI; const X86RegisterBankInfo &RBI; - bool OptForSize; - bool OptForMinSize; - PredicateBitset AvailableFeatures; - PredicateBitset computeAvailableFeatures(const MachineFunction *MF, - const X86Subtarget *Subtarget) const; +#define GET_GLOBALISEL_PREDICATES_DECL +#include "X86GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL #define GET_GLOBALISEL_TEMPORARIES_DECL #include "X86GenGlobalISel.inc" @@ -102,8 +97,10 @@ X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI) : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI), OptForSize(false), - OptForMinSize(false), AvailableFeatures() + TRI(*STI.getRegisterInfo()), RBI(RBI), +#define GET_GLOBALISEL_PREDICATES_INIT +#include "X86GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT #define GET_GLOBALISEL_TEMPORARIES_INIT #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -153,10 +150,9 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); - (void)DstSize; unsigned SrcReg = I.getOperand(1).getReg(); const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); - (void)SrcSize; + assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) && "No phys reg on generic operators"); assert((DstSize == SrcSize || @@ -172,6 +168,18 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, case X86::GPRRegBankID: assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values."); RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank); + + // Change the physical register + if (SrcSize > DstSize &&
TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + if (RC == &X86::GR32RegClass) + I.getOperand(1).setSubReg(X86::sub_32bit); + else if (RC == &X86::GR16RegClass) + I.getOperand(1).setSubReg(X86::sub_16bit); + else if (RC == &X86::GR8RegClass) + I.getOperand(1).setSubReg(X86::sub_8bit); + + I.getOperand(1).substPhysReg(SrcReg, TRI); + } break; case X86::VECRRegBankID: RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank); @@ -195,12 +203,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return true; } -void X86InstructionSelector::beginFunction(const MachineFunction &MF) { - OptForSize = MF.getFunction()->optForSize(); - OptForMinSize = MF.getFunction()->optForMinSize(); - AvailableFeatures = computeAvailableFeatures(&MF, &STI); -} - bool X86InstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -223,8 +225,12 @@ bool X86InstructionSelector::select(MachineInstr &I) const { assert(I.getNumOperands() == I.getNumExplicitOperands() && "Generic instruction has unexpected implicit operands\n"); - // TODO: This should be implemented by tblgen, pattern with predicate not - // supported yet. + if (selectImpl(I)) + return true; + + DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs())); + + // TODO: This should be implemented by tblgen. if (selectBinaryOp(I, MRI, MF)) return true; if (selectLoadStoreOp(I, MRI, MF)) @@ -236,7 +242,7 @@ bool X86InstructionSelector::select(MachineInstr &I) const { if (selectTrunc(I, MRI, MF)) return true; - return selectImpl(I); + return false; } unsigned X86InstructionSelector::getFAddOp(LLT &Ty, @@ -309,44 +315,6 @@ unsigned X86InstructionSelector::getFSubOp(LLT &Ty, return TargetOpcode::G_FSUB; } -unsigned X86InstructionSelector::getAddOp(LLT &Ty, - const RegisterBank &RB) const { - - if (X86::VECRRegBankID != RB.getID()) - return TargetOpcode::G_ADD; - - if (Ty == LLT::vector(4, 32)) { - if (STI.hasAVX512() && STI.hasVLX()) { - return X86::VPADDDZ128rr; - } else if (STI.hasAVX()) { - return X86::VPADDDrr; - } else if (STI.hasSSE2()) { - return X86::PADDDrr; - } - } - - return TargetOpcode::G_ADD; -} - -unsigned X86InstructionSelector::getSubOp(LLT &Ty, - const RegisterBank &RB) const { - - if (X86::VECRRegBankID != RB.getID()) - return TargetOpcode::G_SUB; - - if (Ty == LLT::vector(4, 32)) { - if (STI.hasAVX512() && STI.hasVLX()) { - return X86::VPSUBDZ128rr; - } else if (STI.hasAVX()) { - return X86::VPSUBDrr; - } else if (STI.hasSSE2()) { - return X86::PSUBDrr; - } - } - - return TargetOpcode::G_SUB; -} - bool X86InstructionSelector::selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { @@ -364,12 +332,6 @@ bool X86InstructionSelector::selectBinaryOp(MachineInstr &I, case TargetOpcode::G_FSUB: NewOpc = getFSubOp(Ty, RB); break; - case TargetOpcode::G_ADD: - NewOpc = getAddOp(Ty, RB); - break; - case TargetOpcode::G_SUB: - NewOpc = getSubOp(Ty, RB); - break; default: break; } @@ -396,7 +358,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB, } else if (Ty == LLT::scalar(16)) { if (X86::GPRRegBankID == RB.getID()) return Isload ? X86::MOV16rm : X86::MOV16mr; - } else if (Ty == LLT::scalar(32)) { + } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) { if (X86::GPRRegBankID == RB.getID()) return Isload ? 
X86::MOV32rm : X86::MOV32mr; if (X86::VECRRegBankID == RB.getID()) @@ -404,7 +366,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB, : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) : (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr); - } else if (Ty == LLT::scalar(64)) { + } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) { if (X86::GPRRegBankID == RB.getID()) return Isload ? X86::MOV64rm : X86::MOV64mr; if (X86::VECRRegBankID == RB.getID()) diff --git a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp index c2dc762fec5e..a437f6bf4714 100644 --- a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp @@ -71,6 +71,15 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar); setAction({TargetOpcode::G_CONSTANT, s64}, NarrowScalar); + + // Extensions + setAction({G_ZEXT, s32}, Legal); + setAction({G_SEXT, s32}, Legal); + + for (auto Ty : {s8, s16}) { + setAction({G_ZEXT, 1, Ty}, Legal); + setAction({G_SEXT, 1, Ty}, Legal); + } } void X86LegalizerInfo::setLegalizerInfo64bit() { @@ -105,6 +114,17 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { setAction({TargetOpcode::G_CONSTANT, Ty}, Legal); setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar); + + // Extensions + for (auto Ty : {s32, s64}) { + setAction({G_ZEXT, Ty}, Legal); + setAction({G_SEXT, Ty}, Legal); + } + + for (auto Ty : {s8, s16, s32}) { + setAction({G_ZEXT, 1, Ty}, Legal); + setAction({G_SEXT, 1, Ty}, Legal); + } } void X86LegalizerInfo::setLegalizerInfoSSE1() { diff --git a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp index debb192732e5..7be0a7fd4067 100644 --- a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp @@ -27,6 +27,8 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -221,6 +223,8 @@ public: StringRef getPassName() const override { return "X86 LEA Optimize"; } + bool doInitialization(Module &M) override; + /// \brief Loop over all of the basic blocks, replacing address /// calculations in load and store instructions, if it's already /// been calculated by LEA. Also, remove redundant LEAs. @@ -262,6 +266,12 @@ private: /// \brief Removes redundant address calculations. bool removeRedundantAddrCalc(MemOpMap &LEAs); + /// Replace debug value MI with a new debug value instruction using register + /// VReg with an appropriate offset and DIExpression to incorporate the + /// address displacement AddrDispShift. Return new debug value instruction. + MachineInstr *replaceDebugValue(MachineInstr &MI, unsigned VReg, + int64_t AddrDispShift); + /// \brief Removes LEAs which calculate similar addresses. 
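/// Two LEAs are considered similar when their addresses differ only by a /// constant displacement; uses of one (including DBG_VALUEs) can then be /// rewritten in terms of the other plus that displacement.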
bool removeRedundantLEAs(MemOpMap &LEAs); @@ -270,6 +280,7 @@ private: MachineRegisterInfo *MRI; const X86InstrInfo *TII; const X86RegisterInfo *TRI; + Module *TheModule; static char ID; }; @@ -532,6 +543,25 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) { return Changed; } +MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI, + unsigned VReg, + int64_t AddrDispShift) { + DIExpression *Expr = const_cast<DIExpression *>(MI.getDebugExpression()); + + if (AddrDispShift != 0) + Expr = DIExpression::prepend(Expr, DIExpression::NoDeref, AddrDispShift, + DIExpression::WithStackValue); + + // Replace DBG_VALUE instruction with modified version. + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + bool IsIndirect = MI.isIndirectDebugValue(); + int64_t Offset = IsIndirect ? MI.getOperand(1).getImm() : 0; + const MDNode *Var = MI.getDebugVariable(); + return BuildMI(*MBB, MBB->erase(&MI), DL, TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, VReg, Offset, Var, Expr); +} + // Try to find similar LEAs in the list and replace one with another. bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { bool Changed = false; @@ -563,13 +593,21 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { // Loop over all uses of the Last LEA and update their operands. Note // that the correctness of this has already been checked in the // isReplaceable function. + unsigned FirstVReg = First.getOperand(0).getReg(); unsigned LastVReg = Last.getOperand(0).getReg(); - for (auto UI = MRI->use_nodbg_begin(LastVReg), - UE = MRI->use_nodbg_end(); + for (auto UI = MRI->use_begin(LastVReg), UE = MRI->use_end(); UI != UE;) { MachineOperand &MO = *UI++; MachineInstr &MI = *MO.getParent(); + if (MI.isDebugValue()) { + // Replace DBG_VALUE instruction with modified version using the + // register from the replacing LEA and the address displacement + // between the LEA instructions. + replaceDebugValue(MI, FirstVReg, AddrDispShift); + continue; + } + // Get the number of the first memory operand. const MCInstrDesc &Desc = MI.getDesc(); int MemOpNo = @@ -577,7 +615,7 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { X86II::getOperandBias(Desc); // Update address base. - MO.setReg(First.getOperand(0).getReg()); + MO.setReg(FirstVReg); // Update address disp. MachineOperand &Op = MI.getOperand(MemOpNo + X86::AddrDisp); @@ -587,11 +625,8 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { Op.setOffset(Op.getOffset() + AddrDispShift); } - // Mark debug values referring to Last LEA as undefined. - MRI->markUsesInDebugValueAsUndef(LastVReg); - // Since we can possibly extend register lifetime, clear kill flags. 
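// (Uses of the removed LEA now read FirstVReg, so First's value can stay // live past the point its kill flags previously marked.)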
- MRI->clearKillFlags(First.getOperand(0).getReg()); + MRI->clearKillFlags(FirstVReg); ++NumRedundantLEAs; DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump();); @@ -614,6 +649,11 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { return Changed; } +bool OptimizeLEAPass::doInitialization(Module &M) { + TheModule = &M; + return false; +} + bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp index 1f16f3c9a14d..cf2ceef8013a 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -276,7 +276,14 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { bool HasAVX512 = Subtarget.hasAVX512(); bool CallsEHReturn = MF->callsEHReturn(); - switch (MF->getFunction()->getCallingConv()) { + CallingConv::ID CC = MF->getFunction()->getCallingConv(); + + // If the NoCallerSavedRegisters attribute is present, use the X86_INTR + // calling convention, since it provides the CSR list this attribute needs. + if (MF->getFunction()->hasFnAttribute("no_caller_saved_registers")) + CC = CallingConv::X86_INTR; + + switch (CC) { case CallingConv::GHC: case CallingConv::HiPE: return CSR_NoRegs_SaveList; diff --git a/contrib/llvm/lib/Target/X86/X86Schedule.td b/contrib/llvm/lib/Target/X86/X86Schedule.td index 7f7efd7cad3f..4eae6ca7abe3 100644 --- a/contrib/llvm/lib/Target/X86/X86Schedule.td +++ b/contrib/llvm/lib/Target/X86/X86Schedule.td @@ -497,6 +497,7 @@ def IIC_IN_RI : InstrItinClass; def IIC_OUT_RR : InstrItinClass; def IIC_OUT_IR : InstrItinClass; def IIC_INS : InstrItinClass; +def IIC_LWP : InstrItinClass; def IIC_MOV_REG_DR : InstrItinClass; def IIC_MOV_DR_REG : InstrItinClass; def IIC_MOV_REG_CR : InstrItinClass; diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp index 1a72a0ba3a64..d4b2392eb1f5 100644 --- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -44,8 +44,26 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible( return false; } +namespace { + +// Represents a cover of a buffer of Size bytes with Count() blocks of type AVT +// (of size UBytes() bytes), as well as how many bytes remain (BytesLeft() is +// always smaller than the block size). +struct RepMovsRepeats { + RepMovsRepeats(uint64_t Size) : Size(Size) {} + + uint64_t Count() const { return Size / UBytes(); } + uint64_t BytesLeft() const { return Size % UBytes(); } + uint64_t UBytes() const { return AVT.getSizeInBits() / 8; } + + const uint64_t Size; + MVT AVT = MVT::i8; +}; + +} // namespace + SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( - SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -69,10 +87,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( if ((Align & 3) != 0 || !ConstantSize || ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) { // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); + ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val); - if (const char *bzeroEntry = V && - V->isNullValue() ?
Subtarget.getBZeroEntry() : nullptr) { + if (const char *bzeroEntry = ValC && + ValC->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); @@ -104,7 +122,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( SDValue InFlag; EVT AVT; SDValue Count; - ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); + ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val); unsigned BytesLeft = 0; if (ValC) { unsigned ValReg; @@ -147,7 +165,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( } else { AVT = MVT::i8; Count = DAG.getIntPtrConstant(SizeVal, dl); - Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag); InFlag = Chain.getValue(1); } @@ -171,7 +189,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( Chain = DAG.getMemset(Chain, dl, DAG.getNode(ISD::ADD, dl, AddrVT, Dst, DAG.getConstant(Offset, dl, AddrVT)), - Src, + Val, DAG.getConstant(BytesLeft, dl, SizeVT), Align, isVolatile, false, DstPtrInfo.getWithOffset(Offset)); @@ -181,24 +199,6 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( return Chain; } -namespace { - -// Represents a cover of a buffer of SizeVal bytes with blocks of size -// AVT, as well as how many bytes remain (BytesLeft is always smaller than -// the block size). -struct RepMovsRepeats { - RepMovsRepeats(const uint64_t SizeVal, const MVT& AVT) { - const unsigned UBytes = AVT.getSizeInBits() / 8; - Count = SizeVal / UBytes; - BytesLeft = SizeVal % UBytes; - } - - unsigned Count; - unsigned BytesLeft; -}; - -} // namespace - SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, @@ -210,8 +210,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getMachineFunction().getSubtarget<X86Subtarget>(); if (!ConstantSize) return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) + RepMovsRepeats Repeats(ConstantSize->getZExtValue()); + if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold()) return SDValue(); /// If not DWORD aligned, it is more efficient to call the library. However @@ -232,35 +232,31 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( if (isBaseRegConflictPossible(DAG, ClobberSet)) return SDValue(); - MVT AVT; - if (Subtarget.hasERMSB()) - // If the target has enhanced REPMOVSB, then it's at least as fast to use - // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle - // BytesLeft. - AVT = MVT::i8; - else if (Align & 1) - AVT = MVT::i8; - else if (Align & 2) - AVT = MVT::i16; - else if (Align & 4) - // DWORD aligned - AVT = MVT::i32; - else - // QWORD aligned - AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; - - RepMovsRepeats Repeats(SizeVal, AVT); - if (Repeats.BytesLeft > 0 && - DAG.getMachineFunction().getFunction()->optForMinSize()) { - // When agressively optimizing for size, avoid generating the code to handle - // BytesLeft. - AVT = MVT::i8; - Repeats = RepMovsRepeats(SizeVal, AVT); + // If the target has enhanced REPMOVSB, then it's at least as fast to use + // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle + // BytesLeft. 
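+ // Illustrative example: a 15-byte copy with 8-byte alignment on x86-64 + // picks AVT = i64, giving Count() == 1 and BytesLeft() == 7; the trailing + // bytes are then handled by the extra memcpy emitted below.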
+ if (!Subtarget.hasERMSB() && !(Align & 1)) { + if (Align & 2) + // WORD aligned + Repeats.AVT = MVT::i16; + else if (Align & 4) + // DWORD aligned + Repeats.AVT = MVT::i32; + else + // QWORD aligned + Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; + + if (Repeats.BytesLeft() > 0 && + DAG.getMachineFunction().getFunction()->optForMinSize()) { + // When aggressively optimizing for size, avoid generating the code to + // handle BytesLeft. + Repeats.AVT = MVT::i8; + } } SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, - DAG.getIntPtrConstant(Repeats.Count, dl), InFlag); + DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); @@ -270,14 +266,14 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; + SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag }; SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops); SmallVector<SDValue, 4> Results; Results.push_back(RepMovs); - if (Repeats.BytesLeft) { + if (Repeats.BytesLeft()) { // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - Repeats.BytesLeft; + unsigned Offset = Repeats.Size - Repeats.BytesLeft(); EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); @@ -288,7 +284,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)), - DAG.getConstant(Repeats.BytesLeft, dl, + DAG.getConstant(Repeats.BytesLeft(), dl, SizeVT), Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp index 4154530d04e7..9ab751e2b002 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp @@ -265,6 +265,7 @@ void X86Subtarget::initializeEnvironment() { HasFMA4 = false; HasXOP = false; HasTBM = false; + HasLWP = false; HasMOVBE = false; HasRDRAND = false; HasF16C = false; @@ -290,6 +291,9 @@ void X86Subtarget::initializeEnvironment() { HasMWAITX = false; HasCLZERO = false; HasMPX = false; + HasSGX = false; + HasCLFLUSHOPT = false; + HasCLWB = false; IsBTMemSlow = false; IsPMULLDSlow = false; IsSHLDSlow = false; @@ -326,7 +330,8 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const X86TargetMachine &TM, - unsigned StackAlignOverride) + unsigned StackAlignOverride, bool OptForSize, + bool OptForMinSize) : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others), PICStyle(PICStyles::None), TM(TM), TargetTriple(TT), StackAlignOverride(StackAlignOverride), @@ -335,8 +340,9 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, TargetTriple.getEnvironment() != Triple::CODE16), In16BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() == Triple::CODE16), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), - TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) { + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), + FrameLowering(*this, getStackAlignment()), OptForSize(OptForSize), + OptForMinSize(OptForMinSize) { // Determine the PICStyle based on the target selected.
if (!isPositionIndependent()) setPICStyle(PICStyles::None); diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h index fd057f36c890..de1514243aeb 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h @@ -124,6 +124,9 @@ protected: /// Target has TBM instructions. bool HasTBM; + /// Target has LWP instructions. + bool HasLWP; + /// True if the processor has the MOVBE instruction. bool HasMOVBE; @@ -328,12 +331,16 @@ private: X86TargetLowering TLInfo; X86FrameLowering FrameLowering; + bool OptForSize; + bool OptForMinSize; + public: /// This constructor initializes the data members to match that /// of the specified triple. /// X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, - const X86TargetMachine &TM, unsigned StackAlignOverride); + const X86TargetMachine &TM, unsigned StackAlignOverride, + bool OptForSize, bool OptForMinSize); /// This object will take ownership of \p GISelAccessor. void setGISelAccessor(GISelAccessor &GISel) { this->GISel.reset(&GISel); } @@ -443,6 +450,7 @@ public: bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } bool hasXOP() const { return HasXOP; } bool hasTBM() const { return HasTBM; } + bool hasLWP() const { return HasLWP; } bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } bool hasF16C() const { return HasF16C; } @@ -499,6 +507,9 @@ public: bool isSLM() const { return X86ProcFamily == IntelSLM; } bool useSoftFloat() const { return UseSoftFloat; } + bool getOptForSize() const { return OptForSize; } + bool getOptForMinSize() const { return OptForMinSize; } + /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for /// no-sse2). There isn't any reason to disable it if the target processor /// supports it. diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp index 623cf38aa951..086f55dd60b5 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -268,6 +268,12 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { FS = Key.substr(CPU.size()); + bool OptForSize = F.optForSize(); + bool OptForMinSize = F.optForMinSize(); + + Key += std::string(OptForSize ? "+" : "-") + "optforsize"; + Key += std::string(OptForMinSize ? "+" : "-") + "optforminsize"; + auto &I = SubtargetMap[Key]; if (!I) { // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions.
resetTargetOptions(F); I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this, - Options.StackAlignmentOverride); + Options.StackAlignmentOverride, + OptForSize, OptForMinSize); #ifndef LLVM_BUILD_GLOBAL_ISEL GISelAccessor *GISel = new GISelAccessor(); #else @@ -286,7 +293,8 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo()); GISel->RegBankInfo.reset(RBI); - GISel->InstSelector.reset(createX86InstructionSelector(*this, *I, *RBI)); + GISel->InstSelector.reset(createX86InstructionSelector( + *this, *I, *RBI)); #endif I->setGISelAccessor(*GISel); } diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp index bc14630584e5..500b26b3be17 100644 --- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp +++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp @@ -412,7 +412,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { // Can't use musttail due to prototype mismatch, but we can use tail. Call->setTailCall(true); // Set inreg so we pass it in EAX. - Call->addAttribute(1, Attribute::InReg); + Call->addParamAttr(0, Attribute::InReg); Builder.CreateRet(Call); return Trampoline; } diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 2efcd46cd8d4..4d3ecf25dc34 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -406,9 +407,9 @@ SDValue XCoreTargetLowering::lowerLoadWordFromAlignedBasePlusOffset( static bool isWordAligned(SDValue Value, SelectionDAG &DAG) { - APInt KnownZero, KnownOne; - DAG.computeKnownBits(Value, KnownZero, KnownOne); - return KnownZero.countTrailingOnes() >= 2; + KnownBits Known; + DAG.computeKnownBits(Value, Known); + return Known.Zero.countTrailingOnes() >= 2; } SDValue XCoreTargetLowering:: @@ -1601,13 +1602,12 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if (OutVal.hasOneUse()) { unsigned BitWidth = OutVal.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.ShrinkDemandedConstant(OutVal, DemandedMask, TLO) || - TLI.SimplifyDemandedBits(OutVal, DemandedMask, KnownZero, KnownOne, - TLO)) + TLI.SimplifyDemandedBits(OutVal, DemandedMask, Known, TLO)) DCI.CommitTargetLoweringOpt(TLO); } break; @@ -1618,13 +1618,12 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if (Time.hasOneUse()) { unsigned BitWidth = Time.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.ShrinkDemandedConstant(Time, DemandedMask, TLO) || - TLI.SimplifyDemandedBits(Time, DemandedMask, KnownZero, KnownOne, - TLO)) + TLI.SimplifyDemandedBits(Time, DemandedMask, Known, TLO)) DCI.CommitTargetLoweringOpt(TLO); } break; @@ -1655,11 +1654,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (ladd x, 
0, y) -> 0, add x, y iff carry is unused and y has only the // low bit set if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { - APInt KnownZero, KnownOne; + KnownBits Known; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.computeKnownBits(N2, KnownZero, KnownOne); - if ((KnownZero & Mask) == Mask) { + DAG.computeKnownBits(N2, Known); + if ((Known.Zero & Mask) == Mask) { SDValue Carry = DAG.getConstant(0, dl, VT); SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2); SDValue Ops[] = { Result, Carry }; @@ -1678,11 +1677,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) { - APInt KnownZero, KnownOne; + KnownBits Known; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.computeKnownBits(N2, KnownZero, KnownOne); - if ((KnownZero & Mask) == Mask) { + DAG.computeKnownBits(N2, Known); + if ((Known.Zero & Mask) == Mask) { SDValue Borrow = N2; SDValue Result = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), N2); @@ -1694,11 +1693,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the // low bit set if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { - APInt KnownZero, KnownOne; + KnownBits Known; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.computeKnownBits(N2, KnownZero, KnownOne); - if ((KnownZero & Mask) == Mask) { + DAG.computeKnownBits(N2, Known); + if ((Known.Zero & Mask) == Mask) { SDValue Borrow = DAG.getConstant(0, dl, VT); SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2); SDValue Ops[] = { Result, Borrow }; @@ -1822,20 +1821,19 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, } void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); + Known.Zero.clearAllBits(); Known.One.clearAllBits(); switch (Op.getOpcode()) { default: break; case XCoreISD::LADD: case XCoreISD::LSUB: if (Op.getResNo() == 1) { // Top bits of carry / borrow are clear. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 1); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 1); } break; case ISD::INTRINSIC_W_CHAIN: @@ -1844,24 +1842,24 @@ void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, switch (IntNo) { case Intrinsic::xcore_getts: // High bits are known to be zero. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 16); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 16); break; case Intrinsic::xcore_int: case Intrinsic::xcore_inct: // High bits are known to be zero. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 8); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 8); break; case Intrinsic::xcore_testct: // Result is either 0 or 1. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 1); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 1); break; case Intrinsic::xcore_testwct: // Result is in the range 0 - 4. 
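// (Values 0-4 fit in 3 bits, so all but the low 3 bits are known zero.)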
- KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 3); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 3); break; } } diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h index 188f4f1fa06b..452d5b046055 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h @@ -200,8 +200,7 @@ namespace llvm { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; diff --git a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp index 5cc51cd7a992..87532d11ede8 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -128,11 +128,11 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) { static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) { do { - SmallVector<WeakVH,8> WUsers(CE->user_begin(), CE->user_end()); + SmallVector<WeakTrackingVH, 8> WUsers(CE->user_begin(), CE->user_end()); std::sort(WUsers.begin(), WUsers.end()); WUsers.erase(std::unique(WUsers.begin(), WUsers.end()), WUsers.end()); while (!WUsers.empty()) - if (WeakVH WU = WUsers.pop_back_val()) { + if (WeakTrackingVH WU = WUsers.pop_back_val()) { if (PHINode *PN = dyn_cast<PHINode>(WU)) { for (int I = 0, E = PN->getNumIncomingValues(); I < E; ++I) if (PN->getIncomingValue(I) == CE) { @@ -159,12 +159,12 @@ static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) { } static bool rewriteNonInstructionUses(GlobalVariable *GV, Pass *P) { - SmallVector<WeakVH,8> WUsers; + SmallVector<WeakTrackingVH, 8> WUsers; for (User *U : GV->users()) if (!isa<Instruction>(U)) - WUsers.push_back(WeakVH(U)); + WUsers.push_back(WeakTrackingVH(U)); while (!WUsers.empty()) - if (WeakVH WU = WUsers.pop_back_val()) { + if (WeakTrackingVH WU = WUsers.pop_back_val()) { ConstantExpr *CE = dyn_cast<ConstantExpr>(WU); if (!CE || !replaceConstantExprOp(CE, P)) return false; diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index ab648f884c5b..12eb16789825 100644 --- a/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -216,8 +216,8 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, Function *NewF = Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage, F.getName() + Suffix, M); - NewF->addAttribute(1, Attribute::NonNull); - NewF->addAttribute(1, Attribute::NoAlias); + NewF->addParamAttr(0, Attribute::NonNull); + NewF->addParamAttr(0, Attribute::NoAlias); ValueToValueMapTy VMap; // Replace all args with undefs. The buildCoroutineFrame algorithm already @@ -245,9 +245,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, // Remove old return attributes. NewF->removeAttributes( AttributeList::ReturnIndex, - AttributeList::get( - NewF->getContext(), AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewF->getReturnType()))); + AttributeFuncs::typeIncompatible(NewF->getReturnType())); // Make AllocaSpillBlock the new entry block. 
auto *SwitchBB = cast<BasicBlock>(VMap[ResumeEntry]); diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index a2c8a32dfe86..6408cad08d55 100644 --- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -106,9 +106,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, AttributeList PAL = F->getAttributes(); // First, determine the new argument list - unsigned ArgIndex = 0; + unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++ArgIndex) { + ++I, ++ArgNo) { if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); @@ -120,7 +120,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); - ArgAttrVec.push_back(PAL.getParamAttributes(ArgIndex)); + ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; @@ -214,12 +214,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); - ArgIndex = 1; + ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++AI, ++ArgIndex) + ++I, ++AI, ++ArgNo) if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument - ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex)); + ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); @@ -280,9 +280,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, } // Push any varargs arguments on the list. - for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { + for (; AI != CS.arg_end(); ++AI, ++ArgNo) { Args.push_back(*AI); - ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex)); + ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } SmallVector<OperandBundleDef, 1> OpBundles; @@ -839,17 +839,12 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, // avoiding a register copy. 
if (PtrArg->hasStructRetAttr()) { unsigned ArgNo = PtrArg->getArgNo(); - F->setAttributes( - F->getAttributes() - .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet) - .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); + F->removeParamAttr(ArgNo, Attribute::StructRet); + F->addParamAttr(ArgNo, Attribute::NoAlias); for (Use &U : F->uses()) { CallSite CS(U.getUser()); - CS.setAttributes( - CS.getAttributes() - .removeAttribute(F->getContext(), ArgNo + 1, - Attribute::StructRet) - .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); + CS.removeParamAttr(ArgNo, Attribute::StructRet); + CS.addParamAttr(ArgNo, Attribute::NoAlias); } } diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 9648883b7f27..28cc81c76d4f 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -835,7 +835,7 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { // pointers. for (Function *F : SCCNodes) { // Already noalias. - if (F->doesNotAlias(0)) + if (F->returnDoesNotAlias()) continue; // We can infer and propagate function attributes only when we know that the @@ -855,10 +855,11 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { bool MadeChange = false; for (Function *F : SCCNodes) { - if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) + if (F->returnDoesNotAlias() || + !F->getReturnType()->isPointerTy()) continue; - F->setDoesNotAlias(0); + F->setReturnDoesNotAlias(); ++NumNoAlias; MadeChange = true; } diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp index d66411f04cc4..c7ef2494e3b8 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/IntrinsicInst.h" @@ -25,7 +26,6 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Object/IRObjectFile.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/SourceMgr.h" diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index ae9d4ce11e0d..f277a51ae659 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -239,7 +239,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // we delete a constant array, we may also be holding pointer to one of its // elements (or an element of one of its elements if we're dealing with an // array of arrays) in the worklist. - SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end()); + SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end()); while (!WorkList.empty()) { Value *UV = WorkList.pop_back_val(); if (!UV) @@ -1792,7 +1792,9 @@ static void makeAllConstantUsesInstructions(Constant *C) { NewU->insertBefore(UI); UI->replaceUsesOfWith(U, NewU); } - U->dropAllReferences(); + // We've replaced all the uses, so destroy the constant. (destroyConstant + // will update value handles and metadata.) 
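+ // (Unlike the dropAllReferences call this replaces, destroyConstant also + // deletes the now-dead ConstantExpr itself.)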
+ U->destroyConstant(); } } diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 771770ddc060..0e478ba607be 100644 --- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -207,11 +207,13 @@ private: /// A work queue of functions that may have been modified and should be /// analyzed again. - std::vector<WeakVH> Deferred; + std::vector<WeakTrackingVH> Deferred; /// Checks the rules of the order relation introduced among the function set. /// Returns true if the sanity check passed, and false if it failed. - bool doSanityCheck(std::vector<WeakVH> &Worklist); +#ifndef NDEBUG + bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist); +#endif /// Insert a ComparableFunction into the FnTree, or merge it away if it's /// equal to one that's already present. @@ -283,7 +285,8 @@ ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); } -bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { +#ifndef NDEBUG +bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) { if (const unsigned Max = NumFunctionsForSanityCheck) { unsigned TripleNumber = 0; bool Valid = true; @@ -291,10 +294,12 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; unsigned i = 0; - for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end(); + for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(), + E = Worklist.end(); I != E && i < Max; ++I, ++i) { unsigned j = i; - for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { + for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max; + ++J, ++j) { Function *F1 = cast<Function>(*I); Function *F2 = cast<Function>(*J); int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); @@ -312,7 +317,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { continue; unsigned k = j; - for (std::vector<WeakVH>::iterator K = J; K != E && k < Max; + for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max; ++k, ++K, ++TripleNumber) { if (K == J) continue; @@ -351,6 +356,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { } return true; } +#endif bool MergeFunctions::runOnModule(Module &M) { if (skipModule(M)) @@ -381,12 +387,12 @@ bool MergeFunctions::runOnModule(Module &M) { // consider merging it. Otherwise it is dropped and never considered again. if ((I != S && std::prev(I)->first == I->first) || (std::next(I) != IE && std::next(I)->first == I->first) ) { - Deferred.push_back(WeakVH(I->second)); + Deferred.push_back(WeakTrackingVH(I->second)); } } do { - std::vector<WeakVH> Worklist; + std::vector<WeakTrackingVH> Worklist; Deferred.swap(Worklist); DEBUG(doSanityCheck(Worklist)); @@ -395,7 +401,7 @@ bool MergeFunctions::runOnModule(Module &M) { DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); // Insert functions and merge them.
- for (WeakVH &I : Worklist) { + for (WeakTrackingVH &I : Worklist) { if (!I) continue; Function *F = cast<Function>(I); diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp index 78e71c18fe29..2db47b3b5622 100644 --- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -16,8 +16,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -31,13 +35,18 @@ using namespace llvm; #define DEBUG_TYPE "partial-inlining" -STATISTIC(NumPartialInlined, "Number of functions partially inlined"); +STATISTIC(NumPartialInlined, + "Number of callsites functions partially inlined into."); // Command line option to disable partial-inlining. The default is false: static cl::opt<bool> DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial ininling")); +static cl::opt<unsigned> MaxNumInlineBlocks( + "max-num-inline-blocks", cl::init(5), cl::Hidden, + cl::desc("Max Number of Blocks To be Partially Inlined")); + // Command line option to set the maximum number of partial inlining allowed // for the module. The default value of -1 means no limit. static cl::opt<int> MaxNumPartialInlining( @@ -45,20 +54,52 @@ static cl::opt<int> MaxNumPartialInlining( cl::desc("Max number of partial inlining. The default is unlimited")); namespace { + +struct FunctionOutliningInfo { + FunctionOutliningInfo() + : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr), + ReturnBlockPreds() {} + // Returns the number of blocks to be inlined including all blocks + // in Entries and one return block. + unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; } + + // A set of blocks including the function entry that guard + // the region to be outlined. + SmallVector<BasicBlock *, 4> Entries; + // The return block that is not included in the outlined region. + BasicBlock *ReturnBlock; + // The dominating block of the region ot be outlined. 
+ BasicBlock *NonReturnBlock; + // The set of blocks in Entries that are predecessors to ReturnBlock + SmallVector<BasicBlock *, 4> ReturnBlockPreds; +}; + struct PartialInlinerImpl { - PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {} + PartialInlinerImpl( + std::function<AssumptionCache &(Function &)> *GetAC, + std::function<TargetTransformInfo &(Function &)> *GTTI, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI, + ProfileSummaryInfo *ProfSI) + : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} bool run(Module &M); Function *unswitchFunction(Function *F); + std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F); + private: - InlineFunctionInfo IFI; int NumPartialInlining = 0; + std::function<AssumptionCache &(Function &)> *GetAssumptionCache; + std::function<TargetTransformInfo &(Function &)> *GetTTI; + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI; + ProfileSummaryInfo *PSI; + bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE); bool IsLimitReached() { return (MaxNumPartialInlining != -1 && NumPartialInlining >= MaxNumPartialInlining); } }; + struct PartialInlinerLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid PartialInlinerLegacyPass() : ModulePass(ID) { @@ -67,91 +108,329 @@ struct PartialInlinerLegacyPass : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } bool runOnModule(Module &M) override { if (skipModule(M)) return false; AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>(); + TargetTransformInfoWrapperPass *TTIWP = + &getAnalysis<TargetTransformInfoWrapperPass>(); + ProfileSummaryInfo *PSI = + getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; - InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); - return PartialInlinerImpl(IFI).run(M); + + std::function<TargetTransformInfo &(Function &)> GetTTI = + [&TTIWP](Function &F) -> TargetTransformInfo & { + return TTIWP->getTTI(F); + }; + + return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M); } }; } +std::unique_ptr<FunctionOutliningInfo> +PartialInlinerImpl::computeOutliningInfo(Function *F) { + BasicBlock *EntryBlock = &F->front(); + BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator()); + if (!BR || BR->isUnconditional()) + return std::unique_ptr<FunctionOutliningInfo>(); + + // Returns true if Succ is BB's successor + auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) { + return is_contained(successors(BB), Succ); + }; + + auto SuccSize = [](BasicBlock *BB) { + return std::distance(succ_begin(BB), succ_end(BB)); + }; + + auto IsReturnBlock = [](BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + return isa<ReturnInst>(TI); + }; + + auto GetReturnBlock = [=](BasicBlock *Succ1, BasicBlock *Succ2) { + if (IsReturnBlock(Succ1)) + return std::make_tuple(Succ1, Succ2); + if (IsReturnBlock(Succ2)) + return std::make_tuple(Succ2, Succ1); + + return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); + }; + + // Detect a triangular shape: + auto GetCommonSucc = [=](BasicBlock *Succ1, BasicBlock *Succ2) { + if (IsSuccessor(Succ1, Succ2)) + return std::make_tuple(Succ1, Succ2); +
if (IsSuccessor(Succ2, Succ1)) + return std::make_tuple(Succ2, Succ1); + + return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); + }; + + std::unique_ptr<FunctionOutliningInfo> OutliningInfo = + llvm::make_unique<FunctionOutliningInfo>(); + + BasicBlock *CurrEntry = EntryBlock; + bool CandidateFound = false; + do { + // The number of blocks to be inlined has already reached + // the limit. When MaxNumInlineBlocks is set to 0 or 1, this + // disables partial inlining for the function. + if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks) + break; + + if (SuccSize(CurrEntry) != 2) + break; + + BasicBlock *Succ1 = *succ_begin(CurrEntry); + BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1); + + BasicBlock *ReturnBlock, *NonReturnBlock; + std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); + + if (ReturnBlock) { + OutliningInfo->Entries.push_back(CurrEntry); + OutliningInfo->ReturnBlock = ReturnBlock; + OutliningInfo->NonReturnBlock = NonReturnBlock; + CandidateFound = true; + break; + } + + BasicBlock *CommSucc; + BasicBlock *OtherSucc; + std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2); + + if (!CommSucc) + break; + + OutliningInfo->Entries.push_back(CurrEntry); + CurrEntry = OtherSucc; + + } while (true); + + if (!CandidateFound) + return std::unique_ptr<FunctionOutliningInfo>(); + + // Do a sanity check of the entries: there should not + // be any successors (not in the entry set) other than + // {ReturnBlock, NonReturnBlock} + assert(OutliningInfo->Entries[0] == &F->front()); + DenseSet<BasicBlock *> Entries; + for (BasicBlock *E : OutliningInfo->Entries) + Entries.insert(E); + + // Returns true if BB has a predecessor that is not + // in the Entries set. + auto HasNonEntryPred = [Entries](BasicBlock *BB) { + for (auto Pred : predecessors(BB)) { + if (!Entries.count(Pred)) + return true; + } + return false; + }; + auto CheckAndNormalizeCandidate = + [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) { + for (BasicBlock *E : OutliningInfo->Entries) { + for (auto Succ : successors(E)) { + if (Entries.count(Succ)) + continue; + if (Succ == OutliningInfo->ReturnBlock) + OutliningInfo->ReturnBlockPreds.push_back(E); + else if (Succ != OutliningInfo->NonReturnBlock) + return false; + } + // There should not be any outside incoming edges either: + if (HasNonEntryPred(E)) + return false; + } + return true; + }; + + if (!CheckAndNormalizeCandidate(OutliningInfo.get())) + return std::unique_ptr<FunctionOutliningInfo>(); + + // Now grow the candidate's inlining region further by + // peeling dominating blocks off the outlining region: + while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) { + BasicBlock *Cand = OutliningInfo->NonReturnBlock; + if (SuccSize(Cand) != 2) + break; + + if (HasNonEntryPred(Cand)) + break; + + BasicBlock *Succ1 = *succ_begin(Cand); + BasicBlock *Succ2 = *(succ_begin(Cand) + 1); + + BasicBlock *ReturnBlock, *NonReturnBlock; + std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); + if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock) + break; + + if (NonReturnBlock->getSinglePredecessor() != Cand) + break; + + // Now grow and update OutliningInfo: + OutliningInfo->Entries.push_back(Cand); + OutliningInfo->NonReturnBlock = NonReturnBlock; + OutliningInfo->ReturnBlockPreds.push_back(Cand); + Entries.insert(Cand); + } + + return OutliningInfo; +} + +bool PartialInlinerImpl::shouldPartialInline(CallSite CS, + OptimizationRemarkEmitter &ORE) { + // TODO: more sharing
with shouldInline in Inliner.cpp + using namespace ore; + Instruction *Call = CS.getInstruction(); + Function *Callee = CS.getCalledFunction(); + Function *Caller = CS.getCaller(); + auto &CalleeTTI = (*GetTTI)(*Callee); + InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, + *GetAssumptionCache, GetBFI, PSI); + + if (IC.isAlways()) { + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) + << NV("Callee", Callee) + << " should always be fully inlined, not partially"); + return false; + } + + if (IC.isNever()) { + ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + << NV("Callee", Callee) << " not partially inlined into " + << NV("Caller", Caller) + << " because it should never be inlined (cost=never)"); + return false; + } + + if (!IC) { + ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) + << NV("Callee", Callee) << " not partially inlined into " + << NV("Caller", Caller) << " because too costly to inline (cost=" + << NV("Cost", IC.getCost()) << ", threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + return false; + } + + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) + << NV("Callee", Callee) << " can be partially inlined into " + << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) + << " (threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + return true; +} + Function *PartialInlinerImpl::unswitchFunction(Function *F) { - // First, verify that this function is an unswitching candidate... + if (F->hasAddressTaken()) return nullptr; - BasicBlock *EntryBlock = &F->front(); - BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator()); - if (!BR || BR->isUnconditional()) + // Let inliner handle it + if (F->hasFnAttribute(Attribute::AlwaysInline)) return nullptr; - BasicBlock *ReturnBlock = nullptr; - BasicBlock *NonReturnBlock = nullptr; - unsigned ReturnCount = 0; - for (BasicBlock *BB : successors(EntryBlock)) { - if (isa<ReturnInst>(BB->getTerminator())) { - ReturnBlock = BB; - ReturnCount++; - } else - NonReturnBlock = BB; - } + if (F->hasFnAttribute(Attribute::NoInline)) + return nullptr; + + if (PSI->isFunctionEntryCold(F)) + return nullptr; + + std::unique_ptr<FunctionOutliningInfo> OutliningInfo = + computeOutliningInfo(F); - if (ReturnCount != 1) + if (!OutliningInfo) return nullptr; // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; Function *DuplicateFunction = CloneFunction(F, VMap); - DuplicateFunction->setLinkage(GlobalValue::InternalLinkage); - BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]); - BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]); - BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]); + BasicBlock *NewReturnBlock = + cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]); + BasicBlock *NewNonReturnBlock = + cast<BasicBlock>(VMap[OutliningInfo->NonReturnBlock]); + DenseSet<BasicBlock *> NewEntries; + for (BasicBlock *BB : OutliningInfo->Entries) { + NewEntries.insert(cast<BasicBlock>(VMap[BB])); + } // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. 
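// Editor's note (not part of the patch): the replaceAllUsesWith() below is
// deliberate and temporary. Pointing every use of F at the clone makes the
// InlineFunction() calls later in unswitchFunction() inline the clone that
// carries the outlined cold region; once those call sites are processed,
// the remaining uses are redirected back to F and the clone is erased.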
F->replaceAllUsesWith(DuplicateFunction); + auto getFirstPHI = [](BasicBlock *BB) { + BasicBlock::iterator I = BB->begin(); + PHINode *FirstPhi = nullptr; + while (I != BB->end()) { + PHINode *Phi = dyn_cast<PHINode>(I); + if (!Phi) + break; + if (!FirstPhi) { + FirstPhi = Phi; + break; + } + } + return FirstPhi; + }; // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. BasicBlock *PreReturn = NewReturnBlock; - NewReturnBlock = NewReturnBlock->splitBasicBlock( - NewReturnBlock->getFirstNonPHI()->getIterator()); - BasicBlock::iterator I = PreReturn->begin(); - Instruction *Ins = &NewReturnBlock->front(); - while (I != PreReturn->end()) { - PHINode *OldPhi = dyn_cast<PHINode>(I); - if (!OldPhi) - break; - - PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); - OldPhi->replaceAllUsesWith(RetPhi); - Ins = NewReturnBlock->getFirstNonPHI(); - - RetPhi->addIncoming(&*I, PreReturn); - RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock), - NewEntryBlock); - OldPhi->removeIncomingValue(NewEntryBlock); - - ++I; + // Only split the block when necessary: + PHINode *FirstPhi = getFirstPHI(PreReturn); + unsigned NumPredsFromEntries = OutliningInfo->ReturnBlockPreds.size(); + if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) { + + NewReturnBlock = NewReturnBlock->splitBasicBlock( + NewReturnBlock->getFirstNonPHI()->getIterator()); + BasicBlock::iterator I = PreReturn->begin(); + Instruction *Ins = &NewReturnBlock->front(); + while (I != PreReturn->end()) { + PHINode *OldPhi = dyn_cast<PHINode>(I); + if (!OldPhi) + break; + + PHINode *RetPhi = + PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); + OldPhi->replaceAllUsesWith(RetPhi); + Ins = NewReturnBlock->getFirstNonPHI(); + + RetPhi->addIncoming(&*I, PreReturn); + for (BasicBlock *E : OutliningInfo->ReturnBlockPreds) { + BasicBlock *NewE = cast<BasicBlock>(VMap[E]); + RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE); + OldPhi->removeIncomingValue(NewE); + } + ++I; + } + for (auto E : OutliningInfo->ReturnBlockPreds) { + BasicBlock *NewE = cast<BasicBlock>(VMap[E]); + NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); + } } - NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); + // Returns true if the block is to be partially inlined into the caller + // (i.e. not to be extracted to the out-of-line function) + auto ToBeInlined = [=](BasicBlock *BB) { + return BB == NewReturnBlock || NewEntries.count(BB); + }; // Gather up the blocks that we're going to extract. std::vector<BasicBlock *> ToExtract; ToExtract.push_back(NewNonReturnBlock); for (BasicBlock &BB : *DuplicateFunction) if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock) ToExtract.push_back(&BB); // The CodeExtractor needs a dominator tree.
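To make the accepted CFG shape concrete, here is an editorial sketch (not part of the patch; the function and helper names are invented for illustration) of the kind of source computeOutliningInfo() recognizes and of what unswitchFunction() does with it:

void heavy_work(int); // assumed helper, stands in for the cold region

// Candidate shape: the entry block tests a condition and returns early.
int foo(int x) {
  if (x < 0)        // entry block, ends in a conditional branch
    return -1;      // ReturnBlock: stays inline in every caller
  heavy_work(x);    // NonReturnBlock: dominates the region to outline
  return 0;
}

// After the pass, each call site of foo() behaves roughly like
//   r = (x < 0) ? -1 : foo.1_outlined(x);
// where foo.1_outlined() holds the region CodeExtractor pulled out and the
// cheap early-return test has been inlined by InlineFunction().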
@@ -183,16 +462,22 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { if (IsLimitReached()) continue; - NumPartialInlining++; OptimizationRemarkEmitter ORE(CS.getCaller()); + if (!shouldPartialInline(CS, ORE)) + continue; + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); BasicBlock *Block = CS.getParent(); ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block) << ore::NV("Callee", F) << " partially inlined into " << ore::NV("Caller", CS.getCaller())); + InlineFunctionInfo IFI(nullptr, GetAssumptionCache); InlineFunction(CS, IFI); + NumPartialInlining++; + // update stats + NumPartialInlined++; } // Ditch the duplicate, since we're done with it, and rewrite all remaining @@ -200,7 +485,6 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { DuplicateFunction->replaceAllUsesWith(F); DuplicateFunction->eraseFromParent(); - ++NumPartialInlined; return ExtractedFunction; } @@ -246,6 +530,8 @@ char PartialInlinerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) @@ -256,12 +542,25 @@ ModulePass *llvm::createPartialInliningPass() { PreservedAnalyses PartialInlinerPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & { return FAM.getResult<AssumptionAnalysis>(F); }; - InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); - if (PartialInlinerImpl(IFI).run(M)) + + std::function<BlockFrequencyInfo &(Function &)> GetBFI = + [&FAM](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult<BlockFrequencyAnalysis>(F); + }; + + std::function<TargetTransformInfo &(Function &)> GetTTI = + [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult<TargetIRAnalysis>(F); + }; + + ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); + + if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 0d5910ebbfcc..203594572618 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -38,6 +38,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Vectorize.h" using namespace llvm; @@ -137,14 +138,19 @@ static cl::opt<int> PreInlineThreshold( "(default = 75)")); static cl::opt<bool> EnableGVNHoist( - "enable-gvn-hoist", cl::init(true), cl::Hidden, - cl::desc("Enable the GVN hoisting pass (default = on)")); + "enable-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass (default = off)")); static cl::opt<bool> DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false), cl::Hidden, cl::desc("Disable shrink-wrap library calls")); +static cl::opt<bool> + EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false), + cl::Hidden, + cl::desc("Enable 
the simple loop unswitch pass.")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -318,7 +324,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); MPM.add(createLICMPass()); // Hoist loop invariants - MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + if (EnableSimpleLoopUnswitch) + MPM.add(createSimpleLoopUnswitchLegacyPass()); + else + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); MPM.add(createCFGSimplificationPass()); addInstructionCombiningPass(MPM); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 9801a0a61416..d3a3c24ce7b4 100644 --- a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -30,42 +30,11 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; namespace { -// Produce a unique identifier for this module by taking the MD5 sum of the -// names of the module's strong external symbols. This identifier is -// normally guaranteed to be unique, or the program would fail to link due to -// multiply defined symbols. -// -// If the module has no strong external symbols (such a module may still have a -// semantic effect if it performs global initialization), we cannot produce a -// unique identifier for this module, so we return the empty string, which -// causes the entire module to be written as a regular LTO module. -std::string getModuleId(Module *M) { - MD5 Md5; - bool ExportsSymbols = false; - for (auto &GV : M->global_values()) { - if (GV.isDeclaration() || GV.getName().startswith("llvm.") || - !GV.hasExternalLinkage()) - continue; - ExportsSymbols = true; - Md5.update(GV.getName()); - Md5.update(ArrayRef<uint8_t>{0}); - } - - if (!ExportsSymbols) - return ""; - - MD5::MD5Result R; - Md5.final(R); - - SmallString<32> Str; - MD5::stringifyResult(R, Str); - return ("$" + Str).str(); -} - // Promote each local-linkage entity defined by ExportM and used by ImportM by // changing visibility and appending the given ModuleId. void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { @@ -251,7 +220,7 @@ void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) { void splitAndWriteThinLTOBitcode( raw_ostream &OS, raw_ostream *ThinLinkOS, function_ref<AAResults &(Function &)> AARGetter, Module &M) { - std::string ModuleId = getModuleId(&M); + std::string ModuleId = getUniqueModuleId(&M); if (ModuleId.empty()) { // We couldn't generate a module ID for this module, just write it out as a // regular LTO module. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 030461004f56..4f1f19499768 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -861,7 +861,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero, // Find the most significant known 0 other than the sign bit. 
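// Editor's example (added for illustration, not in the patch): with 8-bit
// operands, if Op0 is known to have bit 6 clear and Op1's possible one bits
// are confined to bits 0-5, then any carry out of the low bits is absorbed
// at the known-zero bit 6, so the addition can never ripple into the sign
// bit (bit 7).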
int BitWidth = Op0KnownZero.getBitWidth(); APInt Op0KnownZeroTemp(Op0KnownZero); - Op0KnownZeroTemp.clearBit(BitWidth - 1); + Op0KnownZeroTemp.clearSignBit(); int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1; int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1; @@ -1037,7 +1037,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + I.hasNoUnsignedWrap(), SQ)) return replaceInstUsesWith(I, V); // (A*B)+(A*C) -> A*(B+C) etc @@ -1358,8 +1358,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = - SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); if (isa<Constant>(RHS)) @@ -1550,7 +1549,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + I.hasNoUnsignedWrap(), SQ)) return replaceInstUsesWith(I, V); // (A*B)-(A*C) -> A*(B-C) etc @@ -1756,8 +1755,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = - SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); // fsub nsz 0, X ==> fsub nsz -0.0, X diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a97b5a9ec0bb..c7092bf3a398 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1213,7 +1213,7 @@ static Instruction *foldAndToXor(BinaryOperator &I, // (~B | A) & (~A | B) --> ~(A ^ B) // (~B | A) & (B | ~A) --> ~(A ^ B) if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Value(B)))) + match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); return nullptr; @@ -1254,7 +1254,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyAndInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -2039,7 +2039,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyOrInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -2415,7 +2415,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyXorInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); if (Instruction *NewXor = foldXorToXor(I)) @@ -2433,25 +2433,32 @@ Instruction *InstCombiner::visitXor(BinaryOperator 
&I) { if (Value *V = SimplifyBSwap(I)) return replaceInstUsesWith(I, V); + // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand. + Value *X, *Y; + + // We must eliminate the and/or (one-use) for these transforms to not increase + // the instruction count. + // ~(~X & Y) --> (X | ~Y) + // ~(Y & ~X) --> (X | ~Y) + if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) { + Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + return BinaryOperator::CreateOr(X, NotY); + } + // ~(~X | Y) --> (X & ~Y) + // ~(Y | ~X) --> (X & ~Y) + if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) { + Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + return BinaryOperator::CreateAnd(X, NotY); + } + // Is this a 'not' (~) fed by a binary operator? BinaryOperator *NotOp; if (match(&I, m_Not(m_BinOp(NotOp)))) { if (NotOp->getOpcode() == Instruction::And || NotOp->getOpcode() == Instruction::Or) { - // ~(~X & Y) --> (X | ~Y) - De Morgan's Law - // ~(~X | Y) === (X & ~Y) - De Morgan's Law - if (dyn_castNotVal(NotOp->getOperand(1))) - NotOp->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) { - Value *NotY = Builder->CreateNot( - NotOp->getOperand(1), NotOp->getOperand(1)->getName() + ".not"); - if (NotOp->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(Op0NotVal, NotY); - return BinaryOperator::CreateAnd(Op0NotVal, NotY); - } - - // ~(X & Y) --> (~X | ~Y) - De Morgan's Law - // ~(X | Y) === (~X & ~Y) - De Morgan's Law + // Apply DeMorgan's Law when inverts are free: + // ~(X & Y) --> (~X | ~Y) + // ~(X | Y) --> (~X & ~Y) if (IsFreeToInvert(NotOp->getOperand(0), NotOp->getOperand(0)->hasOneUse()) && IsFreeToInvert(NotOp->getOperand(1), diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 313ab13b9e2b..4fd90d78a63b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -379,7 +379,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II, for (unsigned i = 0; i != NumSubElts; ++i) { unsigned SubEltIdx = (NumSubElts - 1) - i; auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx)); - Count = Count.shl(BitWidth); + Count <<= BitWidth; Count |= SubElt->getValue().zextOrTrunc(64); } } @@ -1384,17 +1384,17 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { // Create a mask for bits above (ctlz) or below (cttz) the first known one. bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; - unsigned NumMaskBits = IsTZ ? Known.One.countTrailingZeros() - : Known.One.countLeadingZeros(); - APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits) - : APInt::getHighBitsSet(BitWidth, NumMaskBits); + unsigned PossibleZeros = IsTZ ? Known.One.countTrailingZeros() + : Known.One.countLeadingZeros(); + unsigned DefiniteZeros = IsTZ ? Known.Zero.countTrailingOnes() + : Known.Zero.countLeadingOnes(); // If all bits above (ctlz) or below (cttz) the first known one are known // zero, this value is constant. // FIXME: This should be in InstSimplify because we're replacing an // instruction with a constant. 
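// Editor's restatement of the check below (not in the patch): for cttz,
// PossibleZeros is the position of the lowest bit known to be one (the most
// trailing zeros any input can have) and DefiniteZeros is the number of low
// bits known to be zero (the fewest). When the two agree at some value P,
// every possible input has exactly P trailing zeros, so cttz folds to the
// constant P; the ctlz case is symmetric with leading bits.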
- if (Mask.isSubsetOf(Known.Zero)) { - auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits)); + if (PossibleZeros == DefiniteZeros) { + auto *C = ConstantInt::get(IT, DefiniteZeros); return IC.replaceInstUsesWith(II, C); } @@ -1818,8 +1818,8 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { /// lifting. Instruction *InstCombiner::visitCallInst(CallInst &CI) { auto Args = CI.arg_operands(); - if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL, - &TLI, &DT, &AC)) + if (Value *V = + SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), SQ)) return replaceInstUsesWith(CI, V); if (isFreeCall(&CI, &TLI)) @@ -3845,7 +3845,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { if (V->getType()->isPointerTy() && !CS.paramHasAttr(ArgNo, Attribute::NonNull) && isKnownNonNullAt(V, CS.getInstruction(), &DT)) - Indices.push_back(ArgNo + 1); + Indices.push_back(ArgNo + AttributeList::FirstArgIndex); ArgNo++; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index d846a631b96f..60970775de63 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -190,8 +190,8 @@ static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known, Max = Known.One|UnknownBits; if (UnknownBits.isNegative()) { // Sign bit is unknown - Min.setBit(Min.getBitWidth()-1); - Max.clearBit(Max.getBitWidth()-1); + Min.setSignBit(); + Max.clearSignBit(); } } @@ -4269,8 +4269,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Changed = true; } - if (Value *V = - SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, &TLI, &DT, &AC, &I)) + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // comparing -val or val with non-zero is the same as just comparing val @@ -4778,8 +4778,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, - I.getFastMathFlags(), DL, &TLI, &DT, &AC, &I)) + if (Value *V = + SimplifyFCmpInst(I.getPredicate(), Op0, Op1, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Simplify 'fcmp pred X, X' diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 776686d3d117..3be6419a129a 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -17,9 +17,11 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -27,10 +29,9 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/IR/DIBuilder.h" #define DEBUG_TYPE "instcombine" @@ -193,7 +194,7 @@ private: TargetLibraryInfo &TLI; DominatorTree &DT; const DataLayout &DL; - + const 
SimplifyQuery SQ; // Optional analyses. When non-null, these can both be used to do better // combining and will be updated to reflect any changes. LoopInfo *LI; @@ -203,11 +204,11 @@ private: public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, - AssumptionCache &AC, TargetLibraryInfo &TLI, - DominatorTree &DT, const DataLayout &DL, LoopInfo *LI) + AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, + const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), LI(LI), MadeIRChange(false) {} + DL(DL), SQ(DL, &TLI, &DT, &AC), LI(LI), MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. /// @@ -533,6 +534,12 @@ private: /// value, or null if it didn't simplify. Value *SimplifyUsingDistributiveLaws(BinaryOperator &I); + /// This tries to simplify binary operations by factorizing out common terms + /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). + Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &, + Instruction::BinaryOps, Value *, Value *, Value *, + Value *); + /// \brief Attempts to replace V with a simpler value based on the demanded /// bits. Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known, diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index ce66581a491a..face9d9237ae 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -179,7 +179,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyMulInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyMulInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); if (Value *V = SimplifyUsingDistributiveLaws(I)) @@ -606,8 +606,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (isa<Constant>(Op0)) std::swap(Op0, Op1); - if (Value *V = - SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); bool AllowReassociate = I.hasUnsafeAlgebra(); @@ -1111,7 +1110,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyUDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyUDivInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1184,7 +1183,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySDivInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1296,8 +1295,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), - DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); if (isa<Constant>(Op0)) @@ -1481,7 +1479,7 @@ Instruction 
*InstCombiner::visitURem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyURemInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyURemInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); if (Instruction *common = commonIRemTransforms(I)) @@ -1524,7 +1522,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySRemInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySRemInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // Handle the integer rem common cases @@ -1597,8 +1595,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), - DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); // Handle cases involving: rem X, (select Cond, Y, Z) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 85e5b6ba2dc2..1117c11f4f51 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -880,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - if (Value *V = SimplifyInstruction(&PN, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyInstruction(&PN, SQ)) return replaceInstUsesWith(PN, V); if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN)) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 76829c5e457b..7afb8814fe52 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1121,8 +1121,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *FalseVal = SI.getFalseValue(); Type *SelType = SI.getType(); - if (Value *V = - SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ)) return replaceInstUsesWith(SI, V); if (Instruction *I = canonicalizeSelectToShuffle(SI)) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index f77d713b9b07..219effce7ba5 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -520,7 +520,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + I.hasNoUnsignedWrap(), SQ)) return replaceInstUsesWith(I, V); if (Instruction *V = commonShiftTransforms(I)) @@ -618,7 +618,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), SQ)) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) @@ -702,7 +702,7 @@ Instruction 
*InstCombiner::visitAShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), SQ)) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 8d0ed8532779..0195c5e727c9 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -589,12 +589,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If LHS is non-negative or has all low bits zero, then the upper bits // are all zero. - if (LHSKnown.Zero.isSignBitSet() || LowBits.isSubsetOf(LHSKnown.Zero)) + if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero)) Known.Zero |= ~LowBits; // If LHS is negative and not all low bits are zero, then the upper bits // are all one. - if (LHSKnown.One.isSignBitSet() && LowBits.intersects(LHSKnown.One)) + if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One)) Known.One |= ~LowBits; assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); @@ -607,8 +607,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (DemandedMask.isSignBitSet()) { computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // If it's known zero, our sign bit is also zero. - if (LHSKnown.Zero.isSignBitSet()) - Known.Zero.setSignBit(); + if (LHSKnown.isNonNegative()) + Known.makeNonNegative(); } break; case Instruction::URem: { @@ -1537,7 +1537,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane); LaneElts = LaneElts.getLoBits(InnerVWidthPerLane); - LaneElts = LaneElts.shl(InnerVWidthPerLane * (2 * Lane + OpNum)); + LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum); UndefElts |= LaneElts; } } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index e89b400a4afc..7fc6774f1849 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -144,8 +144,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { } Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { - if (Value *V = SimplifyExtractElementInst( - EI.getVectorOperand(), EI.getIndexOperand(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(), + EI.getIndexOperand(), SQ)) return replaceInstUsesWith(EI, V); // If vector val is constant with all elements the same, replace EI with @@ -1140,8 +1140,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SmallVector<int, 16> Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); - if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), - SVI.getType(), DL, &TLI, &DT, &AC)) + if (auto *V = + SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), SVI.getType(), SQ)) return replaceInstUsesWith(SVI, V); bool MadeChange = false; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp 
b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 4729c79ca4c3..1eb98b18bfb5 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -256,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "B op C" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, C, DL)) { + if (Value *V = SimplifyBinOp(Opcode, B, C, SQ)) { // It simplifies to V. Form "A op V". I.setOperand(0, A); I.setOperand(1, V); @@ -285,7 +285,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, DL)) { + if (Value *V = SimplifyBinOp(Opcode, A, B, SQ)) { // It simplifies to V. Form "V op C". I.setOperand(0, V); I.setOperand(1, C); @@ -313,7 +313,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) { // It simplifies to V. Form "V op B". I.setOperand(0, V); I.setOperand(1, B); @@ -333,7 +333,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) { // It simplifies to V. Form "B op V". I.setOperand(0, B); I.setOperand(1, V); @@ -498,10 +498,10 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode, /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). -static Value *tryFactorization(InstCombiner::BuilderTy *Builder, - const DataLayout &DL, BinaryOperator &I, - Instruction::BinaryOps InnerOpcode, Value *A, - Value *B, Value *C, Value *D) { +Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, + BinaryOperator &I, + Instruction::BinaryOps InnerOpcode, + Value *A, Value *B, Value *C, Value *D) { assert(A && B && C && D && "All values must be provided"); Value *V = nullptr; @@ -521,7 +521,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "A op' (B op D)". // If "B op D" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, B, D, DL); + V = SimplifyBinOp(TopLevelOpcode, B, D, SQ); // If "B op D" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) @@ -540,7 +540,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "(A op C) op' B". // If "A op C" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, A, C, DL); + V = SimplifyBinOp(TopLevelOpcode, A, C, SQ); // If "A op C" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. @@ -610,23 +610,23 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { // The instruction has the form "(A op' B) op (C op' D)". Try to factorize // a common term. 
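// Editor's example (not in the patch): for I = (X * 5) + (X * 3), LHS and
// RHS share the common operand A == C == X with InnerOpcode Mul, so
// tryFactorization considers "X * (5 + 3)"; since "5 + 3" simplifies to 8,
// the whole expression folds to X * 8 without creating extra instructions.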
if (Op0 && Op1 && LHSOpcode == RHSOpcode) - if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D)) + if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D)) return V; // The instruction has the form "(A op' B) op (C)". Try to factorize common // term. if (Op0) if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) - if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS, - Ident)) + if (Value *V = + tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident)) return V; // The instruction has the form "(B) op (C op' D)". Try to factorize common // term. if (Op1) if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) - if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, Ident, - C, D)) + if (Value *V = + tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D)) return V; } @@ -638,8 +638,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' // Do "A op C" and "B op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, DL)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) { + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ)) { // They do! Return "L op' R". ++NumExpand; C = Builder->CreateBinOp(InnerOpcode, L, R); @@ -655,8 +655,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' // Do "A op B" and "A op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, DL)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) { + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) { // They do! Return "L op' R". ++NumExpand; A = Builder->CreateBinOp(InnerOpcode, L, R); @@ -672,14 +672,14 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (SI0->getCondition() == SI1->getCondition()) { Value *SI = nullptr; if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), - SI1->getFalseValue(), DL, &TLI, &DT, &AC)) + SI1->getFalseValue(), SQ)) SI = Builder->CreateSelect(SI0->getCondition(), Builder->CreateBinOp(TopLevelOpcode, SI0->getTrueValue(), SI1->getTrueValue()), V); if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), - SI1->getTrueValue(), DL, &TLI, &DT, &AC)) + SI1->getTrueValue(), SQ)) SI = Builder->CreateSelect( SI0->getCondition(), V, Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), @@ -1399,8 +1399,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); - if (Value *V = - SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ)) return replaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); @@ -1589,7 +1588,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (SO1->getType() != GO1->getType()) return nullptr; - Value* Sum = SimplifyAddInst(GO1, SO1, false, false, DL, &TLI, &DT, &AC); + Value *Sum = SimplifyAddInst(GO1, SO1, false, false, SQ); // Only do the combine when we are sure the cost after the // merge is never more than that before the merge. 
if (Sum == nullptr) @@ -1949,9 +1948,9 @@ static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI, return isAllocLikeFn(V, TLI) && V != AI; } -static bool -isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, - const TargetLibraryInfo *TLI) { +static bool isAllocSiteRemovable(Instruction *AI, + SmallVectorImpl<WeakTrackingVH> &Users, + const TargetLibraryInfo *TLI) { SmallVector<Instruction*, 4> Worklist; Worklist.push_back(AI); @@ -2035,7 +2034,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. - SmallVector<WeakVH, 64> Users; + SmallVector<WeakTrackingVH, 64> Users; if (isAllocSiteRemovable(&MI, Users, &TLI)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { // Lowering all @llvm.objectsize calls first because they may @@ -2304,8 +2303,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (!EV.hasIndices()) return replaceInstUsesWith(EV, Agg); - if (Value *V = - SimplifyExtractValueInst(Agg, EV.getIndices(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), SQ)) return replaceInstUsesWith(EV, V); if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b866958e3c4b..b034ccc46933 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -101,6 +101,10 @@ static const char *const kAsanRegisterImageGlobalsName = "__asan_register_image_globals"; static const char *const kAsanUnregisterImageGlobalsName = "__asan_unregister_image_globals"; +static const char *const kAsanRegisterElfGlobalsName = + "__asan_register_elf_globals"; +static const char *const kAsanUnregisterElfGlobalsName = + "__asan_unregister_elf_globals"; static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *const kAsanInitName = "__asan_init"; @@ -120,8 +124,11 @@ static const char *const kAsanPoisonStackMemoryName = "__asan_poison_stack_memory"; static const char *const kAsanUnpoisonStackMemoryName = "__asan_unpoison_stack_memory"; + +// ASan version script has __asan_* wildcard. Triple underscore prevents a +// linker (gold) warning about attempting to export a local symbol. static const char *const kAsanGlobalsRegisteredFlagName = - "__asan_globals_registered"; + "___asan_globals_registered"; static const char *const kAsanOptionDetectUseAfterReturn = "__asan_option_detect_stack_use_after_return"; @@ -270,6 +277,13 @@ static cl::opt<bool> "code stripping of globals"), cl::Hidden, cl::init(true)); +// This is on by default even though there is a bug in gold: +// https://sourceware.org/bugzilla/show_bug.cgi?id=19002 +static cl::opt<bool> + ClWithComdat("asan-with-comdat", + cl::desc("Place ASan constructors in comdat sections"), + cl::Hidden, cl::init(true)); + // Debug flags. 
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, cl::init(0)); @@ -607,10 +621,14 @@ public: private: void initializeCallbacks(Module &M); - bool InstrumentGlobals(IRBuilder<> &IRB, Module &M); + bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat); void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, ArrayRef<Constant *> MetadataInitializers); + void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M, + ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers, + const std::string &UniqueModuleId); void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, ArrayRef<Constant *> MetadataInitializers); @@ -621,7 +639,8 @@ private: GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer, StringRef OriginalName); - void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata); + void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata, + StringRef InternalSuffix); IRBuilder<> CreateAsanModuleDtor(Module &M); bool ShouldInstrumentGlobal(GlobalVariable *G); @@ -647,6 +666,11 @@ private: Function *AsanUnregisterGlobals; Function *AsanRegisterImageGlobals; Function *AsanUnregisterImageGlobals; + Function *AsanRegisterElfGlobals; + Function *AsanUnregisterElfGlobals; + + Function *AsanCtorFunction = nullptr; + Function *AsanDtorFunction = nullptr; }; // Stack poisoning does not play well with exception handling. @@ -1431,8 +1455,13 @@ void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, void AddressSanitizerModule::createInitializerPoisonCalls( Module &M, GlobalValue *ModuleName) { GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return; + + ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); + if (!CA) + return; - ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); for (Use &OP : CA->operands()) { if (isa<ConstantAggregateZero>(OP)) continue; ConstantStruct *CS = cast<ConstantStruct>(OP); @@ -1594,12 +1623,22 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy)); AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage); + + AsanRegisterElfGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy)); + AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage); + + AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy)); + AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage); } // Put the metadata and the instrumented global in the same group. This ensures // that the metadata is discarded if the instrumented global is discarded. 
void AddressSanitizerModule::SetComdatForGlobalMetadata( - GlobalVariable *G, GlobalVariable *Metadata) { + GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) { Module &M = *G->getParent(); Comdat *C = G->getComdat(); if (!C) { @@ -1609,7 +1648,15 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata( assert(G->hasLocalLinkage()); G->setName(Twine(kAsanGenPrefix) + "_anon_global"); } - C = M.getOrInsertComdat(G->getName()); + + if (!InternalSuffix.empty() && G->hasLocalLinkage()) { + std::string Name = G->getName(); + Name += InternalSuffix; + C = M.getOrInsertComdat(Name); + } else { + C = M.getOrInsertComdat(G->getName()); + } + // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. if (TargetTriple.isOSBinFormatCOFF()) C->setSelectionKind(Comdat::NoDuplicates); @@ -1636,11 +1683,10 @@ AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer, } IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) { - Function *AsanDtorFunction = + AsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*C), false), GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB)); } @@ -1665,8 +1711,67 @@ void AddressSanitizerModule::InstrumentGlobalsCOFF( "global metadata will not be padded appropriately"); Metadata->setAlignment(SizeOfGlobalStruct); - SetComdatForGlobalMetadata(G, Metadata); + SetComdatForGlobalMetadata(G, Metadata, ""); + } +} + +void AddressSanitizerModule::InstrumentGlobalsELF( + IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers, + const std::string &UniqueModuleId) { + assert(ExtendedGlobals.size() == MetadataInitializers.size()); + + SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size()); + for (size_t i = 0; i < ExtendedGlobals.size(); i++) { + GlobalVariable *G = ExtendedGlobals[i]; + GlobalVariable *Metadata = + CreateMetadataGlobal(M, MetadataInitializers[i], G->getName()); + MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G)); + Metadata->setMetadata(LLVMContext::MD_associated, MD); + MetadataGlobals[i] = Metadata; + + SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId); } + + // Update llvm.compiler.used, adding the new metadata globals. This is + // needed so that during LTO these variables stay alive. + if (!MetadataGlobals.empty()) + appendToCompilerUsed(M, MetadataGlobals); + + // RegisteredFlag serves two purposes. First, we can pass it to dladdr() + // to look up the loaded image that contains it. Second, we can store in it + // whether registration has already occurred, to prevent duplicate + // registration. + // + // Common linkage ensures that there is only one global per shared library. + GlobalVariable *RegisteredFlag = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::CommonLinkage, + ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); + RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); + + // Create start and stop symbols. 
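// Editor's note (standard ELF linker behavior, stated here as background):
// for a section whose name is a valid C identifier (such as the
// asan_globals metadata section used here), the linker automatically
// defines __start_<section> and __stop_<section> encapsulation symbols
// delimiting the section, so the runtime can walk [__start, __stop) to
// visit the metadata of every instrumented global linked into the image.
// Declaring them extern_weak keeps the link working when no such section
// ends up in the output.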
+ GlobalVariable *StartELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__start_" + getGlobalMetadataSection()); + StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + GlobalVariable *StopELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__stop_" + getGlobalMetadataSection()); + StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + + // Create a call to register the globals with the runtime. + IRB.CreateCall(AsanRegisterElfGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), + IRB.CreatePointerCast(StartELFMetadata, IntptrTy), + IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); + + // We also need to unregister globals at the end, e.g., when a shared library + // gets closed. + IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); + IRB_Dtor.CreateCall(AsanUnregisterElfGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), + IRB.CreatePointerCast(StartELFMetadata, IntptrTy), + IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); } void AddressSanitizerModule::InstrumentGlobalsMachO( @@ -1756,7 +1861,10 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray( // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. -bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { +// Sets *CtorComdat to true if the global registration code emitted into the +// asan constructor is comdat-compatible. +bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) { + *CtorComdat = false; GlobalsMD.init(M); SmallVector<GlobalVariable *, 16> GlobalsToChange; @@ -1766,7 +1874,10 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { } size_t n = GlobalsToChange.size(); - if (n == 0) return false; + if (n == 0) { + *CtorComdat = true; + return false; + } auto &DL = M.getDataLayout(); @@ -1911,7 +2022,14 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { Initializers[i] = Initializer; } - if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { + std::string ELFUniqueModuleId = + (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M) + : ""; + + if (!ELFUniqueModuleId.empty()) { + InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId); + *CtorComdat = true; + } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers); } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) { InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers); @@ -1938,17 +2056,36 @@ bool AddressSanitizerModule::runOnModule(Module &M) { if (CompileKernel) return false; - Function *AsanCtorFunction; + // Create a module constructor. A destructor is created lazily because not all + // platforms, and not all modules need it. std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions( M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName); - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); + bool CtorComdat = true; bool Changed = false; // TODO(glider): temporarily disabled globals instrumentation for KASan. 
if (ClGlobals) { IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator()); - Changed |= InstrumentGlobals(IRB, M); + Changed |= InstrumentGlobals(IRB, M, &CtorComdat); + } + + // Put the constructor and destructor in comdat if both + // (1) global instrumentation is not TU-specific, and + // (2) the target is ELF. + if (ClWithComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) { + AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName)); + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority, + AsanCtorFunction); + if (AsanDtorFunction) { + AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName)); + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority, + AsanDtorFunction); + } + } else { + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); + if (AsanDtorFunction) + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); } return Changed; @@ -2586,7 +2723,7 @@ void FunctionStackPoisoner::processStaticAllocas() { Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); - replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/false); + replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, DIExpression::NoDeref); AI->replaceAllUsesWith(NewAllocaPtr); } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 4e454f0c95b6..8786781933ea 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -254,7 +254,7 @@ class DataFlowSanitizer : public ModulePass { MDNode *ColdCallWeights; DFSanABIList ABIList; DenseMap<Value *, Function *> UnwrappedFnMap; - AttributeList ReadOnlyNoneAttrs; + AttrBuilder ReadOnlyNoneAttrs; bool DFSanRuntimeShadowMask; Value *getShadowAddress(Value *Addr, Instruction *Pos); @@ -544,16 +544,12 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, NewF->copyAttributesFrom(F); NewF->removeAttributes( AttributeList::ReturnIndex, - AttributeList::get( - F->getContext(), AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewFT->getReturnType()))); + AttributeFuncs::typeIncompatible(NewFT->getReturnType())); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); if (F->isVarArg()) { - NewF->removeAttributes( - AttributeList::FunctionIndex, - AttributeList().addAttribute(*Ctx, AttributeList::FunctionIndex, - "split-stack")); + NewF->removeAttributes(AttributeList::FunctionIndex, + AttrBuilder().addAttribute("split-stack")); CallInst::Create(DFSanVarargWrapperFn, IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "", BB); @@ -629,16 +625,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) { F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - F->addAttribute(1, Attribute::ZExt); - F->addAttribute(2, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); + F->addParamAttr(1, Attribute::ZExt); } DFSanCheckedUnionFn = Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy); if (Function *F = dyn_cast<Function>(DFSanCheckedUnionFn)) { F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - F->addAttribute(1,
Attribute::ZExt); - F->addAttribute(2, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); + F->addParamAttr(1, Attribute::ZExt); } DFSanUnionLoadFn = Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy); @@ -652,7 +648,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { DFSanSetLabelFn = Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy); if (Function *F = dyn_cast<Function>(DFSanSetLabelFn)) { - F->addAttribute(1, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); } DFSanNonzeroLabelFn = Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); @@ -698,9 +694,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { } } - AttrBuilder B; - B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); - ReadOnlyNoneAttrs = AttributeList::get(*Ctx, AttributeList::FunctionIndex, B); + ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly) + .addAttribute(Attribute::ReadNone); // First, change the ABI of every function in the module. ABI-listed // functions keep their original ABI and get a wrapper function. @@ -722,9 +717,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { NewF->copyAttributesFrom(&F); NewF->removeAttributes( AttributeList::ReturnIndex, - AttributeList::get( - NewF->getContext(), AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewFT->getReturnType()))); + AttributeFuncs::typeIncompatible(NewFT->getReturnType())); for (Function::arg_iterator FArg = F.arg_begin(), NewFArg = NewF->arg_begin(), FArgEnd = F.arg_end(); @@ -989,8 +982,8 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { if (AvoidNewBlocks) { CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2}); Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - Call->addAttribute(1, Attribute::ZExt); - Call->addAttribute(2, Attribute::ZExt); + Call->addParamAttr(0, Attribute::ZExt); + Call->addParamAttr(1, Attribute::ZExt); CCS.Block = Pos->getParent(); CCS.Shadow = Call; @@ -1002,8 +995,8 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { IRBuilder<> ThenIRB(BI); CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2}); Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - Call->addAttribute(1, Attribute::ZExt); - Call->addAttribute(2, Attribute::ZExt); + Call->addParamAttr(0, Attribute::ZExt); + Call->addParamAttr(1, Attribute::ZExt); BasicBlock *Tail = BI->getSuccessor(0); PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front()); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index d7eb857cff7e..493d014586c6 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -771,7 +771,7 @@ public: if (perform(MI)) { Changed = true; ++NumOfPGOMemOPOpt; - DEBUG(dbgs() << "MemOP calls: " << MI->getCalledFunction()->getName() + DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName() << " is Transformed.\n"); } } @@ -863,13 +863,23 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { ActualCount = *BBEdgeCount; } + ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); + DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount + << "\n"); + DEBUG( + for (auto &VD + : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); + if (ActualCount < MemOPCountThreshold) return false; + // Skip 
if the total value profiled count is 0, in which case we can't + // scale up the counts properly (and there is no profitable transformation). + if (TotalCount == 0) + return false; - ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); TotalCount = ActualCount; if (MemOPScaleCount) - DEBUG(dbgs() << "Scale counts: numberator = " << ActualCount + DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount << " denominator = " << SavedTotalCount << "\n"); // Keeping track of the count of the default case: @@ -915,14 +925,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { MaxCount = RemainCount; uint64_t SumForOpt = TotalCount - RemainCount; - DEBUG(dbgs() << "Read one memory intrinsic profile: " << SumForOpt << " vs " - << TotalCount << "\n"); - DEBUG( - for (auto &VD - : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version - << " Versions\n"); + << " Versions (covering " << SumForOpt << " out of " + << TotalCount << ")\n"); // mem_op(..., size) // ==> diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index d91ac6ac7883..9a82532d7703 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -241,7 +241,7 @@ static Constant *getOrInsertValueProfilingCall(Module &M, if (Function *FunRes = dyn_cast<Function>(Res)) { if (auto AK = TLI.getExtAttrForI32Param(false)) - FunRes->addAttribute(3, AK); + FunRes->addParamAttr(2, AK); } return Res; } @@ -292,7 +292,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args); } if (auto AK = TLI->getExtAttrForI32Param(false)) - Call->addAttribute(3, AK); + Call->addParamAttr(2, AK); Ind->replaceAllUsesWith(Call); Ind->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 190f05db4b0c..15333a5317dd 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2607,10 +2607,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - Func->removeAttributes(AttributeList::FunctionIndex, - AttributeList::get(Func->getContext(), - AttributeList::FunctionIndex, - B)); + Func->removeAttributes(AttributeList::FunctionIndex, B); } maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI); @@ -2643,7 +2640,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { "ByVal argument is not a pointer!"); Size = DL.getTypeAllocSize(A->getType()->getPointerElementType()); if (ArgOffset + Size > kParamTLSSize) break; - unsigned ParamAlignment = CS.getParamAlignment(i + 1); + unsigned ParamAlignment = CS.getParamAlignment(i); unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment); Store = IRB.CreateMemCpy(ArgShadowBase, getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB), @@ -3502,7 +3499,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper { assert(A->getType()->isPointerTy()); Type *RealTy = A->getType()->getPointerElementType(); uint64_t ArgSize = DL.getTypeAllocSize(RealTy); - uint64_t ArgAlign = CS.getParamAlignment(ArgNo + 1); + uint64_t ArgAlign = 
CS.getParamAlignment(ArgNo); if (ArgAlign < 8) ArgAlign = 8; VAArgOffset = alignTo(VAArgOffset, ArgAlign); @@ -3659,9 +3656,7 @@ bool MemorySanitizer::runOnFunction(Function &F) { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - F.removeAttributes( - AttributeList::FunctionIndex, - AttributeList::get(F.getContext(), AttributeList::FunctionIndex, B)); + F.removeAttributes(AttributeList::FunctionIndex, B); return Visitor.runOnFunction(); } diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index c541fa4c8bee..cb3b5757f8d0 100644 --- a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -163,7 +163,7 @@ private: AttributeList Attr = AttributeList().addAttribute( C, AttributeList::FunctionIndex, Attribute::NoUnwind); - Attr = Attr.addAttribute(C, 1, Attribute::NoCapture); + Attr = Attr.addParamAttribute(C, 0, Attribute::NoCapture); FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false); diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h index f02b75f0b456..cd9b3d96a14f 100644 --- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h +++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h @@ -69,6 +69,19 @@ static inline void EraseInstruction(Instruction *CI) { RecursivelyDeleteTriviallyDeadInstructions(OldArg); } +/// If Inst is a RetainRV and its operand is a call or invoke, return the +/// operand. Otherwise return null. +static inline const Instruction *getreturnRVOperand(const Instruction &Inst, +ARCInstKind Class) { + if (Class != ARCInstKind::RetainRV) + return nullptr; + + const auto *Opnd = Inst.getOperand(0)->stripPointerCasts(); + if (const auto *C = dyn_cast<CallInst>(Opnd)) + return C; + return dyn_cast<InvokeInst>(Opnd); +} + } // end namespace objcarc } // end namespace llvm diff --git a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp index c1bbc4e96b16..d13e941044f1 100644 --- a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp +++ b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp @@ -244,6 +244,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, ARCInstKind Class) { + auto SetSeqAndInsertReverseInsertPt = [&](Sequence NewSeq){ + assert(!HasReverseInsertPts()); + SetSeq(NewSeq); + // If this is an invoke instruction, we're scanning it as part of + // one of its successor blocks, since we can't insert code after it + // in its own block, and we don't want to split critical edges. + if (isa<InvokeInst>(Inst)) + InsertReverseInsertPt(&*BB->getFirstInsertionPt()); + else + InsertReverseInsertPt(&*++Inst->getIterator()); + }; + // Check for possible direct uses. switch (GetSeq()) { case S_Release: @@ -251,26 +263,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, if (CanUse(Inst, Ptr, PA, Class)) { DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr << "\n"); - assert(!HasReverseInsertPts()); - // If this is an invoke instruction, we're scanning it as part of - // one of its successor blocks, since we can't insert code after it - // in its own block, and we don't want to split critical edges. 
- if (isa<InvokeInst>(Inst)) - InsertReverseInsertPt(&*BB->getFirstInsertionPt()); - else - InsertReverseInsertPt(&*++Inst->getIterator()); - SetSeq(S_Use); + SetSeqAndInsertReverseInsertPt(S_Use); } else if (Seq == S_Release && IsUser(Class)) { DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; " << *Ptr << "\n"); // Non-movable releases depend on any possible objc pointer use. - SetSeq(S_Stop); - assert(!HasReverseInsertPts()); - // As above; handle invoke specially. - if (isa<InvokeInst>(Inst)) - InsertReverseInsertPt(&*BB->getFirstInsertionPt()); - else - InsertReverseInsertPt(&*++Inst->getIterator()); + SetSeqAndInsertReverseInsertPt(S_Stop); + } else if (const auto *Call = getreturnRVOperand(*Inst, Class)) { + if (CanUse(Call, Ptr, PA, GetBasicARCInstKind(Call))) { + DEBUG(dbgs() << " ReleaseUse: Seq: " << GetSeq() << "; " + << *Ptr << "\n"); + SetSeqAndInsertReverseInsertPt(S_Stop); + } } break; case S_Stop: diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index b5a4cc2f3953..3f1a77b49a44 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -151,7 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, Changed = true; } - if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) { + if (Value *V = SimplifyInstruction(P, SQ)) { P->replaceAllUsesWith(V); P->eraseFromParent(); Changed = true; @@ -318,7 +318,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { LVI->getPredicateAt(ICmpInst::ICMP_EQ, V, ConstantPointerNull::get(Type), CS.getInstruction()) == LazyValueInfo::False) - Indices.push_back(ArgNo + 1); + Indices.push_back(ArgNo + AttributeList::FirstArgIndex); ArgNo++; } @@ -565,25 +565,14 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { return false; LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); - auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; - auto *ACWP = getAnalysisIfAvailable<AssumptionCacheTracker>(); - auto *AC = ACWP ? 
&ACWP->getAssumptionCache(F) : nullptr; - const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); - return runImpl(F, LVI, SQ); + return runImpl(F, LVI, getBestSimplifyQuery(*this, F)); } PreservedAnalyses CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); - auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); - auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F); - auto *AC = AM.getCachedResult<AssumptionAnalysis>(F); - const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); - bool Changed = runImpl(F, LVI, SQ); + bool Changed = runImpl(F, LVI, getBestSimplifyQuery(AM, F)); if (!Changed) return PreservedAnalyses::all(); diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 04479b6e49ac..d8f8a58a5fdf 100644 --- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -253,6 +253,7 @@ public: const TargetTransformInfo &TTI; DominatorTree &DT; AssumptionCache &AC; + const SimplifyQuery SQ; MemorySSA *MSSA; std::unique_ptr<MemorySSAUpdater> MSSAUpdater; typedef RecyclingAllocator< @@ -315,9 +316,10 @@ public: unsigned CurrentGeneration; /// \brief Set up the EarlyCSE runner for a particular function. - EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, - DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA) - : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), + EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI, + const TargetTransformInfo &TTI, DominatorTree &DT, + AssumptionCache &AC, MemorySSA *MSSA) + : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA), MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), CurrentGeneration(0) { } @@ -616,8 +618,6 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { /// stores which can occur in bitfield code among other things. Instruction *LastStore = nullptr; - const DataLayout &DL = BB->getModule()->getDataLayout(); - // See if any instructions in the block can be eliminated. If so, do it. If // not, add them to AvailableValues. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { @@ -635,10 +635,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Skip assume intrinsics, they don't really have side effects (although // they're marked as such to ensure preservation of control dependencies), - // and this pass will not disturb any of the assumption's control - // dependencies. + // and this pass will not bother with its removal. However, we should mark + // its condition as true for all dominated blocks. if (match(Inst, m_Intrinsic<Intrinsic::assume>())) { - DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); + auto *CondI = + dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0)); + if (CondI && SimpleValue::canHandle(CondI)) { + DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst << '\n'); + AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); + } else + DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); continue; } @@ -658,10 +664,25 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) { if (auto *CondI = dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) { - // The condition we're on guarding here is true for all dominated - // locations. 
- if (SimpleValue::canHandle(CondI)) + if (SimpleValue::canHandle(CondI)) { + // Do we already know the actual value of this condition? + if (auto *KnownCond = AvailableValues.lookup(CondI)) { + // Is the condition known to be true? + if (isa<ConstantInt>(KnownCond) && + cast<ConstantInt>(KnownCond)->isOneValue()) { + DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n'); + removeMSSA(Inst); + Inst->eraseFromParent(); + Changed = true; + continue; + } else + // Use the known value if it wasn't true. + cast<CallInst>(Inst)->setArgOperand(0, KnownCond); + } + // The condition we're on guarding here is true for all dominated + // locations. AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); + } } // Guard intrinsics read all memory, but don't write any memory. @@ -673,7 +694,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. - if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) { + if (Value *V = SimplifyInstruction(Inst, SQ)) { DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); bool Killed = false; if (!Inst->use_empty()) { @@ -964,7 +985,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F, auto *MSSA = UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); if (!CSE.run()) return PreservedAnalyses::all(); @@ -1008,7 +1029,7 @@ public: auto *MSSA = UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); return CSE.run(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index be696df548d5..c04646eed49a 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1687,7 +1687,7 @@ bool GVN::processInstruction(Instruction *I) { // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. const DataLayout &DL = I->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { + if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) { bool Changed = false; if (!I->use_empty()) { I->replaceAllUsesWith(V); diff --git a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp index 48eda09c463e..198d2b2b024f 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -613,16 +613,16 @@ bool GuardWideningImpl::combineRangeChecks( // We have a series of f+1 checks as: // // I+k_0 u< L ... Chk_0 - // I_k_1 u< L ... Chk_1 + // I+k_1 u< L ... Chk_1 // ... - // I_k_f u< L ... Chk_(f+1) + // I+k_f u< L ... Chk_f // - // with forall i in [0,f): k_f-k_i u< k_f-k_0 ... Precond_0 + // with forall i in [0,f]: k_f-k_i u< k_f-k_0 ... Precond_0 // k_f-k_0 u< INT_MIN+k_f ... Precond_1 // k_f != k_0 ... 
Precond_2 // // Claim: - // Chk_0 AND Chk_(f+1) implies all the other checks + // Chk_0 AND Chk_f implies all the other checks // // Informal proof sketch: // diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index dcb2a4a0c6e6..3953198fe605 100644 --- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -97,7 +97,7 @@ class IndVarSimplify { TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; bool Changed = false; bool isValidRewrite(Value *FromVal, Value *ToVal); @@ -415,8 +415,8 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { Compare->getName()); // In the following deletions, PN may become dead and may be deleted. - // Use a WeakVH to observe whether this happens. - WeakVH WeakPH = PN; + // Use a WeakTrackingVH to observe whether this happens. + WeakTrackingVH WeakPH = PN; // Delete the old floating point exit comparison. The branch starts using the // new comparison. @@ -451,7 +451,7 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { // BasicBlock *Header = L->getHeader(); - SmallVector<WeakVH, 8> PHIs; + SmallVector<WeakTrackingVH, 8> PHIs; for (BasicBlock::iterator I = Header->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) PHIs.push_back(PN); @@ -901,7 +901,7 @@ class WidenIV { PHINode *WidePhi; Instruction *WideInc; const SCEV *WideIncExpr; - SmallVectorImpl<WeakVH> &DeadInsts; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; SmallPtrSet<Instruction *,16> Widened; SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; @@ -941,20 +941,13 @@ class WidenIV { } public: - WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, - ScalarEvolution *SEv, DominatorTree *DTree, - SmallVectorImpl<WeakVH> &DI, bool HasGuards) : - OrigPhi(WI.NarrowIV), - WideType(WI.WidestNativeType), - LI(LInfo), - L(LI->getLoopFor(OrigPhi->getParent())), - SE(SEv), - DT(DTree), - HasGuards(HasGuards), - WidePhi(nullptr), - WideInc(nullptr), - WideIncExpr(nullptr), - DeadInsts(DI) { + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, + bool HasGuards) + : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), + L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), + HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr), + WideIncExpr(nullptr), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; } diff --git a/contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index 9e2563879da2..5e116ef2fe75 100644 --- a/contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -138,7 +138,7 @@ private: // Tries to infer the specific address space of each address expression in // Postorder. - void inferAddressSpaces(const std::vector<Value *> &Postorder, + void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, ValueToAddrSpaceMapTy *InferredAddrSpace) const; bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const; @@ -147,7 +147,7 @@ private: // address spaces if InferredAddrSpace says so. Postorder is the postorder of // all flat expressions in the use-def graph of function F. 
bool - rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder, + rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const; @@ -162,7 +162,7 @@ private: std::vector<std::pair<Value *, bool>> &PostorderStack, DenseSet<Value *> &Visited) const; - std::vector<Value *> collectFlatAddressExpressions(Function &F) const; + std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const; Value *cloneValueWithNewAddressSpace( Value *V, unsigned NewAddrSpace, @@ -274,16 +274,36 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack( Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack, DenseSet<Value *> &Visited) const { assert(V->getType()->isPointerTy()); + + // Generic addressing expressions may be hidden in nested constant + // expressions. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + // TODO: Look in non-address parts, like icmp operands. + if (isAddressExpression(*CE) && Visited.insert(CE).second) + PostorderStack.push_back(std::make_pair(CE, false)); + + return; + } + if (isAddressExpression(*V) && V->getType()->getPointerAddressSpace() == FlatAddrSpace) { - if (Visited.insert(V).second) + if (Visited.insert(V).second) { PostorderStack.push_back(std::make_pair(V, false)); + + Operator *Op = cast<Operator>(V); + for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) { + if (isAddressExpression(*CE) && Visited.insert(CE).second) + PostorderStack.emplace_back(CE, false); + } + } + } } } // Returns all flat address expressions in function F. The elements are // ordered in postorder. -std::vector<Value *> +std::vector<WeakTrackingVH> InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { // This function implements a non-recursive postorder traversal of a partial // use-def graph of function F. @@ -326,21 +346,25 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { PushPtrOperand(Cmp->getOperand(0)); PushPtrOperand(Cmp->getOperand(1)); } + } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { + if (!ASC->getType()->isVectorTy()) + PushPtrOperand(ASC->getPointerOperand()); } } - std::vector<Value *> Postorder; // The resultant postorder. + std::vector<WeakTrackingVH> Postorder; // The resultant postorder. while (!PostorderStack.empty()) { + Value *TopVal = PostorderStack.back().first; // If the operands of the expression on the top are already explored, // adds that expression to the resultant postorder. if (PostorderStack.back().second) { - Postorder.push_back(PostorderStack.back().first); + Postorder.push_back(TopVal); PostorderStack.pop_back(); continue; } // Otherwise, adds its operands to the stack and explores them. PostorderStack.back().second = true; - for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) { + for (Value *PtrOperand : getPointerOperands(*TopVal)) { appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack, Visited); } @@ -559,7 +583,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) { return false; // Collects all flat address expressions in postorder. - std::vector<Value *> Postorder = collectFlatAddressExpressions(F); + std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F); // Runs a data-flow analysis to refine the address spaces of every expression // in Postorder. 
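  // (Sketch of the join this analysis iterates to a fixed point, assuming the
  // usual uninitialized -> specific -> flat lattice for this pass:
  //    join(uninitialized, AS) = AS
  //    join(AS, AS)            = AS
  //    join(AS1, AS2)          = flat, when AS1 != AS2
  // so an expression whose operands disagree stays in the flat address space.)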
@@ -571,8 +595,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) { return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F); } +// Constants need to be tracked through RAUW to handle cases with nested +// constant expressions, so wrap values in WeakTrackingVH. void InferAddressSpaces::inferAddressSpaces( - const std::vector<Value *> &Postorder, + ArrayRef<WeakTrackingVH> Postorder, ValueToAddrSpaceMapTy *InferredAddrSpace) const { SetVector<Value *> Worklist(Postorder.begin(), Postorder.end()); // Initially, all expressions are in the uninitialized address space. @@ -784,8 +810,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I, } bool InferAddressSpaces::rewriteWithNewAddressSpaces( - const std::vector<Value *> &Postorder, - const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { + ArrayRef<WeakTrackingVH> Postorder, + const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { // For each address expression to be modified, creates a clone of it with its // pointer operands converted to the new address space. Since the pointer // operands are converted, the clone is naturally in the new address space by @@ -812,8 +838,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get())); } + SmallVector<Instruction *, 16> DeadInstructions; + // Replaces the uses of the old address expressions with the new ones. - for (Value *V : Postorder) { + for (const WeakTrackingVH &WVH : Postorder) { + assert(WVH && "value was unexpectedly deleted"); + Value *V = WVH; Value *NewV = ValueWithNewAddrSpace.lookup(V); if (NewV == nullptr) continue; @@ -821,6 +851,17 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( DEBUG(dbgs() << "Replacing the uses of " << *V << "\n with\n " << *NewV << '\n'); + if (Constant *C = dyn_cast<Constant>(V)) { + Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV), + C->getType()); + if (C != Replace) { + DEBUG(dbgs() << "Inserting replacement const cast: " + << Replace << ": " << *Replace << '\n'); + C->replaceAllUsesWith(Replace); + V = Replace; + } + } + Value::use_iterator I, E, Next; for (I = V->use_begin(), E = V->use_end(); I != E; ) { Use &U = *I; @@ -881,6 +922,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( } } + if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) { + unsigned NewAS = NewV->getType()->getPointerAddressSpace(); + if (ASC->getDestAddressSpace() == NewAS) { + ASC->replaceAllUsesWith(NewV); + DeadInstructions.push_back(ASC); + continue; + } + } + // Otherwise, replaces the use with flat(NewV). 
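   // (Here flat(NewV) denotes an addrspacecast of NewV back to V's original
   // flat pointer type, roughly what the code below materializes:
   //   Value *Flat = new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos);
   // inserted just after the definition, so the remaining flat-typed uses keep
   // seeing a value of the original type.)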
if (Instruction *I = dyn_cast<Instruction>(V)) { BasicBlock::iterator InsertPos = std::next(I->getIterator()); @@ -894,10 +944,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( } } - if (V->use_empty()) - RecursivelyDeleteTriviallyDeadInstructions(V); + if (V->use_empty()) { + if (Instruction *I = dyn_cast<Instruction>(V)) + DeadInstructions.push_back(I); + } } + for (Instruction *I : DeadInstructions) + RecursivelyDeleteTriviallyDeadInstructions(I); + return true; } diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index a0da81605a80..7dacaba1193e 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -557,7 +557,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( Value *LHS = PN->getIncomingValue(i); Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); - Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL); + Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL}); if (!Res) { if (!isa<Constant>(RHS)) continue; @@ -1250,37 +1250,53 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; + Constant *OnlyVal = nullptr; + Constant *MultipleVal = (Constant *)(intptr_t)~0ULL; + unsigned PredWithKnownDest = 0; for (const auto &PredValue : PredValues) { BasicBlock *Pred = PredValue.second; if (!SeenPreds.insert(Pred).second) continue; // Duplicate predecessor entry. - // If the predecessor ends with an indirect goto, we can't change its - // destination. - if (isa<IndirectBrInst>(Pred->getTerminator())) - continue; - Constant *Val = PredValue.first; BasicBlock *DestBB; if (isa<UndefValue>(Val)) DestBB = nullptr; - else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) + else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + assert(isa<ConstantInt>(Val) && "Expecting a constant integer"); DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero()); - else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + assert(isa<ConstantInt>(Val) && "Expecting a constant integer"); DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor(); } else { assert(isa<IndirectBrInst>(BB->getTerminator()) && "Unexpected terminator"); + assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress"); DestBB = cast<BlockAddress>(Val)->getBasicBlock(); } // If we have exactly one destination, remember it for efficiency below. - if (PredToDestList.empty()) + if (PredToDestList.empty()) { OnlyDest = DestBB; - else if (OnlyDest != DestBB) - OnlyDest = MultipleDestSentinel; + OnlyVal = Val; + } else { + if (OnlyDest != DestBB) + OnlyDest = MultipleDestSentinel; + // It is possible we have the same destination but a different value, e.g. + // the default case in a switchinst. + if (Val != OnlyVal) + OnlyVal = MultipleVal; + } + + // We know where this predecessor is going. + ++PredWithKnownDest; + + // If the predecessor ends with an indirect goto, we can't change its + // destination. + if (isa<IndirectBrInst>(Pred->getTerminator())) + continue; PredToDestList.push_back(std::make_pair(Pred, DestBB)); } @@ -1293,7 +1309,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // not thread. 
By doing so, we do not need to duplicate the current block and // also miss potential opportunities in case we don't/can't duplicate. if (OnlyDest && OnlyDest != MultipleDestSentinel) { - if (PredToDestList.size() == + if (PredWithKnownDest == (size_t)std::distance(pred_begin(BB), pred_end(BB))) { bool SeenFirstBranchToOnlyDest = false; for (BasicBlock *SuccBB : successors(BB)) { @@ -1310,11 +1326,18 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // If the condition is now dead due to the removal of the old terminator, // erase it. - auto *CondInst = dyn_cast<Instruction>(Cond); - if (CondInst && CondInst->use_empty()) - CondInst->eraseFromParent(); - // FIXME: in case this instruction is defined in the current BB and it - // resolves to a single value from all predecessors, we can do RAUW. + if (auto *CondInst = dyn_cast<Instruction>(Cond)) { + if (CondInst->use_empty() && !CondInst->mayHaveSideEffects()) + CondInst->eraseFromParent(); + else if (OnlyVal && OnlyVal != MultipleVal && + CondInst->getParent() == BB) { + // If we just learned Cond is the same value for all uses of the + // condition, replace it with a constant value. + CondInst->replaceAllUsesWith(OnlyVal); + if (!CondInst->mayHaveSideEffects()) + CondInst->eraseFromParent(); + } + } return true; } } @@ -1883,8 +1906,9 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // If this instruction can be simplified after the operands are updated, // just use the simplified value instead. This frequently happens due to // phi translation. - if (Value *IV = - SimplifyInstruction(New, BB->getModule()->getDataLayout())) { + if (Value *IV = SimplifyInstruction( + New, + {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) { ValueMapping[&*BI] = IV; if (!New->mayHaveSideEffects()) { delete New; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index 73e8ce0e1d93..3151ccd279c4 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -29,6 +30,21 @@ using namespace llvm; STATISTIC(NumDeleted, "Number of loops deleted"); +/// This function deletes dead loops. The caller of this function needs to +/// guarantee that the loop is in fact dead. Here we handle two kinds of dead +/// loops. The first kind (\p isLoopDead) is where only invariant values from +/// within the loop are used outside of it. The second kind (\p +/// isLoopNeverExecuted) is where the loop is provably never executed. We can +/// always remove never-executed loops since they will not cause any +/// difference to program behaviour. +/// +/// This also updates the relevant analysis information in \p DT, \p SE, and \p +/// LI. It also updates the loop PM if an updater struct is provided. // TODO: This function will be used by loop-simplifyCFG as well. So, move this // to LoopUtils.cpp static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, bool LoopIsNeverExecuted, LPMUpdater *Updater = nullptr); /// Determines if a loop is dead. 
/// /// This assumes that we've already checked for unique exit and exiting blocks, @@ -84,12 +100,44 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, return true; } +/// This function returns true if there is no viable path from the +/// entry block to the header of \p L. Right now, it only does +/// a local search to save compile time. +static bool isLoopNeverExecuted(Loop *L) { + using namespace PatternMatch; + + auto *Preheader = L->getLoopPreheader(); + // TODO: We can relax this constraint, since we just need a loop + // predecessor. + assert(Preheader && "Needs preheader!"); + + if (Preheader == &Preheader->getParent()->getEntryBlock()) + return false; + // All predecessors of the preheader should have a constant conditional + // branch, with the loop's preheader as not-taken. + for (auto *Pred: predecessors(Preheader)) { + BasicBlock *Taken, *NotTaken; + ConstantInt *Cond; + if (!match(Pred->getTerminator(), + m_Br(m_ConstantInt(Cond), Taken, NotTaken))) + return false; + if (!Cond->getZExtValue()) + std::swap(Taken, NotTaken); + if (Taken == Preheader) + return false; + } + assert(!pred_empty(Preheader) && + "Preheader should have predecessors at this point!"); + // All the predecessors have the loop preheader as not-taken target. + return true; +} + /// Remove a loop if it is dead. /// /// A loop is considered dead if it does not impact the observable behavior of /// the program other than finite running time. This never removes a loop that -/// might be infinite, as doing so could change the halting/non-halting nature -/// of a program. +/// might be infinite (unless it is never executed), as doing so could change +/// the halting/non-halting nature of a program. /// /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in /// order to make various safety checks work. @@ -97,9 +145,6 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, /// \returns true if any changes were made. This may mutate the loop even if it /// is unable to delete it due to hoisting trivially loop invariant /// instructions out of the loop. -/// -/// This also updates the relevant analysis information in \p DT, \p SE, and \p -/// LI. It also updates the loop PM if an updater struct is provided. static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, LPMUpdater *Updater = nullptr) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); @@ -119,6 +164,17 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, if (L->begin() != L->end()) return false; + + BasicBlock *ExitBlock = L->getUniqueExitBlock(); + + if (ExitBlock && isLoopNeverExecuted(L)) { + deleteDeadLoop(L, DT, SE, LI, true /* LoopIsNeverExecuted */, Updater); + ++NumDeleted; + return true; + } + + // The remaining checks below are for a loop being dead because all statements + // in the loop are invariant. SmallVector<BasicBlock *, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -126,7 +182,6 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. 
- BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (!ExitBlock) return false; @@ -141,6 +196,19 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, if (isa<SCEVCouldNotCompute>(S)) return Changed; + deleteDeadLoop(L, DT, SE, LI, false /* LoopIsNeverExecuted */, Updater); + ++NumDeleted; + + return true; +} + +static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, bool LoopIsNeverExecuted, + LPMUpdater *Updater) { + assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); + auto *Preheader = L->getLoopPreheader(); + assert(Preheader && "Preheader should exist!"); + // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. // @@ -156,17 +224,29 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // to determine what it needs to clean up. SE.forgetLoop(L); + auto *ExitBlock = L->getUniqueExitBlock(); + assert(ExitBlock && "Should have a unique exit block!"); + // Connect the preheader directly to the exit block. - TerminatorInst *TI = Preheader->getTerminator(); - TI->replaceUsesOfWith(L->getHeader(), ExitBlock); + // Even when the loop is never executed, we cannot remove the edge from the + // source block to the exit block. Consider the case where the unexecuted loop + // branches back to an outer loop. If we deleted the loop and removed the edge + // coming to this inner loop, that would break the outer loop structure (by + // deleting the backedge of the outer loop). If the outer loop is indeed a + // non-loop, it will be deleted in a future iteration of the loop deletion pass. + Preheader->getTerminator()->replaceUsesOfWith(L->getHeader(), ExitBlock); - // Rewrite phis in the exit block to get their inputs from - // the preheader instead of the exiting block. + SmallVector<BasicBlock *, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + // Rewrite phis in the exit block to get their inputs from the Preheader + // instead of the exiting block. BasicBlock *ExitingBlock = ExitingBlocks[0]; BasicBlock::iterator BI = ExitBlock->begin(); while (PHINode *P = dyn_cast<PHINode>(BI)) { int j = P->getBasicBlockIndex(ExitingBlock); assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); + if (LoopIsNeverExecuted) + P->setIncomingValue(j, UndefValue::get(P->getType())); P->setIncomingBlock(j, Preheader); for (unsigned i = 1; i < ExitingBlocks.size(); ++i) P->removeIncomingValue(ExitingBlocks[i]); @@ -211,9 +291,6 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // The last step is to update LoopInfo now that we've eliminated this loop. 
LI.markAsRemoved(L); - ++NumDeleted; - - return true; } PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, @@ -254,7 +331,6 @@ Pass *llvm::createLoopDeletionPass() { return new LoopDeletionLegacyPass(); } bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) { if (skipLoop(L)) return false; - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 5042fc18d7c4..410fbb03068f 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -499,7 +499,7 @@ bool LoopIdiomRecognize::runOnLoopBlock( Instruction *Inst = &*I++; // Look for memset instructions, which may be optimized to a larger memset. if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { - WeakVH InstPtr(&*I); + WeakTrackingVH InstPtr(&*I); if (!processLoopMemSet(MSI, BECount)) continue; MadeChange = true; @@ -856,7 +856,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( /// If the stored value is a strided load in the same loop with the same stride /// this may be transformable into a memcpy. This kicks in for stuff like -/// for (i) A[i] = B[i]; +/// for (i) A[i] = B[i]; bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount) { assert(SI->isSimple() && "Expected only non-volatile stores."); diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp index 28e71ca05436..af095560cc02 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -77,7 +77,7 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI, // Don't bother simplifying unused instructions. if (!I->use_empty()) { - Value *V = SimplifyInstruction(I, DL, TLI, DT, AC); + Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC}); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop. for (User *U : I->users()) diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 8ce96cf1b7a6..2ba9265566a8 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -341,7 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C)); + Value *V = SimplifyInstruction(C, SQ); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. @@ -670,8 +670,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, LPMUpdater &) { int Threshold = EnableHeaderDuplication ? 
DefaultRotationThreshold : 0; const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); - const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC); - LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ); + const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL); + LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, + SQ); bool Changed = LR.processLoop(&L); if (!Changed) @@ -714,10 +715,7 @@ public: auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); auto *SE = SEWP ? &SEWP->getSE() : nullptr; - auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - const SimplifyQuery SQ(DL, TLI, DT, AC); + const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ); return LR.processLoop(L); } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index a5a81c33a8eb..35c05e84fd68 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -40,7 +40,7 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) { bool Changed = false; // Copy blocks into a temporary array to avoid iterator invalidation issues // as we remove them. - SmallVector<WeakVH, 16> Blocks(L.blocks()); + SmallVector<WeakTrackingVH, 16> Blocks(L.blocks()); for (auto &Block : Blocks) { // Attempt to merge blocks in the trivial case. Don't modify blocks which diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index af137f6faa63..ccedb98d7fa1 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -900,7 +900,7 @@ static bool isHighCostExpansion(const SCEV *S, /// If any of the instructions in the specified set are trivially dead, delete /// them and see if this makes any of their operands subsequently dead. 
static bool -DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { +DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { @@ -1845,7 +1845,7 @@ class LSRInstance { void FinalizeChain(IVChain &Chain); void CollectChains(); void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts); + SmallVectorImpl<WeakTrackingVH> &DeadInsts); void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); @@ -1920,19 +1920,15 @@ class LSRInstance { const LSRUse &LU, SCEVExpander &Rewriter) const; - Value *Expand(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; + Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, + BasicBlock::iterator IP, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; - void Rewrite(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; + void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution); public: @@ -3014,7 +3010,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, /// Generate an add or subtract for each IVInc in a chain to materialize the IV /// user's operand from the previous IV user's operand. void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced // by LSR. const IVInc &Head = Chain.Incs[0]; @@ -4759,12 +4755,10 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"). -Value *LSRInstance::Expand(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, +Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, BasicBlock::iterator IP, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { if (LU.RigidFormula) return LF.OperandValToReplace; @@ -4939,12 +4933,9 @@ Value *LSRInstance::Expand(const LSRUse &LU, /// Helper for Rewrite. PHI nodes are special because the use of their operands /// effectively happens in their predecessor blocks, so the expression may need /// to be expanded in multiple places. 
-void LSRInstance::RewriteForPHI(PHINode *PN, - const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::RewriteForPHI( + PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, + SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { DenseMap<BasicBlock *, Value *> Inserted; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -5016,11 +5007,9 @@ void LSRInstance::RewriteForPHI(PHINode *PN, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"), and update the UserInst to reference the newly /// expanded value. -void LSRInstance::Rewrite(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { @@ -5058,7 +5047,7 @@ void LSRInstance::ImplementSolution( const SmallVectorImpl<const Formula *> &Solution) { // Keep track of instructions we may have made dead, so that // we can remove them after we are done working. - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr"); @@ -5308,7 +5297,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); if (EnablePhiElim && L->isLoopSimplifyForm()) { - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); SCEVExpander Rewriter(SE, DL, "lsr"); #ifndef NDEBUG diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index 8fa806a7e8bc..6ef1464e9338 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1231,11 +1231,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, LoopProcessWorklist.push_back(NewLoop); redoLoop = true; - // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody + // Keep a WeakTrackingVH holding onto LIC. If the first call to + // RewriteLoopBody // deletes the instruction (for example by simplifying a PHI that feeds into // the condition that we're unswitching on), we don't rewrite the second // iteration. - WeakVH LICHandle(LIC); + WeakTrackingVH LICHandle(LIC); // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. @@ -1262,7 +1263,7 @@ static void RemoveFromWorklist(Instruction *I, static void ReplaceUsesOfWith(Instruction *I, Value *V, std::vector<Instruction*> &Worklist, Loop *L, LPPassManager *LPM) { - DEBUG(dbgs() << "Replace with '" << *V << "': " << *I); + DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n"); // Add uses to the worklist, which may be dead now. 
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -1275,7 +1276,8 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V, LPM->deleteSimpleAnalysisValue(I, L); RemoveFromWorklist(I, Worklist); I->replaceAllUsesWith(V); - I->eraseFromParent(); + if (!I->mayHaveSideEffects()) + I->eraseFromParent(); ++NumSimplify; } @@ -1431,7 +1433,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { // Simple DCE. if (isInstructionTriviallyDead(I)) { - DEBUG(dbgs() << "Remove dead instruction '" << *I); + DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n"); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index a3f3f25c1e0f..21a632073da7 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1323,7 +1323,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // Get the alignment of the byval. If the call doesn't specify the alignment, // then it is some target specific value that we can't know. - unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); + unsigned ByValAlign = CS.getParamAlignment(ArgNo); if (ByValAlign == 0) return false; // If it is greater than the memcpy, then we check to see if we can force the diff --git a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp index c5bf2f28d185..d0bfe3603897 100644 --- a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp @@ -211,7 +211,8 @@ bool NaryReassociatePass::doOneIteration(Function &F) { Changed = true; SE->forgetValue(&*I); I->replaceAllUsesWith(NewI); - // If SeenExprs contains I's WeakVH, that entry will be replaced with + // If SeenExprs contains I's WeakTrackingVH, that entry will be + // replaced with // nullptr. RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI); I = NewI->getIterator(); @@ -219,7 +220,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) { // Add the rewritten instruction to SeenExprs; the original instruction // is deleted. const SCEV *NewSCEV = SE->getSCEV(&*I); - SeenExprs[NewSCEV].push_back(WeakVH(&*I)); + SeenExprs[NewSCEV].push_back(WeakTrackingVH(&*I)); // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I) // is equivalent to I. However, ScalarEvolution::getSCEV may // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose @@ -239,7 +240,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) { // // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll. if (NewSCEV != OldSCEV) - SeenExprs[OldSCEV].push_back(WeakVH(&*I)); + SeenExprs[OldSCEV].push_back(WeakTrackingVH(&*I)); } } } @@ -494,7 +495,8 @@ NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr, // future instruction either. Therefore, we pop it out of the stack. This // optimization makes the algorithm O(n). while (!Candidates.empty()) { - // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed + // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's + // removed // during rewriting.
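The ArgNo+1 to ArgNo change in processByValArgument is not cosmetic: the old accessor took an attribute-list slot, where slot 0 is the return value and parameters start at slot 1, while the new getParamAlignment takes the zero-based argument number and does the shift internally. Schematically (index constants as defined by AttributeList in this release):

    // Attribute-list slots vs. argument numbers:
    //   AttributeList::ReturnIndex   == 0    (return value)
    //   AttributeList::FirstArgIndex == 1    (argument #0)
    //   AttributeList::FunctionIndex == ~0U  (the function itself)
    unsigned ByValAlign = CS.getParamAlignment(ArgNo);     // new: 0-based ArgNo
    // Pre-change spelling, with the caller doing the shift:
    //   unsigned ByValAlign = CS.getParamAlignment(ArgNo + 1);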
if (Value *Candidate = Candidates.back()) { Instruction *CandidateInstruction = cast<Instruction>(Candidate); diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp index a014ddd9ba0a..62b5d80d611b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -395,7 +395,6 @@ namespace { class NewGVN { Function &F; DominatorTree *DT; - AssumptionCache *AC; const TargetLibraryInfo *TLI; AliasAnalysis *AA; MemorySSA *MSSA; @@ -405,6 +404,7 @@ class NewGVN { BumpPtrAllocator ExpressionAllocator; ArrayRecycler<Value *> ArgRecycler; TarjanSCC SCCFinder; + const SimplifyQuery SQ; // Number of function arguments, used by ranking unsigned int NumFuncArgs; @@ -504,8 +504,9 @@ public: NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, const DataLayout &DL) - : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), - PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)) {} + : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), + PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)), SQ(DL, TLI, DT, AC) { + } bool runGVN(); private: @@ -782,8 +783,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, E->op_push_back(lookupOperandLeader(Arg1)); E->op_push_back(lookupOperandLeader(Arg2)); - Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI, - DT, AC); + Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V)) return SimplifiedE; return E; @@ -864,8 +864,8 @@ const Expression *NewGVN::createExpression(Instruction *I) { "Wrong types on cmp instruction"); assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() && E->getOperand(1)->getType() == I->getOperand(1)->getType())); - Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), - DL, TLI, DT, AC); + Value *V = + SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (isa<SelectInst>(I)) { @@ -874,23 +874,23 @@ const Expression *NewGVN::createExpression(Instruction *I) { assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() && E->getOperand(2)->getType() == I->getOperand(2)->getType()); Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1), - E->getOperand(2), DL, TLI, DT, AC); + E->getOperand(2), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } } else if (I->isBinaryOp()) { - Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), - DL, TLI, DT, AC); + Value *V = + SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (auto *BI = dyn_cast<BitCastInst>(I)) { - Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC); + Value *V = + SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (isa<GetElementPtrInst>(I)) { - Value *V = SimplifyGEPInst(E->getType(), - ArrayRef<Value *>(E->op_begin(), E->op_end()), - DL, TLI, DT, AC); + Value *V = SimplifyGEPInst( + E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ); if (const Expression *SimplifiedE = 
checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (AllConstant) { @@ -1440,18 +1440,15 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { // True if one of the incoming phi edges is a backedge. bool HasBackedge = false; // All constant tracks the state of whether all the *original* phi operands - // were constant. - // This is really shorthand for "this phi cannot cycle due to forward - // propagation", as any - // change in value of the phi is guaranteed not to later change the value of - // the phi. + // were constant. This is really shorthand for "this phi cannot cycle due + // to forward propagation", as any change in value of the phi is guaranteed + // not to later change the value of the phi. // IE it can't be v = phi(undef, v+1) bool AllConstant = true; auto *E = cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant)); // We match the semantics of SimplifyPhiNode from InstructionSimplify here. - - // See if all arguaments are the same. + // See if all arguments are the same. // We track if any were undef because they need special handling. bool HasUndef = false; auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) { @@ -1628,15 +1625,15 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { if (PBranch->TrueEdge) { // If we know the previous predicate is true and we are in the true // edge then we may be implied true or false. - if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate, - BranchPredicate)) { + if (CmpInst::isImpliedTrueByMatchingCmp(BranchPredicate, + OurPredicate)) { addPredicateUsers(PI, I); return createConstantExpression( ConstantInt::getTrue(CI->getType())); } - if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate, - BranchPredicate)) { + if (CmpInst::isImpliedFalseByMatchingCmp(BranchPredicate, + OurPredicate)) { addPredicateUsers(PI, I); return createConstantExpression( ConstantInt::getFalse(CI->getType())); diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index 3dcab6090789..ef29d4141600 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -982,7 +982,7 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i, /// Emit a tree of add instructions, summing Ops together /// and returning the result. Insert the tree before I. static Value *EmitAddTreeOfValues(Instruction *I, - SmallVectorImpl<WeakVH> &Ops){ + SmallVectorImpl<WeakTrackingVH> &Ops) { if (Ops.size() == 1) return Ops.back(); Value *V1 = Ops.back(); @@ -1559,7 +1559,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I, ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); - SmallVector<WeakVH, 4> NewMulOps; + SmallVector<WeakTrackingVH, 4> NewMulOps; for (unsigned i = 0; i != Ops.size(); ++i) { // Only try to remove factors from expressions we're allowed to. 
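The operand swap in the isImplied calls above is a genuine bug fix, not churn: isImpliedTrueByMatchingCmp(Pred1, Pred2) asks whether "A Pred1 B" being true forces "A Pred2 B" to be true over matching operands, so the predicate already established by the dominating branch has to come first. A small illustration of the asymmetry:

    // For the same operands A and B:
    //   A <s B  implies  A <=s B       (strict implies non-strict)
    //   A <=s B does not force A <s B  (A == B is still possible)
    bool Fwd = CmpInst::isImpliedTrueByMatchingCmp(CmpInst::ICMP_SLT,
                                                   CmpInst::ICMP_SLE); // true
    bool Rev = CmpInst::isImpliedTrueByMatchingCmp(CmpInst::ICMP_SLE,
                                                   CmpInst::ICMP_SLT); // false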
BinaryOperator *BOp = diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index f344eb151464..77b2bd84f9b6 100644 --- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1128,39 +1128,23 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. -static AttributeList legalizeCallAttributes(AttributeList AS) { - AttributeList Ret; - - for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) { - unsigned Index = AS.getSlotIndex(Slot); - - if (Index == AttributeList::ReturnIndex || - Index == AttributeList::FunctionIndex) { - - for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) { - - // Do not allow certain attributes - just skip them - // Safepoint can not be read only or read none. - if (Attr.hasAttribute(Attribute::ReadNone) || - Attr.hasAttribute(Attribute::ReadOnly)) - continue; - - // These attributes control the generation of the gc.statepoint call / - // invoke itself; and once the gc.statepoint is in place, they're of no - // use. - if (isStatepointDirectiveAttr(Attr)) - continue; - - Ret = Ret.addAttributes( - AS.getContext(), Index, - AttributeList::get(AS.getContext(), Index, AttrBuilder(Attr))); - } - } - - // Just skip parameter attributes for now - } - - return Ret; +static AttributeList legalizeCallAttributes(AttributeList AL) { + if (AL.isEmpty()) + return AL; + + // Remove the readonly, readnone, and statepoint function attributes. + AttrBuilder FnAttrs = AL.getFnAttributes(); + FnAttrs.removeAttribute(Attribute::ReadNone); + FnAttrs.removeAttribute(Attribute::ReadOnly); + for (Attribute A : AL.getFnAttributes()) { + if (isStatepointDirectiveAttr(A)) + FnAttrs.remove(A); + } + + // Just skip parameter and return attributes for now + LLVMContext &Ctx = AL.getContext(); + return AttributeList::get(Ctx, AttributeList::FunctionIndex, + AttributeSet::get(Ctx, FnAttrs)); } /// Helper function to place all gc relocates necessary for the given @@ -1402,13 +1386,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */ Call->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain - // function attributes. - AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); - // In case if we can handle this set of attributes - set up function attrs - // directly on statepoint and return attrs later for gc_result intrinsic. - Call->setAttributes(AttributeList::get(Call->getContext(), - AttributeList::FunctionIndex, - NewAttrs.getFnAttributes())); + // function attributes. In case if we can handle this set of attributes - + // set up function attrs directly on statepoint and return attrs later for + // gc_result intrinsic. + Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes())); Token = Call; @@ -1431,13 +1412,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */ Invoke->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain - // function attributes. - AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); - // In case if we can handle this set of attributes - set up function attrs - // directly on statepoint and return attrs later for gc_result intrinsic. 
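Isolated from the statepoint plumbing, the rewritten attribute legalization is a compact example of the AttrBuilder workflow: pull the function attributes out as a builder, remove what a gc.statepoint cannot carry, and rebuild a function-only list. A stripped-down sketch of the same shape (the statepoint-directive filtering from the patch is elided):

    static AttributeList stripForStatepoint(AttributeList AL) {
      if (AL.isEmpty())
        return AL;
      // A safepoint may read and write memory, so memory attributes must go.
      AttrBuilder FnAttrs = AL.getFnAttributes();
      FnAttrs.removeAttribute(Attribute::ReadNone);
      FnAttrs.removeAttribute(Attribute::ReadOnly);
      // Parameter and return attributes are intentionally dropped for now.
      LLVMContext &Ctx = AL.getContext();
      return AttributeList::get(Ctx, AttributeList::FunctionIndex,
                                AttributeSet::get(Ctx, FnAttrs));
    }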
- Invoke->setAttributes(AttributeList::get(Invoke->getContext(), - AttributeList::FunctionIndex, - NewAttrs.getFnAttributes())); + // function attributes. In case if we can handle this set of attributes - + // set up function attrs directly on statepoint and return attrs later for + // gc_result intrinsic. + Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes())); Token = Invoke; @@ -2308,12 +2286,11 @@ static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH, if (AH.getDereferenceableOrNullBytes(Index)) R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull, AH.getDereferenceableOrNullBytes(Index))); - if (AH.doesNotAlias(Index)) + if (AH.getAttributes().hasAttribute(Index, Attribute::NoAlias)) R.addAttribute(Attribute::NoAlias); if (!R.empty()) - AH.setAttributes(AH.getAttributes().removeAttributes( - Ctx, Index, AttributeList::get(Ctx, Index, R))); + AH.setAttributes(AH.getAttributes().removeAttributes(Ctx, Index, R)); } void @@ -2322,7 +2299,8 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) { for (Argument &A : F.args()) if (isa<PointerType>(A.getType())) - RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1); + RemoveNonValidAttrAtIndex(Ctx, F, + A.getArgNo() + AttributeList::FirstArgIndex); if (isa<PointerType>(F.getReturnType())) RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex); @@ -2358,7 +2336,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) { if (CallSite CS = CallSite(&I)) { for (int i = 0, e = CS.arg_size(); i != e; i++) if (isa<PointerType>(CS.getArgument(i)->getType())) - RemoveNonValidAttrAtIndex(Ctx, CS, i + 1); + RemoveNonValidAttrAtIndex(Ctx, CS, i + AttributeList::FirstArgIndex); if (isa<PointerType>(CS.getType())) RemoveNonValidAttrAtIndex(Ctx, CS, AttributeList::ReturnIndex); } diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp index d01e91a7f235..1d9beffaf06b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp @@ -25,6 +25,7 @@ #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" @@ -2186,8 +2187,8 @@ class llvm::sroa::AllocaSliceRewriter Instruction *OldPtr; // Track post-rewrite users which are PHI nodes and Selects. - SmallPtrSetImpl<PHINode *> &PHIUsers; - SmallPtrSetImpl<SelectInst *> &SelectUsers; + SmallSetVector<PHINode *, 8> &PHIUsers; + SmallSetVector<SelectInst *, 8> &SelectUsers; // Utility IR builder, whose name prefix is setup for each visited use, and // the insertion point is set to point to the user. @@ -2199,8 +2200,8 @@ public: uint64_t NewAllocaBeginOffset, uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, - SmallPtrSetImpl<PHINode *> &PHIUsers, - SmallPtrSetImpl<SelectInst *> &SelectUsers) + SmallSetVector<PHINode *, 8> &PHIUsers, + SmallSetVector<SelectInst *, 8> &SelectUsers) : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI), NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), @@ -3880,8 +3881,8 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // fact scheduled for promotion. 
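SROA's move from SmallPtrSet to SmallSetVector for PHIUsers and SelectUsers is about determinism rather than speed: iteration order over a pointer set depends on the pointer values themselves, so the order in which speculation candidates were examined (and therefore the output IR) could vary between otherwise identical runs. SmallSetVector keeps the deduplication but iterates in insertion order; a tiny illustration, given some PHINode *PN:

    #include "llvm/ADT/SetVector.h"   // the include added by the hunk above

    SmallSetVector<PHINode *, 8> PHIUsers;
    PHIUsers.insert(PN);   // set semantics:
    PHIUsers.insert(PN);   // the duplicate insert is a no-op
    for (PHINode *PHI : PHIUsers) {
      // Visited in insertion order, independent of where PN was allocated.
      (void)PHI;
    }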
unsigned PPWOldSize = PostPromotionWorklist.size(); unsigned NumUses = 0; - SmallPtrSet<PHINode *, 8> PHIUsers; - SmallPtrSet<SelectInst *, 8> SelectUsers; + SmallSetVector<PHINode *, 8> PHIUsers; + SmallSetVector<SelectInst *, 8> SelectUsers; AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), P.endOffset(), IsIntegerPromotable, VecTy, @@ -3902,19 +3903,16 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // Now that we've processed all the slices in the new partition, check if any // PHIs or Selects would block promotion. - for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(), - E = PHIUsers.end(); - I != E; ++I) - if (!isSafePHIToSpeculate(**I)) { + for (PHINode *PHI : PHIUsers) + if (!isSafePHIToSpeculate(*PHI)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); break; } - for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(), - E = SelectUsers.end(); - I != E; ++I) - if (!isSafeSelectToSpeculate(**I)) { + + for (SelectInst *Sel : SelectUsers) + if (!isSafeSelectToSpeculate(*Sel)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp index 00e3c95f6f06..52201d8f3e51 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" @@ -83,6 +84,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeCFGSimplifyPassPass(Registry); initializeLateCFGSimplifyPassPass(Registry); initializeStructurizeCFGPass(Registry); + initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); diff --git a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 4d594532c365..cde659b9d189 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1138,7 +1138,7 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) { // Add I to DominatingExprs if it's an add/sub that can't sign overflow. if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) || match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) { - if (isKnownNotFullPoison(I)) { + if (programUndefinedIfFullPoison(I)) { const SCEV *Key = SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS)); DominatingExprs[Key].push_back(I); diff --git a/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp new file mode 100644 index 000000000000..fb1b47c48276 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -0,0 +1,626 @@ +//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +#define DEBUG_TYPE "simple-loop-unswitch" + +using namespace llvm; + +STATISTIC(NumBranches, "Number of branches unswitched"); +STATISTIC(NumSwitches, "Number of switches unswitched"); +STATISTIC(NumTrivial, "Number of unswitches that are trivial"); + +static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, + Constant &Replacement) { + assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); + + // Replace uses of LIC in the loop with the given constant. + for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) { + // Grab the use and walk past it so we can clobber it in the use list. + Use *U = &*UI++; + Instruction *UserI = dyn_cast<Instruction>(U->getUser()); + if (!UserI || !L.contains(UserI)) + continue; + + // Replace this use within the loop body. + *U = &Replacement; + } +} + +/// Update the dominator tree after removing one exiting predecessor of a loop +/// exit block. +static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L, + DominatorTree &DT) { + assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) && + "Cannot have empty predecessors of the loop exit block if we split " + "off a block to unswitch!"); + + BasicBlock *IDom = *pred_begin(LoopExitBB); + // Walk all of the other predecessors finding the nearest common dominator + // until all predecessors are covered or we reach the loop header. The loop + // header necessarily dominates all loop exit blocks in loop simplified form + // so we can early-exit the moment we hit that block. + for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB); + PI != PE && IDom != L.getHeader(); ++PI) + IDom = DT.findNearestCommonDominator(IDom, *PI); + + DT.changeImmediateDominator(LoopExitBB, IDom); +} + +/// Update the dominator tree after unswitching a particular former exit block. +/// +/// This handles the full update of the dominator tree after hoisting a block +/// that previously was an exit block (or split off of an exit block) up to be +/// reached from the new immediate dominator of the preheader. +/// +/// The common case is simple -- we just move the unswitched block to have an +/// immediate dominator of the old preheader. But in complex cases, there may +/// be other blocks reachable from the unswitched block that are immediately +/// dominated by some node between the unswitched one and the old preheader. +/// All of these also need to be hoisted in the dominator tree. We also want to +/// minimize queries to the dominator tree because each step of this +/// invalidates any DFS numbers that would make queries fast. 
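replaceLoopUsesWithConstant above leans on a use-list idiom that is easy to get wrong: assigning through a Use unlinks it from the value's use list, so the iterator must be advanced past the slot before the write. Restated with the parameter names of the patch:

    for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) {
      Use *U = &*UI++;                 // step past U before clobbering it
      auto *UserI = dyn_cast<Instruction>(U->getUser());
      if (!UserI || !L.contains(UserI))
        continue;                      // only rewrite uses inside the loop
      *U = &Replacement;               // safe: this unlinks U, but the
                                       // iterator has already moved past it
    }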
+static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, + DominatorTree &DT) { + DomTreeNode *OldPHNode = DT[OldPH]; + DomTreeNode *UnswitchedNode = DT[UnswitchedBB]; + // If the dominator tree has already been updated for this unswitched node, + // we're done. This makes it easier to use this routine if there are multiple + // paths to the same unswitched destination. + if (UnswitchedNode->getIDom() == OldPHNode) + return; + + // First collect the domtree nodes that we are hoisting over. These are the + // set of nodes which may have children that need to be hoisted as well. + SmallPtrSet<DomTreeNode *, 4> DomChain; + for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode; + IDom = IDom->getIDom()) + DomChain.insert(IDom); + + // The unswitched block ends up immediately dominated by the old preheader -- + // regardless of whether it is the loop exit block or split off of the loop + // exit block. + DT.changeImmediateDominator(UnswitchedNode, OldPHNode); + + // Blocks reachable from the unswitched block may need to change their IDom + // as well. + SmallSetVector<BasicBlock *, 4> Worklist; + for (auto *SuccBB : successors(UnswitchedBB)) + Worklist.insert(SuccBB); + + // Walk the worklist. We grow the list in the loop and so must recompute size. + for (int i = 0; i < (int)Worklist.size(); ++i) { + auto *BB = Worklist[i]; + + DomTreeNode *Node = DT[BB]; + assert(!DomChain.count(Node) && + "Cannot be dominated by a block you can reach!"); + // If this block doesn't have an immediate dominator somewhere in the chain + // we hoisted over, then its position in the domtree hasn't changed. Either + // it is above the region hoisted and still valid, or it is below the + // hoisted block and so was trivially updated. This also applies to + // everything reachable from this block so we're completely done with + // it. + if (!DomChain.count(Node->getIDom())) + continue; + + // We need to change the IDom for this node but also walk its successors + // which could have similar dominance position. + DT.changeImmediateDominator(Node, OldPHNode); + for (auto *SuccBB : successors(BB)) + Worklist.insert(SuccBB); + } +} + +/// Unswitch a trivial branch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the branch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and one of the successors is a loop exit. This +/// allows us to unswitch without duplicating the loop, making it trivial. +/// +/// If this routine fails to unswitch the branch it returns false. +/// +/// If the branch can be unswitched, this routine splits the preheader and +/// hoists the branch above that split. Preserves loop simplified form +/// (splitting the exit block as necessary). It simplifies the branch within +/// the loop to an unconditional branch but doesn't remove it entirely. Further +/// cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, + LoopInfo &LI) { + assert(BI.isConditional() && "Can only unswitch a conditional branch!"); + DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n"); + + Value *LoopCond = BI.getCondition(); + + // Need a trivial loop condition to unswitch. + if (!L.isLoopInvariant(LoopCond)) + return false; + + // FIXME: We should compute this once at the start and update it!
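The index-based loop in updateDTAfterUnswitch, rather than a range-for, is deliberate: the worklist grows while it is being processed, and recomputing size() each iteration keeps newly inserted blocks in the walk while the set half of SmallSetVector rejects blocks that were already queued. The skeleton in isolation, for an assumed starting block Start:

    SmallSetVector<BasicBlock *, 4> Worklist;
    for (BasicBlock *SuccBB : successors(Start))
      Worklist.insert(SuccBB);
    // Index, don't iterate: insertions during a range-for would invalidate
    // the iterators, but plain indices stay valid as the vector grows.
    for (unsigned i = 0; i < Worklist.size(); ++i) {
      BasicBlock *BB = Worklist[i];
      // ... decide whether BB's immediate dominator must be hoisted ...
      for (BasicBlock *SuccBB : successors(BB))
        Worklist.insert(SuccBB);   // duplicates are rejected by the set
    }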
+ SmallVector<BasicBlock *, 16> ExitBlocks; + L.getExitBlocks(ExitBlocks); + SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any + // side-effects. If so, determine the value of Cond that causes + // it to do this. + ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext()); + ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext()); + int LoopExitSuccIdx = 0; + auto *LoopExitBB = BI.getSuccessor(0); + if (!ExitBlockSet.count(LoopExitBB)) { + std::swap(CondVal, Replacement); + LoopExitSuccIdx = 1; + LoopExitBB = BI.getSuccessor(1); + if (!ExitBlockSet.count(LoopExitBB)) + return false; + } + auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx); + assert(L.contains(ContinueBB) && + "Cannot have both successors exit and still be in the loop!"); + + // If the loop exit block contains phi nodes, this isn't trivial. + // FIXME: We should examine the PHI to determine whether or not we can handle + // it trivially. + if (isa<PHINode>(LoopExitBB->begin())) + return false; + + DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal + << " == " << LoopCond << "\n"); + + // Split the preheader, so that we know that there is a safe place to insert + // the conditional branch. We will change the preheader to have a conditional + // branch on LoopCond. + BasicBlock *OldPH = L.getLoopPreheader(); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + + // Now that we have a place to insert the conditional branch, create a place + // to branch to: this is the exit block out of the loop that we are + // unswitching. We need to split this if there are other loop predecessors. + // Because the loop is in simplified form, *any* other predecessor is enough. + BasicBlock *UnswitchedBB; + if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) { + (void)PredBB; + assert(PredBB == BI.getParent() && "A branch's parent is't a predecessor!"); + UnswitchedBB = LoopExitBB; + } else { + UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); + } + + BasicBlock *ParentBB = BI.getParent(); + + // Now splice the branch to gate reaching the new preheader and re-point its + // successors. + OldPH->getInstList().splice(std::prev(OldPH->end()), + BI.getParent()->getInstList(), BI); + OldPH->getTerminator()->eraseFromParent(); + BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); + BI.setSuccessor(1 - LoopExitSuccIdx, NewPH); + + // Create a new unconditional branch that will continue the loop as a new + // terminator. + BranchInst::Create(ContinueBB, ParentBB); + + // Now we need to update the dominator tree. + updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); + // But if we split something off of the loop exit block then we also removed + // one of the predecessors for the loop exit block and may need to update its + // idom. + if (UnswitchedBB != LoopExitBB) + updateLoopExitIDom(LoopExitBB, L, DT); + + // Since this is an i1 condition we can also trivially replace uses of it + // within the loop with a constant. + replaceLoopUsesWithConstant(L, *LoopCond, *Replacement); + + ++NumTrivial; + ++NumBranches; + return true; +} + +/// Unswitch a trivial switch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the switch has +/// been validated as trivial (no side effects). 
This routine checks if the +/// condition is invariant and that at least one of the successors is a loop +/// exit. This allows us to unswitch without duplicating the loop, making it +/// trivial. +/// +/// If this routine fails to unswitch the switch it returns false. +/// +/// If the switch can be unswitched, this routine splits the preheader and +/// copies the switch above that split. If the default case is one of the +/// exiting cases, it copies the non-exiting cases and points them at the new +/// preheader. If the default case is not exiting, it copies the exiting cases +/// and points the default at the preheader. It preserves loop simplified form +/// (splitting the exit blocks as necessary). It simplifies the switch within +/// the loop by removing now-dead cases. If the default case is one of those +/// unswitched, it replaces its destination with a new basic block containing +/// only unreachable. Such basic blocks, while technically loop exits, are not +/// considered for unswitching so this is a stable transform and the same +/// switch will not be revisited. If after unswitching there is only a single +/// in-loop successor, the switch is further simplified to an unconditional +/// branch. Still more cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, + LoopInfo &LI) { + DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n"); + Value *LoopCond = SI.getCondition(); + + // If this isn't switching on an invariant condition, we can't unswitch it. + if (!L.isLoopInvariant(LoopCond)) + return false; + + // FIXME: We should compute this once at the start and update it! + SmallVector<BasicBlock *, 16> ExitBlocks; + L.getExitBlocks(ExitBlocks); + SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + + SmallVector<int, 4> ExitCaseIndices; + for (auto Case : SI.cases()) { + auto *SuccBB = Case.getCaseSuccessor(); + if (ExitBlockSet.count(SuccBB) && !isa<PHINode>(SuccBB->begin())) + ExitCaseIndices.push_back(Case.getCaseIndex()); + } + BasicBlock *DefaultExitBB = nullptr; + if (ExitBlockSet.count(SI.getDefaultDest()) && + !isa<PHINode>(SI.getDefaultDest()->begin()) && + !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) + DefaultExitBB = SI.getDefaultDest(); + else if (ExitCaseIndices.empty()) + return false; + + DEBUG(dbgs() << " unswitching trivial cases...\n"); + + SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases; + ExitCases.reserve(ExitCaseIndices.size()); + // We walk the case indices backwards so that we remove the last case first + // and don't disrupt the earlier indices. + for (unsigned Index : reverse(ExitCaseIndices)) { + auto CaseI = SI.case_begin() + Index; + // Save the value of this case. + ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()}); + // Delete the unswitched cases. + SI.removeCase(CaseI); + } + + // Check if after this all of the remaining cases point at the same + // successor. + BasicBlock *CommonSuccBB = nullptr; + if (SI.getNumCases() > 0 && + std::all_of(std::next(SI.case_begin()), SI.case_end(), + [&SI](const SwitchInst::CaseHandle &Case) { + return Case.getCaseSuccessor() == + SI.case_begin()->getCaseSuccessor(); + })) + CommonSuccBB = SI.case_begin()->getCaseSuccessor(); + + if (DefaultExitBB) { + // We can't remove the default edge so replace it with an edge to either + // the single common remaining successor (if we have one) or an unreachable + // block. 
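A detail worth calling out in the case-removal loop above: the collected indices are walked from highest to lowest because SwitchInst::removeCase fills the vacated slot from the end of the case list, so any recorded index above the removed one would go stale. Removing high indices first keeps every lower recorded index valid:

    // ExitCaseIndices and ExitCases as declared in the patch above.
    for (unsigned Index : reverse(ExitCaseIndices)) {
      auto CaseI = SI.case_begin() + Index;
      ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
      SI.removeCase(CaseI);   // may move the tail case into this slot
    }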
+ if (CommonSuccBB) { + SI.setDefaultDest(CommonSuccBB); + } else { + BasicBlock *ParentBB = SI.getParent(); + BasicBlock *UnreachableBB = BasicBlock::Create( + ParentBB->getContext(), + Twine(ParentBB->getName()) + ".unreachable_default", + ParentBB->getParent()); + new UnreachableInst(ParentBB->getContext(), UnreachableBB); + SI.setDefaultDest(UnreachableBB); + DT.addNewBlock(UnreachableBB, ParentBB); + } + } else { + // If we're not unswitching the default, we need it to match any cases to + // have a common successor or if we have no cases it is the common + // successor. + if (SI.getNumCases() == 0) + CommonSuccBB = SI.getDefaultDest(); + else if (SI.getDefaultDest() != CommonSuccBB) + CommonSuccBB = nullptr; + } + + // Split the preheader, so that we know that there is a safe place to insert + // the switch. + BasicBlock *OldPH = L.getLoopPreheader(); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + OldPH->getTerminator()->eraseFromParent(); + + // Now add the unswitched switch. + auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); + + // Split any exit blocks with remaining in-loop predecessors. We walk in + // reverse so that we split in the same order as the cases appeared. This is + // purely for convenience of reading the resulting IR, but it doesn't cost + // anything really. + SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap; + // Handle the default exit if necessary. + // FIXME: It'd be great if we could merge this with the loop below but LLVM's + // ranges aren't quite powerful enough yet. + if (DefaultExitBB && !pred_empty(DefaultExitBB)) { + auto *SplitBB = + SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); + updateLoopExitIDom(DefaultExitBB, L, DT); + DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; + } + // Note that we must use a reference in the for loop so that we update the + // container. + for (auto &CasePair : reverse(ExitCases)) { + // Grab a reference to the exit block in the pair so that we can update it. + BasicBlock *&ExitBB = CasePair.second; + + // If this case is the last edge into the exit block, we can simply reuse it + // as it will no longer be a loop exit. No mapping necessary. + if (pred_empty(ExitBB)) + continue; + + // Otherwise we need to split the exit block so that we retain an exit + // block from the loop and a target for the unswitched condition. + BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB]; + if (!SplitExitBB) { + // If this is the first time we see this, do the split and remember it. + SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); + updateLoopExitIDom(ExitBB, L, DT); + } + ExitBB = SplitExitBB; + } + + // Now add the unswitched cases. We do this in reverse order as we built them + // in reverse order. + for (auto CasePair : reverse(ExitCases)) { + ConstantInt *CaseVal = CasePair.first; + BasicBlock *UnswitchedBB = CasePair.second; + + NewSI->addCase(CaseVal, UnswitchedBB); + updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); + } + + // If the default was unswitched, re-point it and add explicit cases for + // entering the loop. + if (DefaultExitBB) { + NewSI->setDefaultDest(DefaultExitBB); + updateDTAfterUnswitch(DefaultExitBB, OldPH, DT); + + // We removed all the exit cases, so we just copy the cases to the + // unswitched switch. 
+ for (auto Case : SI.cases()) + NewSI->addCase(Case.getCaseValue(), NewPH); + } + + // If we ended up with a common successor for every path through the switch + // after unswitching, rewrite it to an unconditional branch to make it easy + // to recognize. Otherwise we potentially have to recognize the default case + // pointing at unreachable and other complexity. + if (CommonSuccBB) { + BasicBlock *BB = SI.getParent(); + SI.eraseFromParent(); + BranchInst::Create(CommonSuccBB, BB); + } + + DT.verifyDomTree(); + ++NumTrivial; + ++NumSwitches; + return true; +} + +/// This routine scans the loop to find a branch or switch which occurs before +/// any side effects occur. These can potentially be unswitched without +/// duplicating the loop. If a branch or switch is successfully unswitched the +/// scanning continues to see if subsequent branches or switches have become +/// trivial. Once all trivial candidates have been unswitched, this routine +/// returns. +/// +/// The return value indicates whether anything was unswitched (and therefore +/// changed). +static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, + LoopInfo &LI) { + bool Changed = false; + + // If loop header has only one reachable successor we should keep looking for + // trivial condition candidates in the successor as well. An alternative is + // to constant fold conditions and merge successors into loop header (then we + // only need to check header's terminator). The reason for not doing this in + // LoopUnswitch pass is that it could potentially break LoopPassManager's + // invariants. Folding dead branches could either eliminate the current loop + // or make other loops unreachable. LCSSA form might also not be preserved + // after deleting branches. The following code keeps traversing loop header's + // successors until it finds the trivial condition candidate (condition that + // is not a constant). Since unswitching generates branches with constant + // conditions, this scenario could be very common in practice. + BasicBlock *CurrentBB = L.getHeader(); + SmallPtrSet<BasicBlock *, 8> Visited; + Visited.insert(CurrentBB); + do { + // Check if there are any side-effecting instructions (e.g. stores, calls, + // volatile loads) in the part of the loop that the code *would* execute + // without unswitching. + if (llvm::any_of(*CurrentBB, + [](Instruction &I) { return I.mayHaveSideEffects(); })) + return Changed; + + TerminatorInst *CurrentTerm = CurrentBB->getTerminator(); + + if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) { + // Don't bother trying to unswitch past a switch with a constant + // condition. This should be removed prior to running this pass by + // simplify-cfg. + if (isa<Constant>(SI->getCondition())) + return Changed; + + if (!unswitchTrivialSwitch(L, *SI, DT, LI)) + // Couldn't unswitch this one so we're done. + return Changed; + + // Mark that we managed to unswitch something. + Changed = true; + + // If unswitching turned the terminator into an unconditional branch then + // we can continue. The unswitching logic specifically works to fold any + // cases it can into an unconditional branch to make it easier to + // recognize here. + auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator()); + if (!BI || BI->isConditional()) + return Changed; + + CurrentBB = BI->getSuccessor(0); + continue; + } + + auto *BI = dyn_cast<BranchInst>(CurrentTerm); + if (!BI) + // We do not understand other terminator instructions.
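The driver loop that follows is easiest to see as a straight-line walk: starting at the header, give up the moment a block contains anything side-effecting, otherwise try to unswitch the terminator and, when that leaves an unconditional branch behind, step through it. A condensed skeleton of that walk (the unswitch attempts themselves are elided):

    BasicBlock *BB = L.getHeader();
    SmallPtrSet<BasicBlock *, 8> Visited;
    Visited.insert(BB);
    do {
      // Anything observable before the branch makes unswitching non-trivial.
      if (llvm::any_of(*BB, [](Instruction &I) {
            return I.mayHaveSideEffects();
          }))
        break;
      // ... try to unswitch BB's terminator here; success folds it into an
      // unconditional branch that the walk can follow ...
      auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
      if (!BI || BI->isConditional())
        break;
      BB = BI->getSuccessor(0);
      // Stop when the walk leaves the loop or revisits a block.
    } while (L.contains(BB) && Visited.insert(BB).second);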
+ return Changed; + + // Don't bother trying to unswitch past an unconditional branch or a branch + // with a constant value. These should be removed by simplify-cfg prior to + // running this pass. + if (!BI->isConditional() || isa<Constant>(BI->getCondition())) + return Changed; + + // Found a trivial condition candidate: non-foldable conditional branch. If + // we fail to unswitch this, we can't do anything else that is trivial. + if (!unswitchTrivialBranch(L, *BI, DT, LI)) + return Changed; + + // Mark that we managed to unswitch something. + Changed = true; + + // We unswitched the branch. This should always leave us with an + // unconditional branch that we can follow now. + BI = cast<BranchInst>(CurrentBB->getTerminator()); + assert(!BI->isConditional() && + "Cannot form a conditional branch by unswitching1"); + CurrentBB = BI->getSuccessor(0); + + // When continuing, if we exit the loop or reach a previous visited block, + // then we can not reach any trivial condition candidates (unfoldable + // branch instructions or switch instructions) and no unswitch can happen. + } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second); + + return Changed; +} + +/// Unswitch control flow predicated on loop invariant conditions. +/// +/// This first hoists all branches or switches which are trivial (IE, do not +/// require duplicating any part of the loop) out of the loop body. It then +/// looks at other loop invariant control flows and tries to unswitch those as +/// well by cloning the loop if the result is small enough. +static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, + AssumptionCache &AC) { + assert(L.isLCSSAForm(DT) && + "Loops must be in LCSSA form before unswitching."); + bool Changed = false; + + // Must be in loop simplified form: we need a preheader and dedicated exits. + if (!L.isLoopSimplifyForm()) + return false; + + // Try trivial unswitch first before loop over other basic blocks in the loop. + Changed |= unswitchAllTrivialConditions(L, DT, LI); + + // FIXME: Add support for non-trivial unswitching by cloning the loop. + + return Changed; +} + +PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + Function &F = *L.getHeader()->getParent(); + (void)F; + + DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"); + + if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC)) + return PreservedAnalyses::all(); + +#ifndef NDEBUG + // Historically this pass has had issues with the dominator tree so verify it + // in asserts builds. 
+ AR.DT.verifyDomTree(); +#endif + return getLoopPassPreservedAnalyses(); +} + +namespace { +class SimpleLoopUnswitchLegacyPass : public LoopPass { +public: + static char ID; // Pass ID, replacement for typeid + explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) { + initializeSimpleLoopUnswitchLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + getLoopAnalysisUsage(AU); + } +}; +} // namespace + +bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipLoop(L)) + return false; + + Function &F = *L->getHeader()->getParent(); + + DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n"); + + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + + bool Changed = unswitchLoop(*L, DT, LI, AC); + +#ifndef NDEBUG + // Historically this pass has had issues with the dominator tree so verify it + // in asserts builds. + DT.verifyDomTree(); +#endif + return Changed; +} + +char SimpleLoopUnswitchLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", + "Simple unswitch loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", + "Simple unswitch loops", false, false) + +Pass *llvm::createSimpleLoopUnswitchLegacyPass() { + return new SimpleLoopUnswitchLegacyPass(); +} diff --git a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp index a7c308b59877..a0fc966cee2c 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -208,47 +208,6 @@ bool SpeculativeExecutionPass::runOnBasicBlock(BasicBlock &B) { return false; } -static unsigned ComputeSpeculationCost(const Instruction *I, - const TargetTransformInfo &TTI) { - switch (Operator::getOpcode(I)) { - case Instruction::GetElementPtr: - case Instruction::Add: - case Instruction::Mul: - case Instruction::And: - case Instruction::Or: - case Instruction::Select: - case Instruction::Shl: - case Instruction::Sub: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::Xor: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::Call: - case Instruction::BitCast: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::AddrSpaceCast: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPExt: - case Instruction::FPTrunc: - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::ICmp: - case Instruction::FCmp: - return TTI.getUserCost(I); - - default: - return UINT_MAX; // Disallow anything not whitelisted. 
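Dropping ComputeSpeculationCost means SpeculativeExecution no longer maintains its own opcode whitelist; TTI::getUserCost already returns a target-aware cost, and while the UINT_MAX comparison is kept from the old code, the gating now effectively falls to isSafeToSpeculativelyExecute. The call site after the change, condensed from the hunk below (the preceding-uses check is elided):

    unsigned TotalSpeculationCost = 0;
    for (Instruction &I : FromBlock) {
      const unsigned Cost = TTI->getUserCost(&I);
      if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I))
        TotalSpeculationCost += Cost;   // I is a hoisting candidate
    }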
- } -} - bool SpeculativeExecutionPass::considerHoistingFromTo( BasicBlock &FromBlock, BasicBlock &ToBlock) { SmallSet<const Instruction *, 8> NotHoisted; @@ -264,7 +223,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( unsigned TotalSpeculationCost = 0; for (auto& I : FromBlock) { - const unsigned Cost = ComputeSpeculationCost(&I, *TTI); + const unsigned Cost = TTI->getUserCost(&I); if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) && AllPrecedingUsesFromBlockHoisted(&I)) { TotalSpeculationCost += Cost; diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 22af21d55c01..3d5cbfc93f2e 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -78,8 +78,8 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { // Recursively deleting a PHI may cause multiple PHIs to be deleted - // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. - SmallVector<WeakVH, 8> PHIs; + // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete. + SmallVector<WeakTrackingVH, 8> PHIs; for (BasicBlock::iterator I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) PHIs.push_back(PN); diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 6cd9f1614991..1956697ccb8b 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -58,7 +58,7 @@ static bool setOnlyReadsMemory(Function &F) { static bool setOnlyAccessesArgMemory(Function &F) { if (F.onlyAccessesArgMemory()) return false; - F.setOnlyAccessesArgMemory (); + F.setOnlyAccessesArgMemory(); ++NumArgMemOnly; return true; } @@ -71,37 +71,36 @@ static bool setDoesNotThrow(Function &F) { return true; } -static bool setDoesNotCapture(Function &F, unsigned n) { - if (F.doesNotCapture(n)) +static bool setRetDoesNotAlias(Function &F) { + if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias)) return false; - F.setDoesNotCapture(n); - ++NumNoCapture; + F.addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + ++NumNoAlias; return true; } -static bool setOnlyReadsMemory(Function &F, unsigned n) { - if (F.onlyReadsMemory(n)) +static bool setDoesNotCapture(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::NoCapture)) return false; - F.setOnlyReadsMemory(n); - ++NumReadOnlyArg; + F.addParamAttr(ArgNo, Attribute::NoCapture); + ++NumNoCapture; return true; } -static bool setDoesNotAlias(Function &F, unsigned n) { - if (F.doesNotAlias(n)) +static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly)) return false; - F.setDoesNotAlias(n); - ++NumNoAlias; + F.addParamAttr(ArgNo, Attribute::ReadOnly); + ++NumReadOnlyArg; return true; } -static bool setNonNull(Function &F, unsigned n) { - assert( - (n != AttributeList::ReturnIndex || F.getReturnType()->isPointerTy()) && - "nonnull applies only to pointers"); - if (F.getAttributes().hasAttribute(n, Attribute::NonNull)) +static bool setRetNonNull(Function &F) { + assert(F.getReturnType()->isPointerTy() && + "nonnull applies only to pointers"); + if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull)) return false; - F.addAttribute(n, Attribute::NonNull); + F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); 
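Everything that follows in BuildLibCalls is one mechanical migration: the attribute helpers now take zero-based argument numbers and go through hasParamAttribute/addParamAttr, so every call site's index drops by one (strlen's single pointer parameter, for instance, becomes argument 0 instead of attribute slot 1). The new helper shape, pulled out for clarity:

    static bool setDoesNotCapture(Function &F, unsigned ArgNo) {
      // ArgNo is 0-based; the shift to the attribute-list slot happens
      // inside hasParamAttribute/addParamAttr.
      if (F.hasParamAttribute(ArgNo, Attribute::NoCapture))
        return false;
      F.addParamAttr(ArgNo, Attribute::NoCapture);
      ++NumNoCapture;   // statistic counter, as in the patch
      return true;
    }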
++NumNonNull; return true; } @@ -116,7 +115,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strlen: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_strchr: case LibFunc_strrchr: @@ -131,8 +130,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strtold: case LibFunc_strtoull: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_strcpy: case LibFunc_stpcpy: @@ -141,14 +140,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strncpy: case LibFunc_stpncpy: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_strxfrm: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_strcmp: // 0,1 case LibFunc_strspn: // 0,1 @@ -159,84 +158,84 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strncasecmp: // Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_strstr: case LibFunc_strpbrk: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_strtok: case LibFunc_strtok_r: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_scanf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_setbuf: case LibFunc_setvbuf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_strdup: case LibFunc_strndup: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_stat: case LibFunc_statvfs: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_sscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_sprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= 
setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_snprintf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 3); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_setitimer: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); Changed |= setDoesNotCapture(F, 2); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_system: // May throw; "system" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_malloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_memcmp: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_memchr: case LibFunc_memrchr: @@ -247,100 +246,100 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_modff: case LibFunc_modfl: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_memcpy: case LibFunc_mempcpy: case LibFunc_memccpy: case LibFunc_memmove: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_memcpy_chk: Changed |= setDoesNotThrow(F); return Changed; case LibFunc_memalign: - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_mkdir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_mktime: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_realloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_read: // May throw; "read" is a valid pthread cancellation point. 
- Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_rewind: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_rmdir: case LibFunc_remove: case LibFunc_realpath: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_rename: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_readlink: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_write: // May throw; "write" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_bcopy: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_bcmp: Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_bzero: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_calloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_chmod: case LibFunc_chown: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_ctermid: case LibFunc_clearerr: case LibFunc_closedir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_atoi: case LibFunc_atol: @@ -348,26 +347,26 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_atoll: Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_access: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_fopen: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_fdopen: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 2); - Changed |= 
setOnlyReadsMemory(F, 2); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_feof: case LibFunc_free: @@ -384,11 +383,11 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_funlockfile: case LibFunc_ftrylockfile: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_ferror: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F); return Changed; case LibFunc_fputc: @@ -398,51 +397,51 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_frexpl: case LibFunc_fstatvfs: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_fgets: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 3); + Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_fread: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 4); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 3); return Changed; case LibFunc_fwrite: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 4); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 3); // FIXME: readonly #1? return Changed; case LibFunc_fputs: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_fscanf: case LibFunc_fprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_fgetpos: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_getc: case LibFunc_getlogin_r: case LibFunc_getc_unlocked: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_getenv: Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_gets: case LibFunc_getchar: @@ -450,132 +449,132 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_getitimer: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_getpwnam: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_ungetc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_uname: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_unlink: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= 
setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_unsetenv: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_utime: case LibFunc_utimes: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_putc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_puts: case LibFunc_printf: case LibFunc_perror: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_pread: // May throw; "pread" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_pwrite: // May throw; "pwrite" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_putchar: Changed |= setDoesNotThrow(F); return Changed; case LibFunc_popen: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_pclose: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_vscanf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_vsscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_vfscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_valloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_vprintf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_vfprintf: case LibFunc_vsprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_vsnprintf: Changed |= setDoesNotThrow(F); - Changed 
|= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 3); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_open: // May throw; "open" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_opendir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_tmpfile: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_times: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_htonl: case LibFunc_htons: @@ -586,93 +585,93 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_lstat: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_lchown: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_qsort: // May throw; places call through function pointer. - Changed |= setDoesNotCapture(F, 4); + Changed |= setDoesNotCapture(F, 3); return Changed; case LibFunc_dunder_strdup: case LibFunc_dunder_strndup: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_dunder_strtok_r: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_under_IO_getc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_under_IO_putc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_dunder_isoc99_scanf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_stat64: case LibFunc_lstat64: case LibFunc_statvfs64: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_dunder_isoc99_sscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 
2); return Changed; case LibFunc_fopen64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_fseeko64: case LibFunc_ftello64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_tmpfile64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_fstat64: case LibFunc_fstatvfs64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_open64: // May throw; "open" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_gettimeofday: // Currently some platforms have the restrict keyword on the arguments to // gettimeofday. To be conservative, do not add noalias to gettimeofday's // arguments. Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_Znwj: // new(unsigned int) case LibFunc_Znwm: // new(unsigned long) @@ -683,17 +682,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_msvc_new_array_int: // new[](unsigned int) case LibFunc_msvc_new_array_longlong: // new[](unsigned long long) // Operator new always returns a nonnull noalias pointer - Changed |= setNonNull(F, AttributeList::ReturnIndex); - Changed |= setDoesNotAlias(F, AttributeList::ReturnIndex); + Changed |= setRetNonNull(F); + Changed |= setRetDoesNotAlias(F); return Changed; //TODO: add LibFunc entries for: //case LibFunc_memset_pattern4: //case LibFunc_memset_pattern8: case LibFunc_memset_pattern16: Changed |= setOnlyAccessesArgMemory(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; // int __nvvm_reflect(const char *) case LibFunc_nvvm_reflect: @@ -889,7 +888,13 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType()); CallInst *CI = B.CreateCall(Callee, Op, Name); - CI->setAttributes(Attrs); + + // The incoming attribute set may have come from a speculatable intrinsic, but + // is being replaced with a library call which is not allowed to be + // speculatable. 
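[Editorial note, not part of the diff: the comment that closes the hunk above motivates the fix completed just below. A trimmed sketch of the pattern, reusing the same LLVM 5.0-era AttributeList call that the hunk itself adds.]

#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Strip Speculatable from the function-level attributes before reusing them
// on a plain library call, which must not be treated as speculatable.
static AttributeList stripSpeculatable(LLVMContext &Ctx, AttributeList Attrs) {
  return Attrs.removeAttribute(Ctx, AttributeList::FunctionIndex,
                               Attribute::Speculatable);
}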
+ CI->setAttributes(Attrs.removeAttribute(B.getContext(), + AttributeList::FunctionIndex, + Attribute::Speculatable)); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 385c12302e04..d5124ac89016 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -245,7 +245,7 @@ namespace { void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, std::vector<const BasicBlock*> &ToClone){ - WeakVH &BBEntry = VMap[BB]; + WeakTrackingVH &BBEntry = VMap[BB]; // Have we already cloned this block? if (BBEntry) return; @@ -547,7 +547,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Make a second pass over the PHINodes now that all of them have been // remapped into the new function, simplifying the PHINode and performing any // recursive simplifications exposed. This will transparently update the - // WeakVH in the VMap. Notably, we rely on that so that if we coalesce + // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index 5d6fbc3325ff..6d56e08af99f 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1640,7 +1640,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, - CalledFunc->getParamAlignment(ArgNo+1)); + CalledFunc->getParamAlignment(ArgNo)); if (ActualArg != *AI) ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } @@ -2302,7 +2302,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, AssumptionCache *AC = IFI.GetAssumptionCache ? 
&(*IFI.GetAssumptionCache)(*Caller) : nullptr; auto &DL = Caller->getParent()->getDataLayout(); - if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, AC)) { + if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index fe93d6927c63..42aca757c2af 100644 --- a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -48,16 +49,6 @@ using namespace llvm; STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted"); STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted"); -static cl::opt<bool> LibCallsShrinkWrapDoDomainError( - "libcalls-shrinkwrap-domain-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with domain errors")); -static cl::opt<bool> LibCallsShrinkWrapDoRangeError( - "libcalls-shrinkwrap-range-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with range errors")); -static cl::opt<bool> LibCallsShrinkWrapDoPoleError( - "libcalls-shrinkwrap-pole-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with pole errors")); - namespace { class LibCallsShrinkWrapLegacyPass : public FunctionPass { public: @@ -82,10 +73,11 @@ INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap", namespace { class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> { public: - LibCallsShrinkWrap(const TargetLibraryInfo &TLI) : TLI(TLI), Changed(false){}; - bool isChanged() const { return Changed; } + LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT) + : TLI(TLI), DT(DT){}; void visitCallInst(CallInst &CI) { checkCandidate(CI); } - void perform() { + bool perform() { + bool Changed = false; for (auto &CI : WorkList) { DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() << "\n"); @@ -94,6 +86,7 @@ public: DEBUG(dbgs() << "Transformed\n"); } } + return Changed; } private: @@ -134,8 +127,8 @@ private: } const TargetLibraryInfo &TLI; + DominatorTree *DT; SmallVector<CallInst *, 16> WorkList; - bool Changed; }; } // end anonymous namespace @@ -241,8 +234,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_atanhf: // Same as atanh case LibFunc_atanhl: // Same as atanh { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedTwoCond; Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f); break; @@ -262,8 +253,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_logbf: // Same as log case LibFunc_logbl: // Same as log { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f); break; @@ -274,8 +263,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_log1pf: // Same as log1p case LibFunc_log1pl: // Same as log1p { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f); break; @@ -285,9 
+272,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, // RangeError: overflow or underflow case LibFunc_powf: case LibFunc_powl: { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError || - !LibCallsShrinkWrapDoRangeError) - return false; Cond = generateCondForPow(CI, Func); if (Cond == nullptr) return false; @@ -346,7 +330,7 @@ Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI, UpperBound = 11356.0f; break; default: - llvm_unreachable("Should be reach here"); + llvm_unreachable("Unhandled library call!"); } ++NumWrappedOneCond; @@ -410,7 +394,7 @@ Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI, UpperBound = 11383.0f; break; default: - llvm_unreachable("Should be reach here"); + llvm_unreachable("Unhandled library call!"); } ++NumWrappedTwoCond; @@ -499,14 +483,17 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, // Wrap conditions that can potentially generate errno to the library call. void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { - assert(Cond != nullptr && "hrinkWrapCI is not expecting an empty call inst"); + assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst"); MDNode *BranchWeights = MDBuilder(CI->getContext()).createBranchWeights(1, 2000); + TerminatorInst *NewInst = - SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights); + SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT); BasicBlock *CallBB = NewInst->getParent(); CallBB->setName("cdce.call"); - CallBB->getSingleSuccessor()->setName("cdce.end"); + BasicBlock *SuccBB = CallBB->getSingleSuccessor(); + assert(SuccBB && "The split block should have a single successor"); + SuccBB->setName("cdce.end"); CI->removeFromParent(); CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI); DEBUG(dbgs() << "== Basic Block After =="); @@ -522,32 +509,38 @@ bool LibCallsShrinkWrap::perform(CallInst *CI) { TLI.getLibFunc(*Callee, Func); assert(Func && "perform() is not expecting an empty function"); - if (LibCallsShrinkWrapDoDomainError && performCallDomainErrorOnly(CI, Func)) - return true; - - if (LibCallsShrinkWrapDoRangeError && performCallRangeErrorOnly(CI, Func)) + if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func)) return true; - return performCallErrors(CI, Func); } void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } -static bool runImpl(Function &F, const TargetLibraryInfo &TLI) { +static bool runImpl(Function &F, const TargetLibraryInfo &TLI, + DominatorTree *DT) { if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - LibCallsShrinkWrap CCDCE(TLI); + LibCallsShrinkWrap CCDCE(TLI, DT); CCDCE.visit(F); - CCDCE.perform(); - return CCDCE.isChanged(); + bool Changed = CCDCE.perform(); + +// Verify the dominator after we've updated it locally. +#ifndef NDEBUG + if (DT) + DT->verifyDomTree(); +#endif + return Changed; } bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) { auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - return runImpl(F, TLI); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? 
&DTWP->getDomTree() : nullptr; + return runImpl(F, TLI, DT); } namespace llvm { @@ -561,11 +554,12 @@ FunctionPass *createLibCallsShrinkWrapPass() { PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F, FunctionAnalysisManager &FAM) { auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); - bool Changed = runImpl(F, TLI); - if (!Changed) + auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); + if (!runImpl(F, TLI, DT)) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); PA.preserve<GlobalsAA>(); + PA.preserve<DominatorTreeAnalysis>(); return PA; } } diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index d3002c5fb750..ce6b703f3528 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -562,7 +562,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { // that can be removed. BB->removePredecessor(Pred, true); - WeakVH PhiIt = &BB->front(); + WeakTrackingVH PhiIt = &BB->front(); while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); Value *OldPhiIt = PhiIt; @@ -1259,49 +1259,6 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { DbgValues.push_back(DVI); } -static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) { - if (Offset > 0) { - Ops.push_back(dwarf::DW_OP_plus); - Ops.push_back(Offset); - } else if (Offset < 0) { - Ops.push_back(dwarf::DW_OP_minus); - Ops.push_back(-Offset); - } -} - -enum { WithStackValue = true }; - -/// Prepend \p DIExpr with a deref and offset operation and optionally turn it -/// into a stack value. -static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr, - bool Deref, int64_t Offset = 0, - bool StackValue = false) { - if (!Deref && !Offset && !StackValue) - return DIExpr; - - SmallVector<uint64_t, 8> Ops; - appendOffset(Ops, Offset); - if (Deref) - Ops.push_back(dwarf::DW_OP_deref); - if (DIExpr) - for (auto Op : DIExpr->expr_ops()) { - // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment. - if (StackValue) { - if (Op.getOp() == dwarf::DW_OP_stack_value) - StackValue = false; - else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) { - Ops.push_back(dwarf::DW_OP_stack_value); - StackValue = false; - } - } - Ops.push_back(Op.getOp()); - for (unsigned I = 0; I < Op.getNumArgs(); ++I) - Ops.push_back(Op.getArg(I)); - } - if (StackValue) - Ops.push_back(dwarf::DW_OP_stack_value); - return Builder.createExpression(Ops); -} bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, Instruction *InsertBefore, DIBuilder &Builder, @@ -1313,9 +1270,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, auto *DIVar = DDI->getVariable(); auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); - - DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset); - + DIExpr = DIExpression::prepend(DIExpr, Deref, Offset); // Insert llvm.dbg.declare immediately after the original alloca, and remove // old llvm.dbg.declare. 
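[Editorial note, not part of the diff: the Local.cpp hunks above and below replace the file-local appendOffset/prependDIExpr helpers with the DIExpression::appendOffset and DIExpression::prepend members. A minimal sketch of the replacement call, assuming the LLVM 5.0-era signature visible in these hunks.]

#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;

// Rewrite a variable's location expression as "add Offset, then dereference".
// DIExpression::prepend emits the DW_OP_plus/DW_OP_minus ops for Offset first
// and then DW_OP_deref, matching the removed appendOffset + prependDIExpr pair.
static DIExpression *derefAtOffsetSketch(DIExpression *Expr, int64_t Offset) {
  return DIExpression::prepend(Expr, /*Deref=*/true, Offset);
}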
Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); @@ -1348,7 +1303,7 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, if (Offset) { SmallVector<uint64_t, 4> Ops; Ops.push_back(dwarf::DW_OP_deref); - appendOffset(Ops, Offset); + DIExpression::appendOffset(Ops, Offset); Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); DIExpr = Builder.createExpression(Ops); } @@ -1398,8 +1353,9 @@ void llvm::salvageDebugInfo(Instruction &I) { auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); // GEP offsets are i32 and thus always fit into an int64_t. - DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(), - WithStackValue); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, + Offset.getSExtValue(), + DIExpression::WithStackValue); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); @@ -1411,7 +1367,7 @@ void llvm::salvageDebugInfo(Instruction &I) { // Rewrite the load into DW_OP_deref. auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); - DIExpr = prependDIExpr(DIB, DIExpr, WithDeref); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); @@ -1520,7 +1476,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, II->setAttributes(CI->getAttributes()); // Make sure that anything using the call now uses the invoke! This also - // updates the CallGraph if present, because it uses a WeakVH. + // updates the CallGraph if present, because it uses a WeakTrackingVH. CI->replaceAllUsesWith(II); // Delete the original call diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index e7ba19665d59..72c06aef8037 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -210,7 +210,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -628,7 +628,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { if (SE) SE->forgetValue(PN); if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) { PN->replaceAllUsesWith(V); diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 43ab725b0769..4ab4d7949d23 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -757,7 +757,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, // Simplify any new induction variables in the partially unrolled loop. 
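[Editorial note, not part of the diff: throughout these hunks, SimplifyInstruction now takes a SimplifyQuery bundle, which the braced initializers build in place. A minimal sketch of the equivalent explicit construction, assuming the LLVM 5.0-era argument order of DataLayout, TargetLibraryInfo, DominatorTree, AssumptionCache implied by those initializers.]

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// Bundle the analyses once and reuse the query for every call, instead of
// threading four loose pointers through each SimplifyInstruction signature.
static Value *trySimplifySketch(Instruction *I, const DataLayout &DL,
                                DominatorTree *DT, AssumptionCache *AC) {
  const SimplifyQuery Q(DL, /*TLI=*/nullptr, DT, AC);
  return SimplifyInstruction(I, Q); // Returns null if nothing simplified.
}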
if (SE && !CompletelyUnroll && Count > 1) { - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; simplifyLoopIVs(L, SE, DT, LI, DeadInsts); // Aggressively clean up dead instructions that simplifyLoopIVs already @@ -777,7 +777,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = &*I++; - if (Value *V = SimplifyInstruction(Inst, DL)) + if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC})) if (LI->replacementPreservesLCSSAForm(Inst, V)) Inst->replaceAllUsesWith(V); if (isInstructionTriviallyDead(Inst)) diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 85db734fb182..391fde3b0b01 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -512,6 +512,16 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, BasicBlock *Latch = L->getLoopLatch(); + // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the + // targets of the Latch be the single exit block out of the loop. This needs + // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. + BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); + assert( + (LatchBR->getSuccessor(0) == Exit || LatchBR->getSuccessor(1) == Exit) && + "one of the loop latch successors should be " + "the exit block!"); + // Avoid warning of unused `LatchBR` variable in release builds. + (void)LatchBR; // Loop structure is the following: // // PreHeader diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp index dbe42c201dd4..29d334f2968f 100644 --- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -237,3 +237,35 @@ void llvm::filterDeadComdatFunctions( ComdatEntriesCovered.end(); }); } + +std::string llvm::getUniqueModuleId(Module *M) { + MD5 Md5; + bool ExportsSymbols = false; + auto AddGlobal = [&](GlobalValue &GV) { + if (GV.isDeclaration() || GV.getName().startswith("llvm.") || + !GV.hasExternalLinkage()) + return; + ExportsSymbols = true; + Md5.update(GV.getName()); + Md5.update(ArrayRef<uint8_t>{0}); + }; + + for (auto &F : *M) + AddGlobal(F); + for (auto &GV : M->globals()) + AddGlobal(GV); + for (auto &GA : M->aliases()) + AddGlobal(GA); + for (auto &IF : M->ifuncs()) + AddGlobal(IF); + + if (!ExportsSymbols) + return ""; + + MD5::MD5Result R; + Md5.final(R); + + SmallString<32> Str; + MD5::stringifyResult(R, Str); + return ("$" + Str).str(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index a33b85c4ee69..cdba982e6641 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -225,10 +225,10 @@ struct PromoteMem2Reg { std::vector<AllocaInst *> Allocas; DominatorTree &DT; DIBuilder DIB; - /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. AssumptionCache *AC; + const SimplifyQuery SQ; /// Reverse mapping of Allocas. 
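[Editorial note, not part of the diff: the new getUniqueModuleId in the ModuleUtils.cpp hunk above hashes a module's exported names into a stable "$<md5>" suffix, returning "" when nothing is exported. A hypothetical use: renaming a module-local symbol so it can be promoted to external linkage without colliding with other modules. promoteLocalSketch and its renaming scheme are invented for illustration.]

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;

static void promoteLocalSketch(Module &M, GlobalValue &GV) {
  std::string Id = getUniqueModuleId(&M); // "" if M exports no symbols.
  if (Id.empty() || !GV.hasLocalLinkage())
    return;
  GV.setName(GV.getName() + Id); // e.g. "foo" becomes "foo$<md5>".
  GV.setLinkage(GlobalValue::ExternalLinkage);
}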
DenseMap<AllocaInst *, unsigned> AllocaLookup; @@ -270,7 +270,8 @@ public: AssumptionCache *AC) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), - AC(AC) {} + AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), + nullptr, &DT, AC) {} void run(); @@ -673,8 +674,6 @@ void PromoteMem2Reg::run() { A->eraseFromParent(); } - const DataLayout &DL = F.getParent()->getDataLayout(); - // Remove alloca's dbg.declare intrinsics from the function. for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) @@ -699,7 +698,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) { + if (Value *V = SimplifyInstruction(PN, SQ)) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index f86e97b6cc72..7a3e8b9ae915 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2231,7 +2231,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, } // Check for trivial simplification. - if (Value *V = SimplifyInstruction(N, DL)) { + if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) { if (!BBI->use_empty()) TranslateMap[&*BBI] = V; if (!N->mayHaveSideEffects()) { @@ -2307,7 +2307,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); - if (Value *V = SimplifyInstruction(PN, DL)) { + if (Value *V = SimplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); continue; @@ -3545,7 +3545,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - if (Value *V = SimplifyInstruction(ICI, DL)) { + if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index a4cc6a031ad4..02a5d3dbeadf 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -51,13 +51,13 @@ namespace { ScalarEvolution *SE; DominatorTree *DT; - SmallVectorImpl<WeakVH> &DeadInsts; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; bool Changed; public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI,SmallVectorImpl<WeakVH> &Dead) + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -701,7 +701,7 @@ void IVVisitor::anchor() { } /// Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence.
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead, + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead, IVVisitor *V) { SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); SIV.simplifyUsers(CurrIV, V); @@ -711,7 +711,7 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, /// Simplify users of induction variables within this /// loop. This does not actually change or add IVs. bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead) { + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) { bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp index 27373427d4f7..2509b5f22046 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -54,8 +54,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ, // Don't waste time simplifying unused instructions. if (!I->use_empty()) { - if (Value *V = - SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) { + if (Value *V = SimplifyInstruction(I, SQ, ORE)) { // Mark all uses for resimplification next time round the loop. for (User *U : I->users()) Next->insert(cast<Instruction>(U)); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 2c1c30463a23..9e71d746de34 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -537,7 +537,7 @@ Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) { if (isa<ConstantPointerNull>(EndPtr)) { // With a null EndPtr, this function won't capture the main argument. // It would be readonly too, except that it still may write to errno. - CI->addAttribute(1, Attribute::NoCapture); + CI->addParamAttr(0, Attribute::NoCapture); } return nullptr; diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 554944404708..f112c555205c 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3899,11 +3899,13 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, } /// \brief Check that the Values in the slice in VL array are still existent in -/// the WeakVH array. +/// the WeakTrackingVH array. /// Vectorization of part of the VL array may cause later values in the VL array -/// to become invalid. We track when this has happened in the WeakVH array. -static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH, - unsigned SliceBegin, unsigned SliceSize) { +/// to become invalid. We track when this has happened in the WeakTrackingVH +/// array. 
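[Editorial note, not part of the diff: the doc comment above states the invariant that makes WeakTrackingVH the right handle here: it follows replaceAllUsesWith and is nulled when its value is deleted. A minimal illustration of the resulting detection idiom, assuming the post-rename ValueHandle API.]

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/ValueHandle.h"
using namespace llvm;

// The handle tracks V through RAUW and becomes null if V is erased, so one
// pointer comparison detects either event after a transformation runs.
static bool survivedUnchangedSketch(Value *V, function_ref<void()> Transform) {
  WeakTrackingVH Handle(V);
  Transform();        // May RAUW or erase V.
  return Handle == V; // False if V was replaced or deleted.
}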
+static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, + ArrayRef<WeakTrackingVH> VH, unsigned SliceBegin, + unsigned SliceSize) { VL = VL.slice(SliceBegin, SliceSize); VH = VH.slice(SliceBegin, SliceSize); return !std::equal(VL.begin(), VL.end(), VH.begin()); @@ -3921,7 +3923,7 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, return false; // Keep track of values that were deleted by vectorizing in the loop below. - SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end()); + SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(), Chain.end()); bool Changed = false; // Look for profitable vectorizable trees at all offsets, starting at zero. @@ -4107,7 +4109,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool Changed = false; // Keep track of values that were deleted by vectorizing in the loop below. - SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end()); + SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end()); unsigned NextInst = 0, MaxInst = VL.size(); for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; @@ -4734,7 +4736,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P, namespace { /// Tracks instructions and their children. -class WeakVHWithLevel final : public CallbackVH { +class WeakTrackingVHWithLevel final : public CallbackVH { /// Operand index of the instruction currently being analyzed. unsigned Level = 0; /// Is this the instruction that should be vectorized, or are we now @@ -4743,8 +4745,8 @@ class WeakVHWithLevel final : public CallbackVH { bool IsInitial = true; public: - explicit WeakVHWithLevel() = default; - WeakVHWithLevel(Value *V) : CallbackVH(V){}; + explicit WeakTrackingVHWithLevel() = default; + WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){}; /// Restart children analysis each time it is replaced by the new instruction. void allUsesReplacedWith(Value *New) override { setValPtr(New); @@ -4771,7 +4773,7 @@ public: cast<Instruction>(getValPtr())->getNumOperands() > Level); return cast<Instruction>(getValPtr())->getOperand(Level++); } - virtual ~WeakVHWithLevel() = default; + virtual ~WeakTrackingVHWithLevel() = default; }; } // namespace @@ -4793,7 +4795,7 @@ static bool canBeVectorized( if (Root->getParent() != BB) return false; - SmallVector<WeakVHWithLevel, 8> Stack(1, Root); + SmallVector<WeakTrackingVHWithLevel, 8> Stack(1, Root); SmallSet<Value *, 8> VisitedInstrs; bool Res = false; while (!Stack.empty()) { @@ -5069,7 +5071,8 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { SetVector<Value *> Candidates(GEPList.begin(), GEPList.end()); // Some of the candidates may have already been vectorized after we - // initially collected them. If so, the WeakVHs will have nullified the + // initially collected them. If so, the WeakTrackingVHs will have + // nullified the // values, so remove them from the set of candidates. Candidates.remove(nullptr);