author     Dimitry Andric <dim@FreeBSD.org>    2017-01-06 20:24:06 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-01-06 20:24:06 +0000
commit     95ec533a1d8c450f6c6c5e84fe85423960e13382 (patch)
tree       bfe77b0dccd50ed2f4b4e6299d4bc4eaafced6e7 /contrib/llvm/lib/Transforms
parent     2b532af82919b9141e7fd04becf354a0a7dfa813 (diff)
parent     7e7b6700743285c0af506ac6299ddf82ebd434b9 (diff)
Merge llvm, clang, lld and lldb trunk r291274, and resolve conflicts.
Notes:
svn path=/projects/clang400-import/; revision=311544
Diffstat (limited to 'contrib/llvm/lib/Transforms')
10 files changed, 460 insertions, 287 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 6dd95f8dcd55..6b32f6c31f72 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -36,7 +36,10 @@ using namespace llvm;
 
-STATISTIC(NumImported, "Number of functions imported");
+STATISTIC(NumImportedFunctions, "Number of functions imported");
+STATISTIC(NumImportedModules, "Number of modules imported from");
+STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
+STATISTIC(NumLiveSymbols, "Number of live symbols in index");
 
 /// Limit on instruction count of imported functions.
 static cl::opt<unsigned> ImportInstrLimit(
@@ -69,6 +72,9 @@ static cl::opt<float> ImportColdMultiplier(
 static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
                                   cl::desc("Print imported functions"));
 
+static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
+                                 cl::desc("Compute dead symbols"));
+
 // Temporary allows the function import pass to disable always linking
 // referenced discardable symbols.
 static cl::opt<bool>
@@ -105,78 +111,6 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
 
 namespace {
 
-// Return true if the Summary describes a GlobalValue that can be externally
-// referenced, i.e. it does not need renaming (linkage is not local) or renaming
-// is possible (does not have a section for instance).
-static bool canBeExternallyReferenced(const GlobalValueSummary &Summary) {
-  if (!Summary.needsRenaming())
-    return true;
-
-  if (Summary.noRename())
-    // Can't externally reference a global that needs renaming if has a section
-    // or is referenced from inline assembly, for example.
-    return false;
-
-  return true;
-}
-
-// Return true if \p GUID describes a GlobalValue that can be externally
-// referenced, i.e. it does not need renaming (linkage is not local) or
-// renaming is possible (does not have a section for instance).
-static bool canBeExternallyReferenced(const ModuleSummaryIndex &Index,
-                                      GlobalValue::GUID GUID) {
-  auto Summaries = Index.findGlobalValueSummaryList(GUID);
-  if (Summaries == Index.end())
-    return true;
-  if (Summaries->second.size() != 1)
-    // If there are multiple globals with this GUID, then we know it is
-    // not a local symbol, and it is necessarily externally referenced.
-    return true;
-
-  // We don't need to check for the module path, because if it can't be
-  // externally referenced and we call it, it is necessarilly in the same
-  // module
-  return canBeExternallyReferenced(**Summaries->second.begin());
-}
-
-// Return true if the global described by \p Summary can be imported in another
-// module.
-static bool eligibleForImport(const ModuleSummaryIndex &Index,
-                              const GlobalValueSummary &Summary) {
-  if (!canBeExternallyReferenced(Summary))
-    // Can't import a global that needs renaming if has a section for instance.
-    // FIXME: we may be able to import it by copying it without promotion.
-    return false;
-
-  // Don't import functions that are not viable to inline.
-  if (Summary.isNotViableToInline())
-    return false;
-
-  // Check references (and potential calls) in the same module. If the current
-  // value references a global that can't be externally referenced it is not
-  // eligible for import. First check the flag set when we have possible
-  // opaque references (e.g. inline asm calls), then check the call and
-  // reference sets.
-  if (Summary.hasInlineAsmMaybeReferencingInternal())
-    return false;
-  bool AllRefsCanBeExternallyReferenced =
-      llvm::all_of(Summary.refs(), [&](const ValueInfo &VI) {
-        return canBeExternallyReferenced(Index, VI.getGUID());
-      });
-  if (!AllRefsCanBeExternallyReferenced)
-    return false;
-
-  if (auto *FuncSummary = dyn_cast<FunctionSummary>(&Summary)) {
-    bool AllCallsCanBeExternallyReferenced = llvm::all_of(
-        FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
-          return canBeExternallyReferenced(Index, Edge.first.getGUID());
-        });
-    if (!AllCallsCanBeExternallyReferenced)
-      return false;
-  }
-  return true;
-}
-
 /// Given a list of possible callee implementation for a call site, select one
 /// that fits the \p Threshold.
 ///
@@ -214,7 +148,7 @@ selectCallee(const ModuleSummaryIndex &Index,
     if (Summary->instCount() > Threshold)
       return false;
 
-    if (!eligibleForImport(Index, *Summary))
+    if (Summary->notEligibleToImport())
      return false;
 
     return true;
@@ -346,7 +280,8 @@ static void computeImportForFunction(
 static void ComputeImportForModule(
     const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
     FunctionImporter::ImportMapTy &ImportList,
-    StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
+    StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr,
+    const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr) {
   // Worklist contains the list of function imported in this module, for which
   // we will analyse the callees and may import further down the callgraph.
   SmallVector<EdgeInfo, 128> Worklist;
@@ -354,6 +289,10 @@ static void ComputeImportForModule(
   // Populate the worklist with the import for the functions in the current
   // module
   for (auto &GVSummary : DefinedGVSummaries) {
+    if (DeadSymbols && DeadSymbols->count(GVSummary.first)) {
+      DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n");
+      continue;
+    }
     auto *Summary = GVSummary.second;
     if (auto *AS = dyn_cast<AliasSummary>(Summary))
       Summary = &AS->getAliasee();
@@ -393,14 +332,15 @@ void llvm::ComputeCrossModuleImport(
     const ModuleSummaryIndex &Index,
     const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
     StringMap<FunctionImporter::ImportMapTy> &ImportLists,
-    StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
+    StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+    const DenseSet<GlobalValue::GUID> *DeadSymbols) {
   // For each module that has function defined, compute the import/export lists.
   for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
     auto &ImportList = ImportLists[DefinedGVSummaries.first()];
     DEBUG(dbgs() << "Computing import for Module '"
                  << DefinedGVSummaries.first() << "'\n");
     ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
-                           &ExportLists);
+                           &ExportLists, DeadSymbols);
   }
 
   // When computing imports we added all GUIDs referenced by anything
@@ -462,6 +402,86 @@ void llvm::ComputeCrossModuleImportForModule(
 #endif
 }
 
+DenseSet<GlobalValue::GUID> llvm::computeDeadSymbols(
+    const ModuleSummaryIndex &Index,
+    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+  if (!ComputeDead)
+    return DenseSet<GlobalValue::GUID>();
+  if (GUIDPreservedSymbols.empty())
+    // Don't do anything when nothing is live, this is friendly with tests.
+    return DenseSet<GlobalValue::GUID>();
+  DenseSet<GlobalValue::GUID> LiveSymbols = GUIDPreservedSymbols;
+  SmallVector<GlobalValue::GUID, 128> Worklist;
+  Worklist.reserve(LiveSymbols.size() * 2);
+  for (auto GUID : LiveSymbols) {
+    DEBUG(dbgs() << "Live root: " << GUID << "\n");
+    Worklist.push_back(GUID);
+  }
+  // Add values flagged in the index as live roots to the worklist.
+  for (const auto &Entry : Index) {
+    bool IsLiveRoot = llvm::any_of(
+        Entry.second,
+        [&](const std::unique_ptr<llvm::GlobalValueSummary> &Summary) {
+          return Summary->liveRoot();
+        });
+    if (!IsLiveRoot)
+      continue;
+    DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n");
+    Worklist.push_back(Entry.first);
+  }
+
+  while (!Worklist.empty()) {
+    auto GUID = Worklist.pop_back_val();
+    auto It = Index.findGlobalValueSummaryList(GUID);
+    if (It == Index.end()) {
+      DEBUG(dbgs() << "Not in index: " << GUID << "\n");
+      continue;
+    }
+
+    // FIXME: we should only make the prevailing copy live here
+    for (auto &Summary : It->second) {
+      for (auto Ref : Summary->refs()) {
+        auto RefGUID = Ref.getGUID();
+        if (LiveSymbols.insert(RefGUID).second) {
+          DEBUG(dbgs() << "Marking live (ref): " << RefGUID << "\n");
+          Worklist.push_back(RefGUID);
+        }
+      }
+      if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
+        for (auto Call : FS->calls()) {
+          auto CallGUID = Call.first.getGUID();
+          if (LiveSymbols.insert(CallGUID).second) {
+            DEBUG(dbgs() << "Marking live (call): " << CallGUID << "\n");
+            Worklist.push_back(CallGUID);
+          }
+        }
+      }
+      if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
+        auto AliaseeGUID = AS->getAliasee().getOriginalName();
+        if (LiveSymbols.insert(AliaseeGUID).second) {
+          DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n");
+          Worklist.push_back(AliaseeGUID);
+        }
+      }
+    }
+  }
+  DenseSet<GlobalValue::GUID> DeadSymbols;
+  DeadSymbols.reserve(
+      std::min(Index.size(), Index.size() - LiveSymbols.size()));
+  for (auto &Entry : Index) {
+    auto GUID = Entry.first;
+    if (!LiveSymbols.count(GUID)) {
+      DEBUG(dbgs() << "Marking dead: " << GUID << "\n");
+      DeadSymbols.insert(GUID);
+    }
+  }
+  DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and "
+               << DeadSymbols.size() << " symbols Dead \n");
+  NumDeadSymbols += DeadSymbols.size();
+  NumLiveSymbols += LiveSymbols.size();
+  return DeadSymbols;
+}
+
 /// Compute the set of summaries needed for a ThinLTO backend compilation of
 /// \p ModulePath.
 void llvm::gatherImportedSummariesForModule(
@@ -625,7 +645,6 @@ Expected<bool> FunctionImporter::importFunctions(
     // now, before linking it (otherwise this will be a noop).
     if (Error Err = SrcModule->materializeMetadata())
       return std::move(Err);
-    UpgradeDebugInfo(*SrcModule);
 
     auto &ImportGUIDs = FunctionsToImportPerModule->second;
     // Find the globals to import
@@ -698,6 +717,10 @@ Expected<bool> FunctionImporter::importFunctions(
       }
     }
 
+    // Upgrade debug info after we're done materializing all the globals and we
+    // have loaded all the required metadata!
+    UpgradeDebugInfo(*SrcModule);
+
     // Link in the specified functions.
     if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport))
       return true;
 
@@ -717,9 +740,10 @@ Expected<bool> FunctionImporter::importFunctions(
       report_fatal_error("Function Import: link error");
 
     ImportedCount += GlobalsToImport.size();
+    NumImportedModules++;
   }
 
-  NumImported += ImportedCount;
+  NumImportedFunctions += ImportedCount;
 
   DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
                << DestModule.getModuleIdentifier() << "\n");
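The computeDeadSymbols routine added in the FunctionImport.cpp hunk above is, at its core, a breadth-first liveness walk over the summary index: seed a worklist with the preserved symbols and the summary-flagged live roots, chase reference, call, and alias edges, and declare everything never reached dead. A minimal standalone sketch of that idea over a generic edge map (GUID and the Edges container here are hypothetical stand-ins, not the LLVM summary-index API):

    #include <cstdint>
    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    using GUID = uint64_t; // hypothetical stand-in for GlobalValue::GUID

    // Seed with the preserved roots, follow edges, and report everything
    // in the index that the walk never reached.
    std::unordered_set<GUID> computeDeadSymbols(
        const std::unordered_map<GUID, std::vector<GUID>> &Edges,
        const std::unordered_set<GUID> &PreservedRoots) {
      std::unordered_set<GUID> Live = PreservedRoots;
      std::vector<GUID> Worklist(Live.begin(), Live.end());
      while (!Worklist.empty()) {
        GUID G = Worklist.back();
        Worklist.pop_back();
        auto It = Edges.find(G);
        if (It == Edges.end())
          continue; // not in the index; nothing to propagate
        for (GUID Ref : It->second)
          if (Live.insert(Ref).second) // newly marked live
            Worklist.push_back(Ref);
      }
      std::unordered_set<GUID> Dead;
      for (const auto &Entry : Edges)
        if (!Live.count(Entry.first))
          Dead.insert(Entry.first);
      return Dead;
    }

    int main() {
      // Toy index: 1 -> 2 -> 3, while 4 is unreferenced.
      std::unordered_map<GUID, std::vector<GUID>> Edges = {
          {1, {2}}, {2, {3}}, {3, {}}, {4, {}}};
      auto Dead = computeDeadSymbols(Edges, /*PreservedRoots=*/{1});
      // Dead == {4}; 1, 2 and 3 are kept alive through the edge walk.
      return (Dead.size() == 1 && Dead.count(4) == 1) ? 0 : 1;
    }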
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 2948878cffc4..f4742aaf748f 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -27,9 +27,12 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndexYAML.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/TrailingObjects.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO.h"
@@ -52,6 +55,20 @@ static cl::opt<bool> AvoidReuse(
     cl::desc("Try to avoid reuse of byte array addresses using aliases"),
     cl::Hidden, cl::init(true));
 
+static cl::opt<std::string> ClSummaryAction(
+    "lowertypetests-summary-action",
+    cl::desc("What to do with the summary when running this pass"), cl::Hidden);
+
+static cl::opt<std::string> ClReadSummary(
+    "lowertypetests-read-summary",
+    cl::desc("Read summary from given YAML file before running pass"),
+    cl::Hidden);
+
+static cl::opt<std::string> ClWriteSummary(
+    "lowertypetests-write-summary",
+    cl::desc("Write summary to given YAML file after running pass"),
+    cl::Hidden);
+
 bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
   if (Offset < ByteOffset)
     return false;
@@ -66,38 +83,6 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
   return Bits.count(BitOffset);
 }
 
-bool BitSetInfo::containsValue(
-    const DataLayout &DL,
-    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V,
-    uint64_t COffset) const {
-  if (auto GV = dyn_cast<GlobalObject>(V)) {
-    auto I = GlobalLayout.find(GV);
-    if (I == GlobalLayout.end())
-      return false;
-    return containsGlobalOffset(I->second + COffset);
-  }
-
-  if (auto GEP = dyn_cast<GEPOperator>(V)) {
-    APInt APOffset(DL.getPointerSizeInBits(0), 0);
-    bool Result = GEP->accumulateConstantOffset(DL, APOffset);
-    if (!Result)
-      return false;
-    COffset += APOffset.getZExtValue();
-    return containsValue(DL, GlobalLayout, GEP->getPointerOperand(), COffset);
-  }
-
-  if (auto Op = dyn_cast<Operator>(V)) {
-    if (Op->getOpcode() == Instruction::BitCast)
-      return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
-
-    if (Op->getOpcode() == Instruction::Select)
-      return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
-             containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
-  }
-
-  return false;
-}
-
 void BitSetInfo::print(raw_ostream &OS) const {
   OS << "offset " << ByteOffset << " size " << BitSize << " align "
      << (1 << AlignLog2);
@@ -204,7 +189,7 @@ struct ByteArrayInfo {
   std::set<uint64_t> Bits;
   uint64_t BitSize;
   GlobalVariable *ByteArray;
-  Constant *Mask;
+  GlobalVariable *MaskGlobal;
 };
 
 /// A POD-like structure that we use to store a global reference together with
@@ -241,6 +226,9 @@ public:
 class LowerTypeTestsModule {
   Module &M;
 
+  // This is for testing purposes only.
+  std::unique_ptr<ModuleSummaryIndex> OwnedSummary;
+
   bool LinkerSubsectionsViaSymbols;
   Triple::ArchType Arch;
   Triple::OSType OS;
@@ -248,6 +236,7 @@ class LowerTypeTestsModule {
 
   IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
   IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
+  PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
   IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
   PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty);
   IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
@@ -259,6 +248,37 @@ class LowerTypeTestsModule {
   // Mapping from type identifiers to the call sites that test them.
   DenseMap<Metadata *, std::vector<CallInst *>> TypeTestCallSites;
 
+  /// This structure describes how to lower type tests for a particular type
+  /// identifier. It is either built directly from the global analysis (during
+  /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
+  /// identifier summaries and external symbol references (in ThinLTO backends).
+  struct TypeIdLowering {
+    TypeTestResolution::Kind TheKind;
+
+    /// All except Unsat: the start address within the combined global.
+    Constant *OffsetedGlobal;
+
+    /// ByteArray, Inline, AllOnes: log2 of the required global alignment
+    /// relative to the start address.
+    Constant *AlignLog2;
+
+    /// ByteArray, Inline, AllOnes: size of the memory region covering members
+    /// of this type identifier as a multiple of 2^AlignLog2.
+    Constant *Size;
+
+    /// ByteArray, Inline, AllOnes: range of the size expressed as a bit width.
+    unsigned SizeBitWidth;
+
+    /// ByteArray: the byte array to test the address against.
+    Constant *TheByteArray;
+
+    /// ByteArray: the bit mask to apply to bytes loaded from the byte array.
+    Constant *BitMask;
+
+    /// Inline: the bit mask to test the address against.
+    Constant *InlineBits;
+  };
+
   std::vector<ByteArrayInfo> ByteArrayInfos;
 
   Function *WeakInitializerFn = nullptr;
@@ -268,15 +288,13 @@ class LowerTypeTestsModule {
                            const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
   ByteArrayInfo *createByteArray(BitSetInfo &BSI);
   void allocateByteArrays();
-  Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+  Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
                           Value *BitOffset);
   void lowerTypeTestCalls(
       ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
       const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
-  Value *
-  lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
-                  Constant *CombinedGlobal,
-                  const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+  Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
+                           const TypeIdLowering &TIL);
   void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
                                        ArrayRef<GlobalTypeMember *> Globals);
   unsigned getJumpTableEntrySize();
@@ -302,6 +320,7 @@ class LowerTypeTestsModule {
 
 public:
   LowerTypeTestsModule(Module &M);
+  ~LowerTypeTestsModule();
   bool lower();
 };
 
@@ -380,7 +399,7 @@ ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
   BAI->Bits = BSI.Bits;
   BAI->BitSize = BSI.BitSize;
   BAI->ByteArray = ByteArrayGlobal;
-  BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
+  BAI->MaskGlobal = MaskGlobal;
   return BAI;
 }
 
@@ -399,8 +418,9 @@ void LowerTypeTestsModule::allocateByteArrays() {
     uint8_t Mask;
     BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
 
-    BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
-    cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
+    BAI->MaskGlobal->replaceAllUsesWith(
+        ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), Int8PtrTy));
+    BAI->MaskGlobal->eraseFromParent();
   }
 
   Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes);
@@ -435,101 +455,121 @@ void LowerTypeTestsModule::allocateByteArrays() {
   ByteArraySizeBytes = BAB.Bytes.size();
 }
 
-/// Build a test that bit BitOffset is set in BSI, where
-/// BitSetGlobal is a global containing the bits in BSI.
-Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
-                                              ByteArrayInfo *&BAI,
+/// Build a test that bit BitOffset is set in the type identifier that was
+/// lowered to TIL, which must be either an Inline or a ByteArray.
+Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
+                                              const TypeIdLowering &TIL,
                                               Value *BitOffset) {
-  if (BSI.BitSize <= 64) {
+  if (TIL.TheKind == TypeTestResolution::Inline) {
     // If the bit set is sufficiently small, we can avoid a load by bit testing
     // a constant.
-    IntegerType *BitsTy;
-    if (BSI.BitSize <= 32)
-      BitsTy = Int32Ty;
-    else
-      BitsTy = Int64Ty;
-
-    uint64_t Bits = 0;
-    for (auto Bit : BSI.Bits)
-      Bits |= uint64_t(1) << Bit;
-    Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
-    return createMaskedBitTest(B, BitsConst, BitOffset);
+    return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
  } else {
-    if (!BAI) {
-      ++NumByteArraysCreated;
-      BAI = createByteArray(BSI);
-    }
-
-    Constant *ByteArray = BAI->ByteArray;
-    Type *Ty = BAI->ByteArray->getValueType();
+    Constant *ByteArray = TIL.TheByteArray;
     if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
       // Each use of the byte array uses a different alias. This makes the
       // backend less likely to reuse previously computed byte array addresses,
       // improving the security of the CFI mechanism based on this pass.
-      ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
-                                      GlobalValue::PrivateLinkage, "bits_use",
-                                      ByteArray, &M);
+      ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
+                                      "bits_use", ByteArray, &M);
     }
 
-    Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
+    Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
     Value *Byte = B.CreateLoad(ByteAddr);
 
-    Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
+    Value *ByteAndMask =
+        B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
    return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
  }
}
 
+static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
+                                Value *V, uint64_t COffset) {
+  if (auto GV = dyn_cast<GlobalObject>(V)) {
+    SmallVector<MDNode *, 2> Types;
+    GV->getMetadata(LLVMContext::MD_type, Types);
+    for (MDNode *Type : Types) {
+      if (Type->getOperand(1) != TypeId)
+        continue;
+      uint64_t Offset =
+          cast<ConstantInt>(
+              cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+              ->getZExtValue();
+      if (COffset == Offset)
+        return true;
+    }
+    return false;
+  }
+
+  if (auto GEP = dyn_cast<GEPOperator>(V)) {
+    APInt APOffset(DL.getPointerSizeInBits(0), 0);
+    bool Result = GEP->accumulateConstantOffset(DL, APOffset);
+    if (!Result)
+      return false;
+    COffset += APOffset.getZExtValue();
+    return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset);
+  }
+
+  if (auto Op = dyn_cast<Operator>(V)) {
+    if (Op->getOpcode() == Instruction::BitCast)
+      return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset);
+
+    if (Op->getOpcode() == Instruction::Select)
+      return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) &&
+             isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset);
+  }
+
+  return false;
+}
+
 /// Lower a llvm.type.test call to its implementation. Returns the value to
 /// replace the call with.
-Value *LowerTypeTestsModule::lowerBitSetCall(
-    CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
-    Constant *CombinedGlobalIntAddr,
-    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
+                                               const TypeIdLowering &TIL) {
+  if (TIL.TheKind == TypeTestResolution::Unsat)
+    return ConstantInt::getFalse(M.getContext());
+
   Value *Ptr = CI->getArgOperand(0);
   const DataLayout &DL = M.getDataLayout();
-
-  if (BSI.containsValue(DL, GlobalLayout, Ptr))
+  if (isKnownTypeIdMember(TypeId, DL, Ptr, 0))
     return ConstantInt::getTrue(M.getContext());
 
-  Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
-      CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
-
   BasicBlock *InitialBB = CI->getParent();
 
   IRBuilder<> B(CI);
 
   Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
 
-  if (BSI.isSingleOffset())
+  Constant *OffsetedGlobalAsInt =
+      ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy);
+  if (TIL.TheKind == TypeTestResolution::Single)
    return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
 
   Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
 
-  Value *BitOffset;
-  if (BSI.AlignLog2 == 0) {
-    BitOffset = PtrOffset;
-  } else {
-    // We need to check that the offset both falls within our range and is
-    // suitably aligned. We can check both properties at the same time by
-    // performing a right rotate by log2(alignment) followed by an integer
-    // comparison against the bitset size. The rotate will move the lower
-    // order bits that need to be zero into the higher order bits of the
-    // result, causing the comparison to fail if they are nonzero. The rotate
-    // also conveniently gives us a bit offset to use during the load from
-    // the bitset.
-    Value *OffsetSHR =
-        B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
-    Value *OffsetSHL = B.CreateShl(
-        PtrOffset,
-        ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
-    BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
-  }
-
-  Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+  // We need to check that the offset both falls within our range and is
+  // suitably aligned. We can check both properties at the same time by
+  // performing a right rotate by log2(alignment) followed by an integer
+  // comparison against the bitset size. The rotate will move the lower
+  // order bits that need to be zero into the higher order bits of the
+  // result, causing the comparison to fail if they are nonzero. The rotate
+  // also conveniently gives us a bit offset to use during the load from
+  // the bitset.
+  Value *OffsetSHR =
+      B.CreateLShr(PtrOffset, ConstantExpr::getZExt(TIL.AlignLog2, IntPtrTy));
+  Value *OffsetSHL = B.CreateShl(
+      PtrOffset, ConstantExpr::getZExt(
+                     ConstantExpr::getSub(
+                         ConstantInt::get(Int8Ty, DL.getPointerSizeInBits(0)),
+                         TIL.AlignLog2),
+                     IntPtrTy));
+  Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+
+  Constant *BitSizeConst = ConstantExpr::getZExt(TIL.Size, IntPtrTy);
   Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
 
   // If the bit set is all ones, testing against it is unnecessary.
-  if (BSI.isAllOnes())
+  if (TIL.TheKind == TypeTestResolution::AllOnes)
     return OffsetInRange;
 
   TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
@@ -537,7 +577,7 @@ Value *LowerTypeTestsModule::lowerBitSetCall(
 
   // Now that we know that the offset is in range and aligned, load the
   // appropriate bit from the bitset.
-  Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
+  Value *Bit = createBitSetTest(ThenB, TIL, BitOffset);
 
   // The value we want is 0 if we came directly from the initial block
   // (having failed the range or alignment checks), or the loaded bit if
@@ -622,11 +662,7 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
 void LowerTypeTestsModule::lowerTypeTestCalls(
     ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
     const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
-  Constant *CombinedGlobalIntAddr =
-      ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
-  DenseMap<GlobalObject *, uint64_t> GlobalObjLayout;
-  for (auto &P : GlobalLayout)
-    GlobalObjLayout[P.first->getGlobal()] = P.second;
+  CombinedGlobalAddr = ConstantExpr::getBitCast(CombinedGlobalAddr, Int8PtrTy);
 
   // For each type identifier in this disjoint set...
   for (Metadata *TypeId : TypeIds) {
@@ -640,13 +676,43 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
       BSI.print(dbgs());
     });
 
-    ByteArrayInfo *BAI = nullptr;
+    TypeIdLowering TIL;
+    TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
+        Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
+    TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2);
+    if (BSI.isAllOnes()) {
+      TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
+                                       : TypeTestResolution::AllOnes;
+      TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32;
+      TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty,
+                                  BSI.BitSize);
+    } else if (BSI.BitSize <= 64) {
+      TIL.TheKind = TypeTestResolution::Inline;
+      TIL.SizeBitWidth = (BSI.BitSize <= 32) ? 5 : 6;
+      TIL.Size = ConstantInt::get(Int8Ty, BSI.BitSize);
+      uint64_t InlineBits = 0;
+      for (auto Bit : BSI.Bits)
+        InlineBits |= uint64_t(1) << Bit;
+      if (InlineBits == 0)
+        TIL.TheKind = TypeTestResolution::Unsat;
+      else
+        TIL.InlineBits = ConstantInt::get(
+            (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits);
+    } else {
+      TIL.TheKind = TypeTestResolution::ByteArray;
+      TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32;
+      TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty,
+                                  BSI.BitSize);
+      ++NumByteArraysCreated;
+      ByteArrayInfo *BAI = createByteArray(BSI);
+      TIL.TheByteArray = BAI->ByteArray;
+      TIL.BitMask = BAI->MaskGlobal;
+    }
 
     // Lower each call to llvm.type.test for this type identifier.
     for (CallInst *CI : TypeTestCallSites[TypeId]) {
       ++NumTypeTestCallsLowered;
-      Value *Lowered =
-          lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalObjLayout);
+      Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
       CI->replaceAllUsesWith(Lowered);
       CI->eraseFromParent();
     }
@@ -1080,6 +1146,22 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
 
 /// Lower all type tests in this module.
 LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
+  // Handle the command-line summary arguments. This code is for testing
+  // purposes only, so we handle errors directly.
+  if (!ClSummaryAction.empty()) {
+    OwnedSummary = make_unique<ModuleSummaryIndex>();
+    if (!ClReadSummary.empty()) {
+      ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
+                            ": ");
+      auto ReadSummaryFile =
+          ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
+
+      yaml::Input In(ReadSummaryFile->getBuffer());
+      In >> *OwnedSummary;
+      ExitOnErr(errorCodeToError(In.error()));
+    }
+  }
+
   Triple TargetTriple(M.getTargetTriple());
   LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
   Arch = TargetTriple.getArch();
@@ -1087,6 +1169,20 @@ LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
   ObjectFormat = TargetTriple.getObjectFormat();
 }
 
+LowerTypeTestsModule::~LowerTypeTestsModule() {
+  if (ClSummaryAction.empty() || ClWriteSummary.empty())
+    return;
+
+  ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
+                        ": ");
+  std::error_code EC;
+  raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+  ExitOnErr(errorCodeToError(EC));
+
+  yaml::Output Out(OS);
+  Out << *OwnedSummary;
+}
+
 bool LowerTypeTestsModule::lower() {
   Function *TypeTestFunc =
       M.getFunction(Intrinsic::getName(Intrinsic::type_test));
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f863d192fc2f..b29ed3c87451 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1637,6 +1637,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
 
+  case Intrinsic::cos:
+  case Intrinsic::amdgcn_cos: {
+    Value *SrcSrc;
+    Value *Src = II->getArgOperand(0);
+    if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
+        match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
+      // cos(-x) -> cos(x)
+      // cos(fabs(x)) -> cos(x)
+      II->setArgOperand(0, SrcSrc);
+      return II;
+    }
+
+    break;
+  }
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
     // Turn PPC lvx -> load if the pointer is known aligned.
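The comment block carried along in the lowerTypeTestCall hunk above describes a nice trick: a single unsigned comparison after a right rotate checks both that an offset lies in range and that it is suitably aligned. A small self-contained illustration in plain C++ (a hypothetical helper, independent of the IR-building code above):

    #include <cassert>
    #include <cstdint>

    // Rotate PtrOffset right by AlignLog2: the low bits that must be zero
    // for a correctly aligned offset land in the high bits, so any
    // misaligned or out-of-range offset compares >= BitSize and the test
    // fails. The rotated value doubles as the bit index into the bitset.
    static bool offsetInRangeAndAligned(uint64_t PtrOffset, unsigned AlignLog2,
                                        uint64_t BitSize) {
      if (AlignLog2 == 0) // avoid the undefined 64-bit shift below
        return PtrOffset < BitSize;
      uint64_t Rotated =
          (PtrOffset >> AlignLog2) | (PtrOffset << (64 - AlignLog2));
      return Rotated < BitSize;
    }

    int main() {
      assert(offsetInRangeAndAligned(16, 3, 4));  // offset 16, align 8 -> bit 2
      assert(!offsetInRangeAndAligned(17, 3, 4)); // misaligned offset
      assert(!offsetInRangeAndAligned(40, 3, 4)); // aligned but out of range
      return 0;
    }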
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6a7cb0e45c63..1d5528398776 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -514,7 +514,8 @@ struct AddressSanitizer : public FunctionPass {
   void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                          Value *Addr, uint32_t TypeSize, bool IsWrite,
                          Value *SizeArgument, bool UseCalls, uint32_t Exp);
-  void instrumentUnusualSizeOrAlignment(Instruction *I, Value *Addr,
+  void instrumentUnusualSizeOrAlignment(Instruction *I,
+                                        Instruction *InsertBefore, Value *Addr,
                                         uint32_t TypeSize, bool IsWrite,
                                         Value *SizeArgument, bool UseCalls,
                                         uint32_t Exp);
@@ -1056,20 +1057,18 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
         return nullptr;
       *IsWrite = false;
     }
-    // Only instrument if the mask is constant for now.
-    if (isa<ConstantVector>(CI->getOperand(2 + OpOffset))) {
-      auto BasePtr = CI->getOperand(0 + OpOffset);
-      auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
-      *TypeSize = DL.getTypeStoreSizeInBits(Ty);
-      if (auto AlignmentConstant =
-              dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
-        *Alignment = (unsigned)AlignmentConstant->getZExtValue();
-      else
-        *Alignment = 1; // No alignment guarantees. We probably got Undef
-      if (MaybeMask)
-        *MaybeMask = CI->getOperand(2 + OpOffset);
-      PtrOperand = BasePtr;
-    }
+
+    auto BasePtr = CI->getOperand(0 + OpOffset);
+    auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+    *TypeSize = DL.getTypeStoreSizeInBits(Ty);
+    if (auto AlignmentConstant =
+            dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+      *Alignment = (unsigned)AlignmentConstant->getZExtValue();
+    else
+      *Alignment = 1; // No alignment guarantees. We probably got Undef
+    if (MaybeMask)
+      *MaybeMask = CI->getOperand(2 + OpOffset);
+    PtrOperand = BasePtr;
   }
 }
 
@@ -1130,24 +1129,25 @@ void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
 }
 
 static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I,
-                                Value *Addr, unsigned Alignment,
-                                unsigned Granularity, uint32_t TypeSize,
-                                bool IsWrite, Value *SizeArgument,
-                                bool UseCalls, uint32_t Exp) {
+                                Instruction *InsertBefore, Value *Addr,
+                                unsigned Alignment, unsigned Granularity,
+                                uint32_t TypeSize, bool IsWrite,
+                                Value *SizeArgument, bool UseCalls,
+                                uint32_t Exp) {
   // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
   // if the data is properly aligned.
   if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
        TypeSize == 128) &&
       (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
-    return Pass->instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr,
-                                   UseCalls, Exp);
-  Pass->instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
-                                         UseCalls, Exp);
+    return Pass->instrumentAddress(I, InsertBefore, Addr, TypeSize, IsWrite,
+                                   nullptr, UseCalls, Exp);
+  Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeSize,
+                                         IsWrite, nullptr, UseCalls, Exp);
 }
 
 static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
                                         const DataLayout &DL, Type *IntptrTy,
-                                        ConstantVector *Mask, Instruction *I,
+                                        Value *Mask, Instruction *I,
                                         Value *Addr, unsigned Alignment,
                                         unsigned Granularity, uint32_t TypeSize,
                                         bool IsWrite, Value *SizeArgument,
@@ -1157,15 +1157,30 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
   unsigned Num = VTy->getVectorNumElements();
   auto Zero = ConstantInt::get(IntptrTy, 0);
   for (unsigned Idx = 0; Idx < Num; ++Idx) {
-    // dyn_cast as we might get UndefValue
-    auto Masked = dyn_cast<ConstantInt>(Mask->getOperand(Idx));
-    if (Masked && Masked->isAllOnesValue()) {
+    Value *InstrumentedAddress = nullptr;
+    Instruction *InsertBefore = I;
+    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
+      // dyn_cast as we might get UndefValue
+      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
+        if (Masked->isNullValue())
+          // Mask is constant false, so no instrumentation needed.
+          continue;
+        // If we have a true or undef value, fall through to doInstrumentAddress
+        // with InsertBefore == I
+      }
+    } else {
       IRBuilder<> IRB(I);
-      auto InstrumentedAddress =
-          IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
-      doInstrumentAddress(Pass, I, InstrumentedAddress, Alignment, Granularity,
-                          ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp);
+      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
+      TerminatorInst *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
+      InsertBefore = ThenTerm;
     }
+
+    IRBuilder<> IRB(InsertBefore);
+    InstrumentedAddress =
+        IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+    doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
+                        Granularity, ElemTypeSize, IsWrite, SizeArgument,
+                        UseCalls, Exp);
   }
 }
 
@@ -1220,12 +1235,11 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
 
   unsigned Granularity = 1 << Mapping.Scale;
   if (MaybeMask) {
-    auto Mask = cast<ConstantVector>(MaybeMask);
-    instrumentMaskedLoadOrStore(this, DL, IntptrTy, Mask, I, Addr, Alignment,
-                                Granularity, TypeSize, IsWrite, nullptr,
-                                UseCalls, Exp);
+    instrumentMaskedLoadOrStore(this, DL, IntptrTy, MaybeMask, I, Addr,
+                                Alignment, Granularity, TypeSize, IsWrite,
+                                nullptr, UseCalls, Exp);
   } else {
-    doInstrumentAddress(this, I, Addr, Alignment, Granularity, TypeSize,
+    doInstrumentAddress(this, I, I, Addr, Alignment, Granularity, TypeSize,
                         IsWrite, nullptr, UseCalls, Exp);
   }
 }
@@ -1342,9 +1356,9 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
 // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
 // to report the actual access size.
 void AddressSanitizer::instrumentUnusualSizeOrAlignment(
-    Instruction *I, Value *Addr, uint32_t TypeSize, bool IsWrite,
-    Value *SizeArgument, bool UseCalls, uint32_t Exp) {
-  IRBuilder<> IRB(I);
+    Instruction *I, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize,
+    bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) {
+  IRBuilder<> IRB(InsertBefore);
   Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
   if (UseCalls) {
@@ -1358,8 +1372,8 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
     Value *LastByte = IRB.CreateIntToPtr(
         IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
         Addr->getType());
-    instrumentAddress(I, I, Addr, 8, IsWrite, Size, false, Exp);
-    instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false, Exp);
+    instrumentAddress(I, InsertBefore, Addr, 8, IsWrite, Size, false, Exp);
+    instrumentAddress(I, InsertBefore, LastByte, 8, IsWrite, Size, false, Exp);
   }
 }
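Note that the unusual-size path shown above probes only the first and the last byte of the access. A sketch of why that tends to suffice, against a hypothetical shadow-memory oracle (ByteIsPoisoned is a stand-in, not the real ASan runtime interface, and this assumes the unprobed middle of the access is smaller than the surrounding redzones):

    #include <cstdint>

    // ASan surrounds objects with poisoned redzones, so an odd-sized access
    // that spills out of its object will have either its first or its last
    // byte land in poisoned memory; probing the two end bytes catches the
    // overflow without a per-byte loop.
    static bool accessIsPoisoned(uintptr_t Addr, uint64_t SizeInBytes,
                                 bool (*ByteIsPoisoned)(uintptr_t)) {
      return ByteIsPoisoned(Addr) || ByteIsPoisoned(Addr + SizeInBytes - 1);
    }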
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 9485bfd7c296..0137378b828b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1572,6 +1572,13 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
 
   // Assign value numbers to the new instructions.
   for (Instruction *I : NewInsts) {
+    // Instructions that have been inserted in predecessor(s) to materialize
+    // the load address do not retain their original debug locations. Doing
+    // so could lead to confusing (but correct) source attributions.
+    // FIXME: How do we retain source locations without causing poor debugging
+    // behavior?
+    I->setDebugLoc(DebugLoc());
+
     // FIXME: We really _ought_ to insert these value numbers into their
     // parent's availability map. However, in doing so, we risk getting into
     // ordering issues. If a block hasn't been processed yet, we would be
@@ -1601,8 +1608,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
     if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range))
       NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
 
-    // Transfer DebugLoc.
-    NewLoad->setDebugLoc(LI->getDebugLoc());
+    // We do not propagate the old load's debug location, because the new
+    // load now lives in a different BB, and we want to avoid a jumpy line
+    // table.
+    // FIXME: How do we retain source locations without causing poor debugging
+    // behavior?
 
     // Add the newly created load.
     ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 1cc5c8f0da84..6ef9d0561322 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -408,6 +408,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
         CurAST->deleteValue(&I);
         I.eraseFromParent();
       }
+      Changed = true;
       continue;
     }
 
@@ -766,6 +767,14 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
   // Move the new node to the Preheader, before its terminator.
   I.moveBefore(Preheader->getTerminator());
 
+  // Do not retain debug locations when we are moving instructions to different
+  // basic blocks, because we want to avoid jumpy line tables. Calls, however,
+  // need to retain their debug locs because they may be inlined.
+  // FIXME: How do we retain source locations without causing poor debugging
+  // behavior?
+  if (!isa<CallInst>(I))
+    I.setDebugLoc(DebugLoc());
+
   if (isa<LoadInst>(I))
     ++NumMovedLoads;
   else if (isa<CallInst>(I))
@@ -911,14 +920,23 @@ bool llvm::promoteLoopAccessesToScalars(
   //
   // If at least one store is guaranteed to execute, both properties are
   // satisfied, and promotion is legal.
+  //
   // This, however, is not a necessary condition. Even if no store/load is
-  // guaranteed to execute, we can still establish these properties:
-  // (p1) by proving that hoisting the load into the preheader is
-  // safe (i.e. proving dereferenceability on all paths through the loop). We
+  // guaranteed to execute, we can still establish these properties.
+  // We can establish (p1) by proving that hoisting the load into the preheader
+  // is safe (i.e. proving dereferenceability on all paths through the loop). We
   // can use any access within the alias set to prove dereferenceability,
   // since they're all must alias.
-  // (p2) by proving the memory is thread-local, so the memory model
+  //
+  // There are two ways establish (p2):
+  // a) Prove the location is thread-local. In this case the memory model
   // requirement does not apply, and stores are safe to insert.
+  // b) Prove a store dominates every exit block. In this case, if an exit
+  // blocks is reached, the original dynamic path would have taken us through
+  // the store, so inserting a store into the exit block is safe. Note that this
+  // is different from the store being guaranteed to execute. For instance,
+  // if an exception is thrown on the first iteration of the loop, the original
+  // store is never executed, but the exit blocks are not executed either.
 
   bool DereferenceableInPH = false;
   bool SafeToInsertStore = false;
@@ -1000,6 +1018,17 @@ bool llvm::promoteLoopAccessesToScalars(
       }
     }
 
+    // If a store dominates all exit blocks, it is safe to sink.
+    // As explained above, if an exit block was executed, a dominating
+    // store must have been been executed at least once, so we are not
+    // introducing stores on paths that did not have them.
+    // Note that this only looks at explicit exit blocks. If we ever
+    // start sinking stores into unwind edges (see above), this will break.
+    if (!SafeToInsertStore)
+      SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
+        return DT->dominates(Store->getParent(), Exit);
+      });
+
     // If the store is not guaranteed to execute, we may still get
     // deref info through it.
     if (!DereferenceableInPH) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index fd167db11789..2743574ecca6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -997,7 +997,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
 /// Check if the given conditional branch is based on the comparison between
 /// a variable and zero, and if the variable is non-zero, the control yields to
 /// the loop entry. If the branch matches the behavior, the variable involved
-/// in the comparion is returned. This function will be called to see if the
+/// in the comparison is returned. This function will be called to see if the
 /// precondition and postcondition of the loop are in desirable form.
 static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
   if (!BI || !BI->isConditional())
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
index 90309d7ebba6..f64354497771 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -283,8 +283,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
   // sinked.
   for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) {
     Instruction *I = &*II++;
-    if (!L.hasLoopInvariantOperands(I) ||
-        !canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
+    if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
       continue;
     if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
       Changed = true;
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 440e36767edf..678d02e05d42 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -56,12 +56,9 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
   if (!isPerformingImport() && !isModuleExporting())
     return false;
 
-  // If we are exporting, we need to see whether this value is marked
-  // as NoRename in the summary. If we are importing, we may not have
-  // a summary in the distributed backend case (only summaries for values
-  // importes as defs, not references, are included in the index passed
-  // to the distributed backends).
   if (isPerformingImport()) {
+    assert((!GlobalsToImport->count(SGV) || !isNonRenamableLocal(*SGV)) &&
+           "Attempting to promote non-renamable local");
     // We don't know for sure yet if we are importing this value (as either
     // a reference or a def), since we are simply walking all values in the
     // module. But by necessity if we end up importing it and it is local,
@@ -77,13 +74,28 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
   assert(Summaries->second.size() == 1 && "Local has more than one summary");
   auto Linkage = Summaries->second.front()->linkage();
   if (!GlobalValue::isLocalLinkage(Linkage)) {
-    assert(!Summaries->second.front()->noRename());
+    assert(!isNonRenamableLocal(*SGV) &&
+           "Attempting to promote non-renamable local");
     return true;
   }
 
   return false;
 }
 
+#ifndef NDEBUG
+bool FunctionImportGlobalProcessing::isNonRenamableLocal(
+    const GlobalValue &GV) const {
+  if (!GV.hasLocalLinkage())
+    return false;
+  // This needs to stay in sync with the logic in buildModuleSummaryIndex.
+  if (GV.hasSection())
+    return true;
+  if (Used.count(const_cast<GlobalValue *>(&GV)))
+    return true;
+  return false;
+}
+#endif
+
 std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV,
                                                     bool DoPromote) {
   // For locals that must be promoted to global scope, ensure that
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8cde0c4cd607..31daba2248aa 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6785,22 +6785,19 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
   return Cost;
 }
 
-/// \brief Check whether the address computation for a non-consecutive memory
-/// access looks like an unlikely candidate for being merged into the indexing
-/// mode.
+/// \brief Gets Address Access SCEV after verifying that the access pattern
+/// is loop invariant except the induction variable dependence.
 ///
-/// We look for a GEP which has one index that is an induction variable and all
-/// other indices are loop invariant. If the stride of this access is also
-/// within a small bound we decide that this address computation can likely be
-/// merged into the addressing mode.
-/// In all other cases, we identify the address computation as complex.
-static bool isLikelyComplexAddressComputation(Value *Ptr,
-                                              LoopVectorizationLegality *Legal,
-                                              ScalarEvolution *SE,
-                                              const Loop *TheLoop) {
+/// This SCEV can be sent to the Target in order to estimate the address
+/// calculation cost.
+static const SCEV *getAddressAccessSCEV(
+    Value *Ptr,
+    LoopVectorizationLegality *Legal,
+    ScalarEvolution *SE,
+    const Loop *TheLoop) {
   auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
   if (!Gep)
-    return true;
+    return nullptr;
 
   // We are looking for a gep with all loop invariant indices except for one
   // which should be an induction variable.
@@ -6809,33 +6806,11 @@ static bool isLikelyComplexAddressComputation(Value *Ptr,
     Value *Opd = Gep->getOperand(i);
     if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
         !Legal->isInductionVariable(Opd))
-      return true;
+      return nullptr;
   }
 
-  // Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
-  // can likely be merged into the address computation.
-  unsigned MaxMergeDistance = 64;
-
-  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
-  if (!AddRec)
-    return true;
-
-  // Check the step is constant.
-  const SCEV *Step = AddRec->getStepRecurrence(*SE);
-  // Calculate the pointer stride and check if it is consecutive.
-  const auto *C = dyn_cast<SCEVConstant>(Step);
-  if (!C)
-    return true;
-
-  const APInt &APStepVal = C->getAPInt();
-
-  // Huge step value - give up.
-  if (APStepVal.getBitWidth() > 64)
-    return true;
-
-  int64_t StepVal = APStepVal.getSExtValue();
-
-  return StepVal > MaxMergeDistance;
+  // Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV.
+  return SE->getSCEV(Ptr);
 }
 
 static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
@@ -7063,12 +7038,12 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     unsigned Cost = 0;
     Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
 
-    // True if the memory instruction's address computation is complex.
-    bool IsComplexComputation =
-        isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
+    // Figure out whether the access is strided and get the stride value
+    // if it's known in compile time
+    const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
 
     // Get the cost of the scalar memory instruction and address computation.
-    Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
+    Cost += VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
     Cost += VF *
             TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
                                 Alignment, AS);
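Finally, condition (p2b) introduced in the LICM hunk further up reduces to one dominance query per loop exit. A compact sketch with a toy dominator relation (the precomputed Dominators sets are hypothetical, not LLVM's DominatorTree):

    #include <algorithm>
    #include <set>
    #include <vector>

    using Block = int; // toy stand-in for a basic block

    // Property (p2b): if the store's block dominates every exit block, any
    // execution that reaches an exit must already have passed through the
    // store, so materializing a store on the exit edges introduces no new
    // stores on paths that previously had none.
    static bool storeDominatesAllExits(
        const std::vector<std::set<Block>> &Dominators, // Dominators[B]: blocks dominating B
        Block StoreBB, const std::vector<Block> &ExitBlocks) {
      return std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
                         [&](Block Exit) {
                           return Dominators[Exit].count(StoreBB) != 0;
                         });
    }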