path: root/contrib/llvm/lib/Transforms
author     Dimitry Andric <dim@FreeBSD.org>  2017-01-06 20:24:06 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-01-06 20:24:06 +0000
commit     95ec533a1d8c450f6c6c5e84fe85423960e13382 (patch)
tree       bfe77b0dccd50ed2f4b4e6299d4bc4eaafced6e7 /contrib/llvm/lib/Transforms
parent     2b532af82919b9141e7fd04becf354a0a7dfa813 (diff)
parent     7e7b6700743285c0af506ac6299ddf82ebd434b9 (diff)
Merge llvm, clang, lld and lldb trunk r291274, and resolve conflicts.
Notes: svn path=/projects/clang400-import/; revision=311544
Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp              | 182
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp              | 316
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp    |  14
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp|  96
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVN.cpp                      |  14
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp                     |  37
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp       |   2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp                 |   3
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp       |  24
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp         |  59
10 files changed, 460 insertions, 287 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 6dd95f8dcd55..6b32f6c31f72 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -36,7 +36,10 @@
using namespace llvm;
-STATISTIC(NumImported, "Number of functions imported");
+STATISTIC(NumImportedFunctions, "Number of functions imported");
+STATISTIC(NumImportedModules, "Number of modules imported from");
+STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
+STATISTIC(NumLiveSymbols, "Number of live symbols in index");
/// Limit on instruction count of imported functions.
static cl::opt<unsigned> ImportInstrLimit(
@@ -69,6 +72,9 @@ static cl::opt<float> ImportColdMultiplier(
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
cl::desc("Print imported functions"));
+static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
+ cl::desc("Compute dead symbols"));
+
// Temporary allows the function import pass to disable always linking
// referenced discardable symbols.
static cl::opt<bool>
@@ -105,78 +111,6 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
namespace {
-// Return true if the Summary describes a GlobalValue that can be externally
-// referenced, i.e. it does not need renaming (linkage is not local) or renaming
-// is possible (does not have a section for instance).
-static bool canBeExternallyReferenced(const GlobalValueSummary &Summary) {
- if (!Summary.needsRenaming())
- return true;
-
- if (Summary.noRename())
- // Can't externally reference a global that needs renaming if has a section
- // or is referenced from inline assembly, for example.
- return false;
-
- return true;
-}
-
-// Return true if \p GUID describes a GlobalValue that can be externally
-// referenced, i.e. it does not need renaming (linkage is not local) or
-// renaming is possible (does not have a section for instance).
-static bool canBeExternallyReferenced(const ModuleSummaryIndex &Index,
- GlobalValue::GUID GUID) {
- auto Summaries = Index.findGlobalValueSummaryList(GUID);
- if (Summaries == Index.end())
- return true;
- if (Summaries->second.size() != 1)
- // If there are multiple globals with this GUID, then we know it is
- // not a local symbol, and it is necessarily externally referenced.
- return true;
-
- // We don't need to check for the module path, because if it can't be
- // externally referenced and we call it, it is necessarilly in the same
- // module
- return canBeExternallyReferenced(**Summaries->second.begin());
-}
-
-// Return true if the global described by \p Summary can be imported in another
-// module.
-static bool eligibleForImport(const ModuleSummaryIndex &Index,
- const GlobalValueSummary &Summary) {
- if (!canBeExternallyReferenced(Summary))
- // Can't import a global that needs renaming if has a section for instance.
- // FIXME: we may be able to import it by copying it without promotion.
- return false;
-
- // Don't import functions that are not viable to inline.
- if (Summary.isNotViableToInline())
- return false;
-
- // Check references (and potential calls) in the same module. If the current
- // value references a global that can't be externally referenced it is not
- // eligible for import. First check the flag set when we have possible
- // opaque references (e.g. inline asm calls), then check the call and
- // reference sets.
- if (Summary.hasInlineAsmMaybeReferencingInternal())
- return false;
- bool AllRefsCanBeExternallyReferenced =
- llvm::all_of(Summary.refs(), [&](const ValueInfo &VI) {
- return canBeExternallyReferenced(Index, VI.getGUID());
- });
- if (!AllRefsCanBeExternallyReferenced)
- return false;
-
- if (auto *FuncSummary = dyn_cast<FunctionSummary>(&Summary)) {
- bool AllCallsCanBeExternallyReferenced = llvm::all_of(
- FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
- return canBeExternallyReferenced(Index, Edge.first.getGUID());
- });
- if (!AllCallsCanBeExternallyReferenced)
- return false;
- }
- return true;
-}
-
/// Given a list of possible callee implementation for a call site, select one
/// that fits the \p Threshold.
///
@@ -214,7 +148,7 @@ selectCallee(const ModuleSummaryIndex &Index,
if (Summary->instCount() > Threshold)
return false;
- if (!eligibleForImport(Index, *Summary))
+ if (Summary->notEligibleToImport())
return false;
return true;
@@ -346,7 +280,8 @@ static void computeImportForFunction(
static void ComputeImportForModule(
const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList,
- StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
+ StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr,
+ const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr) {
// Worklist contains the list of function imported in this module, for which
// we will analyse the callees and may import further down the callgraph.
SmallVector<EdgeInfo, 128> Worklist;
@@ -354,6 +289,10 @@ static void ComputeImportForModule(
// Populate the worklist with the import for the functions in the current
// module
for (auto &GVSummary : DefinedGVSummaries) {
+ if (DeadSymbols && DeadSymbols->count(GVSummary.first)) {
+ DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n");
+ continue;
+ }
auto *Summary = GVSummary.second;
if (auto *AS = dyn_cast<AliasSummary>(Summary))
Summary = &AS->getAliasee();
@@ -393,14 +332,15 @@ void llvm::ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
- StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+ const DenseSet<GlobalValue::GUID> *DeadSymbols) {
// For each module that has function defined, compute the import/export lists.
for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
auto &ImportList = ImportLists[DefinedGVSummaries.first()];
DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first() << "'\n");
ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
- &ExportLists);
+ &ExportLists, DeadSymbols);
}
// When computing imports we added all GUIDs referenced by anything
@@ -462,6 +402,86 @@ void llvm::ComputeCrossModuleImportForModule(
#endif
}
+DenseSet<GlobalValue::GUID> llvm::computeDeadSymbols(
+ const ModuleSummaryIndex &Index,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ if (!ComputeDead)
+ return DenseSet<GlobalValue::GUID>();
+ if (GUIDPreservedSymbols.empty())
+ // Don't do anything when nothing is live, this is friendly with tests.
+ return DenseSet<GlobalValue::GUID>();
+ DenseSet<GlobalValue::GUID> LiveSymbols = GUIDPreservedSymbols;
+ SmallVector<GlobalValue::GUID, 128> Worklist;
+ Worklist.reserve(LiveSymbols.size() * 2);
+ for (auto GUID : LiveSymbols) {
+ DEBUG(dbgs() << "Live root: " << GUID << "\n");
+ Worklist.push_back(GUID);
+ }
+ // Add values flagged in the index as live roots to the worklist.
+ for (const auto &Entry : Index) {
+ bool IsLiveRoot = llvm::any_of(
+ Entry.second,
+ [&](const std::unique_ptr<llvm::GlobalValueSummary> &Summary) {
+ return Summary->liveRoot();
+ });
+ if (!IsLiveRoot)
+ continue;
+ DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n");
+ Worklist.push_back(Entry.first);
+ }
+
+ while (!Worklist.empty()) {
+ auto GUID = Worklist.pop_back_val();
+ auto It = Index.findGlobalValueSummaryList(GUID);
+ if (It == Index.end()) {
+ DEBUG(dbgs() << "Not in index: " << GUID << "\n");
+ continue;
+ }
+
+ // FIXME: we should only make the prevailing copy live here
+ for (auto &Summary : It->second) {
+ for (auto Ref : Summary->refs()) {
+ auto RefGUID = Ref.getGUID();
+ if (LiveSymbols.insert(RefGUID).second) {
+ DEBUG(dbgs() << "Marking live (ref): " << RefGUID << "\n");
+ Worklist.push_back(RefGUID);
+ }
+ }
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
+ for (auto Call : FS->calls()) {
+ auto CallGUID = Call.first.getGUID();
+ if (LiveSymbols.insert(CallGUID).second) {
+ DEBUG(dbgs() << "Marking live (call): " << CallGUID << "\n");
+ Worklist.push_back(CallGUID);
+ }
+ }
+ }
+ if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
+ auto AliaseeGUID = AS->getAliasee().getOriginalName();
+ if (LiveSymbols.insert(AliaseeGUID).second) {
+ DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n");
+ Worklist.push_back(AliaseeGUID);
+ }
+ }
+ }
+ }
+ DenseSet<GlobalValue::GUID> DeadSymbols;
+ DeadSymbols.reserve(
+ std::min(Index.size(), Index.size() - LiveSymbols.size()));
+ for (auto &Entry : Index) {
+ auto GUID = Entry.first;
+ if (!LiveSymbols.count(GUID)) {
+ DEBUG(dbgs() << "Marking dead: " << GUID << "\n");
+ DeadSymbols.insert(GUID);
+ }
+ }
+ DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and "
+ << DeadSymbols.size() << " symbols Dead \n");
+ NumDeadSymbols += DeadSymbols.size();
+ NumLiveSymbols += LiveSymbols.size();
+ return DeadSymbols;
+}
+
/// Compute the set of summaries needed for a ThinLTO backend compilation of
/// \p ModulePath.
void llvm::gatherImportedSummariesForModule(
@@ -625,7 +645,6 @@ Expected<bool> FunctionImporter::importFunctions(
// now, before linking it (otherwise this will be a noop).
if (Error Err = SrcModule->materializeMetadata())
return std::move(Err);
- UpgradeDebugInfo(*SrcModule);
auto &ImportGUIDs = FunctionsToImportPerModule->second;
// Find the globals to import
@@ -698,6 +717,10 @@ Expected<bool> FunctionImporter::importFunctions(
}
}
+ // Upgrade debug info after we're done materializing all the globals and we
+ // have loaded all the required metadata!
+ UpgradeDebugInfo(*SrcModule);
+
// Link in the specified functions.
if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport))
return true;
@@ -717,9 +740,10 @@ Expected<bool> FunctionImporter::importFunctions(
report_fatal_error("Function Import: link error");
ImportedCount += GlobalsToImport.size();
+ NumImportedModules++;
}
- NumImported += ImportedCount;
+ NumImportedFunctions += ImportedCount;
DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
<< DestModule.getModuleIdentifier() << "\n");
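
For orientation, a minimal sketch (not part of this commit) of how a ThinLTO driver might wire the new computeDeadSymbols() into the extended ComputeCrossModuleImport() signature. The computeImports() wrapper and the GUIDPreservedSymbols argument are placeholders for whatever the linker supplies:

#include "llvm/Transforms/IPO/FunctionImport.h"
using namespace llvm;

// Hypothetical driver helper; names and plumbing are illustrative only.
static void computeImports(
    const ModuleSummaryIndex &Index,
    const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
    StringMap<FunctionImporter::ImportMapTy> &ImportLists,
    StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
  // Anything unreachable from the preserved roots is reported as dead and
  // then skipped while building the per-module import/export lists.
  DenseSet<GlobalValue::GUID> DeadSymbols =
      computeDeadSymbols(Index, GUIDPreservedSymbols);
  ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
                           ExportLists, &DeadSymbols);
}
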
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 2948878cffc4..f4742aaf748f 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -27,9 +27,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
@@ -52,6 +55,20 @@ static cl::opt<bool> AvoidReuse(
cl::desc("Try to avoid reuse of byte array addresses using aliases"),
cl::Hidden, cl::init(true));
+static cl::opt<std::string> ClSummaryAction(
+ "lowertypetests-summary-action",
+ cl::desc("What to do with the summary when running this pass"), cl::Hidden);
+
+static cl::opt<std::string> ClReadSummary(
+ "lowertypetests-read-summary",
+ cl::desc("Read summary from given YAML file before running pass"),
+ cl::Hidden);
+
+static cl::opt<std::string> ClWriteSummary(
+ "lowertypetests-write-summary",
+ cl::desc("Write summary to given YAML file after running pass"),
+ cl::Hidden);
+
bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
if (Offset < ByteOffset)
return false;
@@ -66,38 +83,6 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
return Bits.count(BitOffset);
}
-bool BitSetInfo::containsValue(
- const DataLayout &DL,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V,
- uint64_t COffset) const {
- if (auto GV = dyn_cast<GlobalObject>(V)) {
- auto I = GlobalLayout.find(GV);
- if (I == GlobalLayout.end())
- return false;
- return containsGlobalOffset(I->second + COffset);
- }
-
- if (auto GEP = dyn_cast<GEPOperator>(V)) {
- APInt APOffset(DL.getPointerSizeInBits(0), 0);
- bool Result = GEP->accumulateConstantOffset(DL, APOffset);
- if (!Result)
- return false;
- COffset += APOffset.getZExtValue();
- return containsValue(DL, GlobalLayout, GEP->getPointerOperand(), COffset);
- }
-
- if (auto Op = dyn_cast<Operator>(V)) {
- if (Op->getOpcode() == Instruction::BitCast)
- return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
-
- if (Op->getOpcode() == Instruction::Select)
- return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
- containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
- }
-
- return false;
-}
-
void BitSetInfo::print(raw_ostream &OS) const {
OS << "offset " << ByteOffset << " size " << BitSize << " align "
<< (1 << AlignLog2);
@@ -204,7 +189,7 @@ struct ByteArrayInfo {
std::set<uint64_t> Bits;
uint64_t BitSize;
GlobalVariable *ByteArray;
- Constant *Mask;
+ GlobalVariable *MaskGlobal;
};
/// A POD-like structure that we use to store a global reference together with
@@ -241,6 +226,9 @@ public:
class LowerTypeTestsModule {
Module &M;
+ // This is for testing purposes only.
+ std::unique_ptr<ModuleSummaryIndex> OwnedSummary;
+
bool LinkerSubsectionsViaSymbols;
Triple::ArchType Arch;
Triple::OSType OS;
@@ -248,6 +236,7 @@ class LowerTypeTestsModule {
IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty);
IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
@@ -259,6 +248,37 @@ class LowerTypeTestsModule {
// Mapping from type identifiers to the call sites that test them.
DenseMap<Metadata *, std::vector<CallInst *>> TypeTestCallSites;
+ /// This structure describes how to lower type tests for a particular type
+ /// identifier. It is either built directly from the global analysis (during
+ /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
+ /// identifier summaries and external symbol references (in ThinLTO backends).
+ struct TypeIdLowering {
+ TypeTestResolution::Kind TheKind;
+
+ /// All except Unsat: the start address within the combined global.
+ Constant *OffsetedGlobal;
+
+ /// ByteArray, Inline, AllOnes: log2 of the required global alignment
+ /// relative to the start address.
+ Constant *AlignLog2;
+
+ /// ByteArray, Inline, AllOnes: size of the memory region covering members
+ /// of this type identifier as a multiple of 2^AlignLog2.
+ Constant *Size;
+
+ /// ByteArray, Inline, AllOnes: range of the size expressed as a bit width.
+ unsigned SizeBitWidth;
+
+ /// ByteArray: the byte array to test the address against.
+ Constant *TheByteArray;
+
+ /// ByteArray: the bit mask to apply to bytes loaded from the byte array.
+ Constant *BitMask;
+
+ /// Inline: the bit mask to test the address against.
+ Constant *InlineBits;
+ };
+
std::vector<ByteArrayInfo> ByteArrayInfos;
Function *WeakInitializerFn = nullptr;
@@ -268,15 +288,13 @@ class LowerTypeTestsModule {
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
ByteArrayInfo *createByteArray(BitSetInfo &BSI);
void allocateByteArrays();
- Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
Value *BitOffset);
void lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
- Value *
- lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- Constant *CombinedGlobal,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
+ const TypeIdLowering &TIL);
void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
ArrayRef<GlobalTypeMember *> Globals);
unsigned getJumpTableEntrySize();
@@ -302,6 +320,7 @@ class LowerTypeTestsModule {
public:
LowerTypeTestsModule(Module &M);
+ ~LowerTypeTestsModule();
bool lower();
};
@@ -380,7 +399,7 @@ ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
BAI->Bits = BSI.Bits;
BAI->BitSize = BSI.BitSize;
BAI->ByteArray = ByteArrayGlobal;
- BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
+ BAI->MaskGlobal = MaskGlobal;
return BAI;
}
@@ -399,8 +418,9 @@ void LowerTypeTestsModule::allocateByteArrays() {
uint8_t Mask;
BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
- BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
- cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
+ BAI->MaskGlobal->replaceAllUsesWith(
+ ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), Int8PtrTy));
+ BAI->MaskGlobal->eraseFromParent();
}
Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes);
@@ -435,101 +455,121 @@ void LowerTypeTestsModule::allocateByteArrays() {
ByteArraySizeBytes = BAB.Bytes.size();
}
-/// Build a test that bit BitOffset is set in BSI, where
-/// BitSetGlobal is a global containing the bits in BSI.
-Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
- ByteArrayInfo *&BAI,
+/// Build a test that bit BitOffset is set in the type identifier that was
+/// lowered to TIL, which must be either an Inline or a ByteArray.
+Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
+ const TypeIdLowering &TIL,
Value *BitOffset) {
- if (BSI.BitSize <= 64) {
+ if (TIL.TheKind == TypeTestResolution::Inline) {
// If the bit set is sufficiently small, we can avoid a load by bit testing
// a constant.
- IntegerType *BitsTy;
- if (BSI.BitSize <= 32)
- BitsTy = Int32Ty;
- else
- BitsTy = Int64Ty;
-
- uint64_t Bits = 0;
- for (auto Bit : BSI.Bits)
- Bits |= uint64_t(1) << Bit;
- Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
- return createMaskedBitTest(B, BitsConst, BitOffset);
+ return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
} else {
- if (!BAI) {
- ++NumByteArraysCreated;
- BAI = createByteArray(BSI);
- }
-
- Constant *ByteArray = BAI->ByteArray;
- Type *Ty = BAI->ByteArray->getValueType();
+ Constant *ByteArray = TIL.TheByteArray;
if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
// Each use of the byte array uses a different alias. This makes the
// backend less likely to reuse previously computed byte array addresses,
// improving the security of the CFI mechanism based on this pass.
- ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
- GlobalValue::PrivateLinkage, "bits_use",
- ByteArray, &M);
+ ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
+ "bits_use", ByteArray, &M);
}
- Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
+ Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
Value *Byte = B.CreateLoad(ByteAddr);
- Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
+ Value *ByteAndMask =
+ B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
}
}
+static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
+ Value *V, uint64_t COffset) {
+ if (auto GV = dyn_cast<GlobalObject>(V)) {
+ SmallVector<MDNode *, 2> Types;
+ GV->getMetadata(LLVMContext::MD_type, Types);
+ for (MDNode *Type : Types) {
+ if (Type->getOperand(1) != TypeId)
+ continue;
+ uint64_t Offset =
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
+ if (COffset == Offset)
+ return true;
+ }
+ return false;
+ }
+
+ if (auto GEP = dyn_cast<GEPOperator>(V)) {
+ APInt APOffset(DL.getPointerSizeInBits(0), 0);
+ bool Result = GEP->accumulateConstantOffset(DL, APOffset);
+ if (!Result)
+ return false;
+ COffset += APOffset.getZExtValue();
+ return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset);
+ }
+
+ if (auto Op = dyn_cast<Operator>(V)) {
+ if (Op->getOpcode() == Instruction::BitCast)
+ return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset);
+
+ if (Op->getOpcode() == Instruction::Select)
+ return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) &&
+ isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset);
+ }
+
+ return false;
+}
+
/// Lower a llvm.type.test call to its implementation. Returns the value to
/// replace the call with.
-Value *LowerTypeTestsModule::lowerBitSetCall(
- CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
- Constant *CombinedGlobalIntAddr,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
+ const TypeIdLowering &TIL) {
+ if (TIL.TheKind == TypeTestResolution::Unsat)
+ return ConstantInt::getFalse(M.getContext());
+
Value *Ptr = CI->getArgOperand(0);
const DataLayout &DL = M.getDataLayout();
-
- if (BSI.containsValue(DL, GlobalLayout, Ptr))
+ if (isKnownTypeIdMember(TypeId, DL, Ptr, 0))
return ConstantInt::getTrue(M.getContext());
- Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
- CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
-
BasicBlock *InitialBB = CI->getParent();
IRBuilder<> B(CI);
Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
- if (BSI.isSingleOffset())
+ Constant *OffsetedGlobalAsInt =
+ ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy);
+ if (TIL.TheKind == TypeTestResolution::Single)
return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
- Value *BitOffset;
- if (BSI.AlignLog2 == 0) {
- BitOffset = PtrOffset;
- } else {
- // We need to check that the offset both falls within our range and is
- // suitably aligned. We can check both properties at the same time by
- // performing a right rotate by log2(alignment) followed by an integer
- // comparison against the bitset size. The rotate will move the lower
- // order bits that need to be zero into the higher order bits of the
- // result, causing the comparison to fail if they are nonzero. The rotate
- // also conveniently gives us a bit offset to use during the load from
- // the bitset.
- Value *OffsetSHR =
- B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
- Value *OffsetSHL = B.CreateShl(
- PtrOffset,
- ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
- BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
- }
-
- Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+ // We need to check that the offset both falls within our range and is
+ // suitably aligned. We can check both properties at the same time by
+ // performing a right rotate by log2(alignment) followed by an integer
+ // comparison against the bitset size. The rotate will move the lower
+ // order bits that need to be zero into the higher order bits of the
+ // result, causing the comparison to fail if they are nonzero. The rotate
+ // also conveniently gives us a bit offset to use during the load from
+ // the bitset.
+ Value *OffsetSHR =
+ B.CreateLShr(PtrOffset, ConstantExpr::getZExt(TIL.AlignLog2, IntPtrTy));
+ Value *OffsetSHL = B.CreateShl(
+ PtrOffset, ConstantExpr::getZExt(
+ ConstantExpr::getSub(
+ ConstantInt::get(Int8Ty, DL.getPointerSizeInBits(0)),
+ TIL.AlignLog2),
+ IntPtrTy));
+ Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+
+ Constant *BitSizeConst = ConstantExpr::getZExt(TIL.Size, IntPtrTy);
Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
// If the bit set is all ones, testing against it is unnecessary.
- if (BSI.isAllOnes())
+ if (TIL.TheKind == TypeTestResolution::AllOnes)
return OffsetInRange;
TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
@@ -537,7 +577,7 @@ Value *LowerTypeTestsModule::lowerBitSetCall(
// Now that we know that the offset is in range and aligned, load the
// appropriate bit from the bitset.
- Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
+ Value *Bit = createBitSetTest(ThenB, TIL, BitOffset);
// The value we want is 0 if we came directly from the initial block
// (having failed the range or alignment checks), or the loaded bit if
@@ -622,11 +662,7 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
void LowerTypeTestsModule::lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
- Constant *CombinedGlobalIntAddr =
- ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
- DenseMap<GlobalObject *, uint64_t> GlobalObjLayout;
- for (auto &P : GlobalLayout)
- GlobalObjLayout[P.first->getGlobal()] = P.second;
+ CombinedGlobalAddr = ConstantExpr::getBitCast(CombinedGlobalAddr, Int8PtrTy);
// For each type identifier in this disjoint set...
for (Metadata *TypeId : TypeIds) {
@@ -640,13 +676,43 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
BSI.print(dbgs());
});
- ByteArrayInfo *BAI = nullptr;
+ TypeIdLowering TIL;
+ TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
+ Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
+ TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2);
+ if (BSI.isAllOnes()) {
+ TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
+ : TypeTestResolution::AllOnes;
+ TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32;
+ TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty,
+ BSI.BitSize);
+ } else if (BSI.BitSize <= 64) {
+ TIL.TheKind = TypeTestResolution::Inline;
+ TIL.SizeBitWidth = (BSI.BitSize <= 32) ? 5 : 6;
+ TIL.Size = ConstantInt::get(Int8Ty, BSI.BitSize);
+ uint64_t InlineBits = 0;
+ for (auto Bit : BSI.Bits)
+ InlineBits |= uint64_t(1) << Bit;
+ if (InlineBits == 0)
+ TIL.TheKind = TypeTestResolution::Unsat;
+ else
+ TIL.InlineBits = ConstantInt::get(
+ (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits);
+ } else {
+ TIL.TheKind = TypeTestResolution::ByteArray;
+ TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32;
+ TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty,
+ BSI.BitSize);
+ ++NumByteArraysCreated;
+ ByteArrayInfo *BAI = createByteArray(BSI);
+ TIL.TheByteArray = BAI->ByteArray;
+ TIL.BitMask = BAI->MaskGlobal;
+ }
// Lower each call to llvm.type.test for this type identifier.
for (CallInst *CI : TypeTestCallSites[TypeId]) {
++NumTypeTestCallsLowered;
- Value *Lowered =
- lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalObjLayout);
+ Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
CI->replaceAllUsesWith(Lowered);
CI->eraseFromParent();
}
@@ -1080,6 +1146,22 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
/// Lower all type tests in this module.
LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
+ // Handle the command-line summary arguments. This code is for testing
+ // purposes only, so we handle errors directly.
+ if (!ClSummaryAction.empty()) {
+ OwnedSummary = make_unique<ModuleSummaryIndex>();
+ if (!ClReadSummary.empty()) {
+ ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
+ ": ");
+ auto ReadSummaryFile =
+ ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
+
+ yaml::Input In(ReadSummaryFile->getBuffer());
+ In >> *OwnedSummary;
+ ExitOnErr(errorCodeToError(In.error()));
+ }
+ }
+
Triple TargetTriple(M.getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
Arch = TargetTriple.getArch();
@@ -1087,6 +1169,20 @@ LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
ObjectFormat = TargetTriple.getObjectFormat();
}
+LowerTypeTestsModule::~LowerTypeTestsModule() {
+ if (ClSummaryAction.empty() || ClWriteSummary.empty())
+ return;
+
+ ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
+ ": ");
+ std::error_code EC;
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+ ExitOnErr(errorCodeToError(EC));
+
+ yaml::Output Out(OS);
+ Out << *OwnedSummary;
+}
+
bool LowerTypeTestsModule::lower() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
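
As a side note on the lowering above: the emitted range-and-alignment check folds both tests into a single comparison via a right rotate. A small scalar sketch of that arithmetic (illustrative only; the 64-bit pointer width and the helper name are assumptions, not code from this commit):

#include <cstdint>

// Mirrors the BitOffset/OffsetInRange computation emitted by
// lowerTypeTestCall(): true if Ptr may point into the region of Size
// aligned units starting at Base.
static bool offsetInRange(uint64_t Ptr, uint64_t Base, unsigned AlignLog2,
                          uint64_t Size /* in units of 2^AlignLog2 */) {
  uint64_t Offset = Ptr - Base;
  uint64_t BitOffset;
  if (AlignLog2 == 0) {
    BitOffset = Offset;
  } else {
    // Right-rotate by AlignLog2: any misaligned low bits land in the high
    // bits of the result, so the unsigned comparison below fails for them.
    BitOffset = (Offset >> AlignLog2) | (Offset << (64 - AlignLog2));
  }
  // BitOffset also serves as the index for the inline or byte-array bit test.
  return BitOffset < Size;
}
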
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f863d192fc2f..b29ed3c87451 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1637,6 +1637,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::cos:
+ case Intrinsic::amdgcn_cos: {
+ Value *SrcSrc;
+ Value *Src = II->getArgOperand(0);
+ if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
+ match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
+ // cos(-x) -> cos(x)
+ // cos(fabs(x)) -> cos(x)
+ II->setArgOperand(0, SrcSrc);
+ return II;
+ }
+
+ break;
+ }
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
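
A source-level view of the new fold (illustrative only, not taken from the commit): because cosine is an even function, a negation or fabs on its argument can be dropped, so both calls below reduce to the same cos(x):

#include <cmath>

double negArg(double x)  { return std::cos(-x); }            // cos(fneg x)
double fabsArg(double x) { return std::cos(std::fabs(x)); }  // cos(fabs(x))
// After instcombine, both bodies lower to a plain call to cos(x).
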
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6a7cb0e45c63..1d5528398776 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -514,7 +514,8 @@ struct AddressSanitizer : public FunctionPass {
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
Value *Addr, uint32_t TypeSize, bool IsWrite,
Value *SizeArgument, bool UseCalls, uint32_t Exp);
- void instrumentUnusualSizeOrAlignment(Instruction *I, Value *Addr,
+ void instrumentUnusualSizeOrAlignment(Instruction *I,
+ Instruction *InsertBefore, Value *Addr,
uint32_t TypeSize, bool IsWrite,
Value *SizeArgument, bool UseCalls,
uint32_t Exp);
@@ -1056,20 +1057,18 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
return nullptr;
*IsWrite = false;
}
- // Only instrument if the mask is constant for now.
- if (isa<ConstantVector>(CI->getOperand(2 + OpOffset))) {
- auto BasePtr = CI->getOperand(0 + OpOffset);
- auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
- *TypeSize = DL.getTypeStoreSizeInBits(Ty);
- if (auto AlignmentConstant =
- dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
- *Alignment = (unsigned)AlignmentConstant->getZExtValue();
- else
- *Alignment = 1; // No alignment guarantees. We probably got Undef
- if (MaybeMask)
- *MaybeMask = CI->getOperand(2 + OpOffset);
- PtrOperand = BasePtr;
- }
+
+ auto BasePtr = CI->getOperand(0 + OpOffset);
+ auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+ *TypeSize = DL.getTypeStoreSizeInBits(Ty);
+ if (auto AlignmentConstant =
+ dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+ *Alignment = (unsigned)AlignmentConstant->getZExtValue();
+ else
+ *Alignment = 1; // No alignment guarantees. We probably got Undef
+ if (MaybeMask)
+ *MaybeMask = CI->getOperand(2 + OpOffset);
+ PtrOperand = BasePtr;
}
}
@@ -1130,24 +1129,25 @@ void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
}
static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I,
- Value *Addr, unsigned Alignment,
- unsigned Granularity, uint32_t TypeSize,
- bool IsWrite, Value *SizeArgument,
- bool UseCalls, uint32_t Exp) {
+ Instruction *InsertBefore, Value *Addr,
+ unsigned Alignment, unsigned Granularity,
+ uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp) {
// Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
// if the data is properly aligned.
if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
TypeSize == 128) &&
(Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
- return Pass->instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr,
- UseCalls, Exp);
- Pass->instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
- UseCalls, Exp);
+ return Pass->instrumentAddress(I, InsertBefore, Addr, TypeSize, IsWrite,
+ nullptr, UseCalls, Exp);
+ Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeSize,
+ IsWrite, nullptr, UseCalls, Exp);
}
static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
const DataLayout &DL, Type *IntptrTy,
- ConstantVector *Mask, Instruction *I,
+ Value *Mask, Instruction *I,
Value *Addr, unsigned Alignment,
unsigned Granularity, uint32_t TypeSize,
bool IsWrite, Value *SizeArgument,
@@ -1157,15 +1157,30 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
unsigned Num = VTy->getVectorNumElements();
auto Zero = ConstantInt::get(IntptrTy, 0);
for (unsigned Idx = 0; Idx < Num; ++Idx) {
- // dyn_cast as we might get UndefValue
- auto Masked = dyn_cast<ConstantInt>(Mask->getOperand(Idx));
- if (Masked && Masked->isAllOnesValue()) {
+ Value *InstrumentedAddress = nullptr;
+ Instruction *InsertBefore = I;
+ if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
+ // dyn_cast as we might get UndefValue
+ if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
+ if (Masked->isNullValue())
+ // Mask is constant false, so no instrumentation needed.
+ continue;
+ // If we have a true or undef value, fall through to doInstrumentAddress
+ // with InsertBefore == I
+ }
+ } else {
IRBuilder<> IRB(I);
- auto InstrumentedAddress =
- IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
- doInstrumentAddress(Pass, I, InstrumentedAddress, Alignment, Granularity,
- ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp);
+ Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
+ TerminatorInst *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
+ InsertBefore = ThenTerm;
}
+
+ IRBuilder<> IRB(InsertBefore);
+ InstrumentedAddress =
+ IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+ doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
+ Granularity, ElemTypeSize, IsWrite, SizeArgument,
+ UseCalls, Exp);
}
}
@@ -1220,12 +1235,11 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
unsigned Granularity = 1 << Mapping.Scale;
if (MaybeMask) {
- auto Mask = cast<ConstantVector>(MaybeMask);
- instrumentMaskedLoadOrStore(this, DL, IntptrTy, Mask, I, Addr, Alignment,
- Granularity, TypeSize, IsWrite, nullptr,
- UseCalls, Exp);
+ instrumentMaskedLoadOrStore(this, DL, IntptrTy, MaybeMask, I, Addr,
+ Alignment, Granularity, TypeSize, IsWrite,
+ nullptr, UseCalls, Exp);
} else {
- doInstrumentAddress(this, I, Addr, Alignment, Granularity, TypeSize,
+ doInstrumentAddress(this, I, I, Addr, Alignment, Granularity, TypeSize,
IsWrite, nullptr, UseCalls, Exp);
}
}
@@ -1342,9 +1356,9 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
// to report the actual access size.
void AddressSanitizer::instrumentUnusualSizeOrAlignment(
- Instruction *I, Value *Addr, uint32_t TypeSize, bool IsWrite,
- Value *SizeArgument, bool UseCalls, uint32_t Exp) {
- IRBuilder<> IRB(I);
+ Instruction *I, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize,
+ bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) {
+ IRBuilder<> IRB(InsertBefore);
Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
if (UseCalls) {
@@ -1358,8 +1372,8 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
Value *LastByte = IRB.CreateIntToPtr(
IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
Addr->getType());
- instrumentAddress(I, I, Addr, 8, IsWrite, Size, false, Exp);
- instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, InsertBefore, Addr, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, InsertBefore, LastByte, 8, IsWrite, Size, false, Exp);
}
}
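
To illustrate the masked load/store change above: with a non-constant mask, each lane's shadow check is now emitted under a branch on that lane's mask element (via SplitBlockAndInsertIfThen) instead of the whole access being skipped. A rough source-level sketch, where asanCheckAddress() is a stand-in for the emitted __asan check, not a real API:

// Stand-in for the shadow-memory check the pass emits; not a real ASan entry point.
static void asanCheckAddress(void *Addr) { (void)Addr; }

// Conceptual shape of the instrumentation for a 4-lane masked store whose
// mask is only known at run time.
void checkMaskedStore(float *Base, const bool Mask[4]) {
  for (int Idx = 0; Idx < 4; ++Idx)
    if (Mask[Idx])                    // per-lane branch on the mask element
      asanCheckAddress(&Base[Idx]);   // check only lanes that touch memory
  // ...the original masked store intrinsic then executes as before.
}
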
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 9485bfd7c296..0137378b828b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1572,6 +1572,13 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Assign value numbers to the new instructions.
for (Instruction *I : NewInsts) {
+ // Instructions that have been inserted in predecessor(s) to materialize
+ // the load address do not retain their original debug locations. Doing
+ // so could lead to confusing (but correct) source attributions.
+ // FIXME: How do we retain source locations without causing poor debugging
+ // behavior?
+ I->setDebugLoc(DebugLoc());
+
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
// ordering issues. If a block hasn't been processed yet, we would be
@@ -1601,8 +1608,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range))
NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
- // Transfer DebugLoc.
- NewLoad->setDebugLoc(LI->getDebugLoc());
+ // We do not propagate the old load's debug location, because the new
+ // load now lives in a different BB, and we want to avoid a jumpy line
+ // table.
+ // FIXME: How do we retain source locations without causing poor debugging
+ // behavior?
// Add the newly created load.
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 1cc5c8f0da84..6ef9d0561322 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -408,6 +408,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
CurAST->deleteValue(&I);
I.eraseFromParent();
}
+ Changed = true;
continue;
}
@@ -766,6 +767,14 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
// Move the new node to the Preheader, before its terminator.
I.moveBefore(Preheader->getTerminator());
+ // Do not retain debug locations when we are moving instructions to different
+ // basic blocks, because we want to avoid jumpy line tables. Calls, however,
+ // need to retain their debug locs because they may be inlined.
+ // FIXME: How do we retain source locations without causing poor debugging
+ // behavior?
+ if (!isa<CallInst>(I))
+ I.setDebugLoc(DebugLoc());
+
if (isa<LoadInst>(I))
++NumMovedLoads;
else if (isa<CallInst>(I))
@@ -911,14 +920,23 @@ bool llvm::promoteLoopAccessesToScalars(
//
// If at least one store is guaranteed to execute, both properties are
// satisfied, and promotion is legal.
+ //
// This, however, is not a necessary condition. Even if no store/load is
- // guaranteed to execute, we can still establish these properties:
- // (p1) by proving that hoisting the load into the preheader is
- // safe (i.e. proving dereferenceability on all paths through the loop). We
+ // guaranteed to execute, we can still establish these properties.
+ // We can establish (p1) by proving that hoisting the load into the preheader
+ // is safe (i.e. proving dereferenceability on all paths through the loop). We
// can use any access within the alias set to prove dereferenceability,
// since they're all must alias.
- // (p2) by proving the memory is thread-local, so the memory model
+ //
+ // There are two ways establish (p2):
+ // a) Prove the location is thread-local. In this case the memory model
// requirement does not apply, and stores are safe to insert.
+ // b) Prove a store dominates every exit block. In this case, if an exit
+ // blocks is reached, the original dynamic path would have taken us through
+ // the store, so inserting a store into the exit block is safe. Note that this
+ // is different from the store being guaranteed to execute. For instance,
+ // if an exception is thrown on the first iteration of the loop, the original
+ // store is never executed, but the exit blocks are not executed either.
bool DereferenceableInPH = false;
bool SafeToInsertStore = false;
@@ -1000,6 +1018,17 @@ bool llvm::promoteLoopAccessesToScalars(
}
}
+ // If a store dominates all exit blocks, it is safe to sink.
+ // As explained above, if an exit block was executed, a dominating
+ // store must have been been executed at least once, so we are not
+ // introducing stores on paths that did not have them.
+ // Note that this only looks at explicit exit blocks. If we ever
+ // start sinking stores into unwind edges (see above), this will break.
+ if (!SafeToInsertStore)
+ SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
+ return DT->dominates(Store->getParent(), Exit);
+ });
+
// If the store is not guaranteed to execute, we may still get
// deref info through it.
if (!DereferenceableInPH) {
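
An illustrative case for the new (p2b) condition (not taken from the diff; mayThrow() is an assumed external call): the store to g is not guaranteed to execute, since the call before it may throw on the first iteration, yet its block dominates the loop's single exit, so promoting g and sinking one store into the exit block introduces no store on a path that lacked one:

extern void mayThrow();   // assumed external function that may throw
int g;

void storeUnderCalls(int n) {
  int i = 0;
  do {
    mayThrow();           // makes the following store not guaranteed to execute
    g = i;                // but this block dominates the loop's only exit,
                          // so LICM may now promote g within the loop
  } while (++i < n);
}
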
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index fd167db11789..2743574ecca6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -997,7 +997,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
/// Check if the given conditional branch is based on the comparison between
/// a variable and zero, and if the variable is non-zero, the control yields to
/// the loop entry. If the branch matches the behavior, the variable involved
-/// in the comparion is returned. This function will be called to see if the
+/// in the comparison is returned. This function will be called to see if the
/// precondition and postcondition of the loop are in desirable form.
static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
if (!BI || !BI->isConditional())
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
index 90309d7ebba6..f64354497771 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -283,8 +283,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// sinked.
for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) {
Instruction *I = &*II++;
- if (!L.hasLoopInvariantOperands(I) ||
- !canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
+ if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
continue;
if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
Changed = true;
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 440e36767edf..678d02e05d42 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -56,12 +56,9 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
if (!isPerformingImport() && !isModuleExporting())
return false;
- // If we are exporting, we need to see whether this value is marked
- // as NoRename in the summary. If we are importing, we may not have
- // a summary in the distributed backend case (only summaries for values
- // importes as defs, not references, are included in the index passed
- // to the distributed backends).
if (isPerformingImport()) {
+ assert((!GlobalsToImport->count(SGV) || !isNonRenamableLocal(*SGV)) &&
+ "Attempting to promote non-renamable local");
// We don't know for sure yet if we are importing this value (as either
// a reference or a def), since we are simply walking all values in the
// module. But by necessity if we end up importing it and it is local,
@@ -77,13 +74,28 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
assert(Summaries->second.size() == 1 && "Local has more than one summary");
auto Linkage = Summaries->second.front()->linkage();
if (!GlobalValue::isLocalLinkage(Linkage)) {
- assert(!Summaries->second.front()->noRename());
+ assert(!isNonRenamableLocal(*SGV) &&
+ "Attempting to promote non-renamable local");
return true;
}
return false;
}
+#ifndef NDEBUG
+bool FunctionImportGlobalProcessing::isNonRenamableLocal(
+ const GlobalValue &GV) const {
+ if (!GV.hasLocalLinkage())
+ return false;
+ // This needs to stay in sync with the logic in buildModuleSummaryIndex.
+ if (GV.hasSection())
+ return true;
+ if (Used.count(const_cast<GlobalValue *>(&GV)))
+ return true;
+ return false;
+}
+#endif
+
std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV,
bool DoPromote) {
// For locals that must be promoted to global scope, ensure that
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8cde0c4cd607..31daba2248aa 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6785,22 +6785,19 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
return Cost;
}
-/// \brief Check whether the address computation for a non-consecutive memory
-/// access looks like an unlikely candidate for being merged into the indexing
-/// mode.
+/// \brief Gets Address Access SCEV after verifying that the access pattern
+/// is loop invariant except the induction variable dependence.
///
-/// We look for a GEP which has one index that is an induction variable and all
-/// other indices are loop invariant. If the stride of this access is also
-/// within a small bound we decide that this address computation can likely be
-/// merged into the addressing mode.
-/// In all other cases, we identify the address computation as complex.
-static bool isLikelyComplexAddressComputation(Value *Ptr,
- LoopVectorizationLegality *Legal,
- ScalarEvolution *SE,
- const Loop *TheLoop) {
+/// This SCEV can be sent to the Target in order to estimate the address
+/// calculation cost.
+static const SCEV *getAddressAccessSCEV(
+ Value *Ptr,
+ LoopVectorizationLegality *Legal,
+ ScalarEvolution *SE,
+ const Loop *TheLoop) {
auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (!Gep)
- return true;
+ return nullptr;
// We are looking for a gep with all loop invariant indices except for one
// which should be an induction variable.
@@ -6809,33 +6806,11 @@ static bool isLikelyComplexAddressComputation(Value *Ptr,
Value *Opd = Gep->getOperand(i);
if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
!Legal->isInductionVariable(Opd))
- return true;
+ return nullptr;
}
- // Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
- // can likely be merged into the address computation.
- unsigned MaxMergeDistance = 64;
-
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
- if (!AddRec)
- return true;
-
- // Check the step is constant.
- const SCEV *Step = AddRec->getStepRecurrence(*SE);
- // Calculate the pointer stride and check if it is consecutive.
- const auto *C = dyn_cast<SCEVConstant>(Step);
- if (!C)
- return true;
-
- const APInt &APStepVal = C->getAPInt();
-
- // Huge step value - give up.
- if (APStepVal.getBitWidth() > 64)
- return true;
-
- int64_t StepVal = APStepVal.getSExtValue();
-
- return StepVal > MaxMergeDistance;
+ // Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV.
+ return SE->getSCEV(Ptr);
}
static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
@@ -7063,12 +7038,12 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
unsigned Cost = 0;
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
- // True if the memory instruction's address computation is complex.
- bool IsComplexComputation =
- isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
+ // Figure out whether the access is strided and get the stride value
+ // if it's known in compile time
+ const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
// Get the cost of the scalar memory instruction and address computation.
- Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
+ Cost += VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
Cost += VF *
TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment, AS);
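
For context on the cost-model change above, an illustrative loop (not from the diff) matching the pattern getAddressAccessSCEV() accepts: a GEP whose indices are all loop-invariant except one induction variable. Its SCEV is now handed to TTI.getAddressComputationCost(), letting the target price the stride itself rather than relying on the removed 64-byte MaxMergeDistance heuristic:

// A[Row][J]: Row is loop-invariant, J is the induction variable, so the
// pointer's SCEV is returned and evaluated by the target's cost model.
void scaleRow(float A[][256], int Row, int N) {
  for (int J = 0; J < N; ++J)
    A[Row][J] *= 2.0f;
}
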