author     Dimitry Andric <dim@FreeBSD.org>    2017-01-06 20:24:06 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-01-06 20:24:06 +0000
commit     95ec533a1d8c450f6c6c5e84fe85423960e13382 (patch)
tree       bfe77b0dccd50ed2f4b4e6299d4bc4eaafced6e7 /contrib/llvm/lib/Transforms
parent     2b532af82919b9141e7fd04becf354a0a7dfa813 (diff)
parent     7e7b6700743285c0af506ac6299ddf82ebd434b9 (diff)
Merge llvm, clang, lld and lldb trunk r291274, and resolve conflicts.
Notes:
svn path=/projects/clang400-import/; revision=311544
Diffstat (limited to 'contrib/llvm/lib/Transforms')
10 files changed, 460 insertions, 287 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 6dd95f8dcd55..6b32f6c31f72 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -36,7 +36,10 @@ using namespace llvm;
 
-STATISTIC(NumImported, "Number of functions imported");
+STATISTIC(NumImportedFunctions, "Number of functions imported");
+STATISTIC(NumImportedModules, "Number of modules imported from");
+STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
+STATISTIC(NumLiveSymbols, "Number of live symbols in index");
 
 /// Limit on instruction count of imported functions.
 static cl::opt<unsigned> ImportInstrLimit(
@@ -69,6 +72,9 @@ static cl::opt<float> ImportColdMultiplier(
 static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
                                   cl::desc("Print imported functions"));
 
+static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
+                                 cl::desc("Compute dead symbols"));
+
 // Temporary allows the function import pass to disable always linking
 // referenced discardable symbols.
 static cl::opt<bool>
@@ -105,78 +111,6 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
 
 namespace {
 
-// Return true if the Summary describes a GlobalValue that can be externally
-// referenced, i.e. it does not need renaming (linkage is not local) or renaming
-// is possible (does not have a section for instance).
-static bool canBeExternallyReferenced(const GlobalValueSummary &Summary) {
-  if (!Summary.needsRenaming())
-    return true;
-
-  if (Summary.noRename())
-    // Can't externally reference a global that needs renaming if has a section
-    // or is referenced from inline assembly, for example.
-    return false;
-
-  return true;
-}
-
-// Return true if \p GUID describes a GlobalValue that can be externally
-// referenced, i.e. it does not need renaming (linkage is not local) or
-// renaming is possible (does not have a section for instance).
-static bool canBeExternallyReferenced(const ModuleSummaryIndex &Index,
-                                      GlobalValue::GUID GUID) {
-  auto Summaries = Index.findGlobalValueSummaryList(GUID);
-  if (Summaries == Index.end())
-    return true;
-  if (Summaries->second.size() != 1)
-    // If there are multiple globals with this GUID, then we know it is
-    // not a local symbol, and it is necessarily externally referenced.
-    return true;
-
-  // We don't need to check for the module path, because if it can't be
-  // externally referenced and we call it, it is necessarilly in the same
-  // module
-  return canBeExternallyReferenced(**Summaries->second.begin());
-}
-
-// Return true if the global described by \p Summary can be imported in another
-// module.
-static bool eligibleForImport(const ModuleSummaryIndex &Index,
-                              const GlobalValueSummary &Summary) {
-  if (!canBeExternallyReferenced(Summary))
-    // Can't import a global that needs renaming if has a section for instance.
-    // FIXME: we may be able to import it by copying it without promotion.
-    return false;
-
-  // Don't import functions that are not viable to inline.
-  if (Summary.isNotViableToInline())
-    return false;
-
-  // Check references (and potential calls) in the same module. If the current
-  // value references a global that can't be externally referenced it is not
-  // eligible for import. First check the flag set when we have possible
-  // opaque references (e.g. inline asm calls), then check the call and
-  // reference sets.
-  if (Summary.hasInlineAsmMaybeReferencingInternal())
-    return false;
-  bool AllRefsCanBeExternallyReferenced =
-      llvm::all_of(Summary.refs(), [&](const ValueInfo &VI) {
-        return canBeExternallyReferenced(Index, VI.getGUID());
-      });
-  if (!AllRefsCanBeExternallyReferenced)
-    return false;
-
-  if (auto *FuncSummary = dyn_cast<FunctionSummary>(&Summary)) {
-    bool AllCallsCanBeExternallyReferenced = llvm::all_of(
-        FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
-          return canBeExternallyReferenced(Index, Edge.first.getGUID());
-        });
-    if (!AllCallsCanBeExternallyReferenced)
-      return false;
-  }
-  return true;
-}
-
 /// Given a list of possible callee implementation for a call site, select one
 /// that fits the \p Threshold.
 ///
@@ -214,7 +148,7 @@ selectCallee(const ModuleSummaryIndex &Index,
     if (Summary->instCount() > Threshold)
       return false;
 
-    if (!eligibleForImport(Index, *Summary))
+    if (Summary->notEligibleToImport())
      return false;
 
     return true;
@@ -346,7 +280,8 @@ static void computeImportForFunction(
 static void ComputeImportForModule(
     const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
     FunctionImporter::ImportMapTy &ImportList,
-    StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
+    StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr,
+    const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr) {
   // Worklist contains the list of function imported in this module, for which
   // we will analyse the callees and may import further down the callgraph.
   SmallVector<EdgeInfo, 128> Worklist;
@@ -354,6 +289,10 @@ static void ComputeImportForModule(
   // Populate the worklist with the import for the functions in the current
   // module
   for (auto &GVSummary : DefinedGVSummaries) {
+    if (DeadSymbols && DeadSymbols->count(GVSummary.first)) {
+      DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n");
+      continue;
+    }
     auto *Summary = GVSummary.second;
     if (auto *AS = dyn_cast<AliasSummary>(Summary))
       Summary = &AS->getAliasee();
@@ -393,14 +332,15 @@ void llvm::ComputeCrossModuleImport(
     const ModuleSummaryIndex &Index,
     const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
     StringMap<FunctionImporter::ImportMapTy> &ImportLists,
-    StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
+    StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+    const DenseSet<GlobalValue::GUID> *DeadSymbols) {
   // For each module that has function defined, compute the import/export lists.
   for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
     auto &ImportList = ImportLists[DefinedGVSummaries.first()];
     DEBUG(dbgs() << "Computing import for Module '"
                  << DefinedGVSummaries.first() << "'\n");
     ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
-                           &ExportLists);
+                           &ExportLists, DeadSymbols);
   }
 
   // When computing imports we added all GUIDs referenced by anything
@@ -462,6 +402,86 @@ void llvm::ComputeCrossModuleImportForModule(
 #endif
 }
 
+DenseSet<GlobalValue::GUID> llvm::computeDeadSymbols(
+    const ModuleSummaryIndex &Index,
+    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+  if (!ComputeDead)
+    return DenseSet<GlobalValue::GUID>();
+  if (GUIDPreservedSymbols.empty())
+    // Don't do anything when nothing is live, this is friendly with tests.
+    return DenseSet<GlobalValue::GUID>();
+  DenseSet<GlobalValue::GUID> LiveSymbols = GUIDPreservedSymbols;
+  SmallVector<GlobalValue::GUID, 128> Worklist;
+  Worklist.reserve(LiveSymbols.size() * 2);
+  for (auto GUID : LiveSymbols) {
+    DEBUG(dbgs() << "Live root: " << GUID << "\n");
+    Worklist.push_back(GUID);
+  }
+  // Add values flagged in the index as live roots to the worklist.
+  for (const auto &Entry : Index) {
+    bool IsLiveRoot = llvm::any_of(
+        Entry.second,
+        [&](const std::unique_ptr<llvm::GlobalValueSummary> &Summary) {
+          return Summary->liveRoot();
+        });
+    if (!IsLiveRoot)
+      continue;
+    DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n");
+    Worklist.push_back(Entry.first);
+  }
+
+  while (!Worklist.empty()) {
+    auto GUID = Worklist.pop_back_val();
+    auto It = Index.findGlobalValueSummaryList(GUID);
+    if (It == Index.end()) {
+      DEBUG(dbgs() << "Not in index: " << GUID << "\n");
+      continue;
+    }
+
+    // FIXME: we should only make the prevailing copy live here
+    for (auto &Summary : It->second) {
+      for (auto Ref : Summary->refs()) {
+        auto RefGUID = Ref.getGUID();
+        if (LiveSymbols.insert(RefGUID).second) {
+          DEBUG(dbgs() << "Marking live (ref): " << RefGUID << "\n");
+          Worklist.push_back(RefGUID);
+        }
+      }
+      if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
+        for (auto Call : FS->calls()) {
+          auto CallGUID = Call.first.getGUID();
+          if (LiveSymbols.insert(CallGUID).second) {
+            DEBUG(dbgs() << "Marking live (call): " << CallGUID << "\n");
+            Worklist.push_back(CallGUID);
+          }
+        }
+      }
+      if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
+        auto AliaseeGUID = AS->getAliasee().getOriginalName();
+        if (LiveSymbols.insert(AliaseeGUID).second) {
+          DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n");
+          Worklist.push_back(AliaseeGUID);
+        }
+      }
+    }
+  }
+  DenseSet<GlobalValue::GUID> DeadSymbols;
+  DeadSymbols.reserve(
+      std::min(Index.size(), Index.size() - LiveSymbols.size()));
+  for (auto &Entry : Index) {
+    auto GUID = Entry.first;
+    if (!LiveSymbols.count(GUID)) {
+      DEBUG(dbgs() << "Marking dead: " << GUID << "\n");
+      DeadSymbols.insert(GUID);
+    }
+  }
+  DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and "
+               << DeadSymbols.size() << " symbols Dead \n");
+  NumDeadSymbols += DeadSymbols.size();
+  NumLiveSymbols += LiveSymbols.size();
+  return DeadSymbols;
+}
+
 /// Compute the set of summaries needed for a ThinLTO backend compilation of
 /// \p ModulePath.
 void llvm::gatherImportedSummariesForModule(
@@ -625,7 +645,6 @@ Expected<bool> FunctionImporter::importFunctions(
     // now, before linking it (otherwise this will be a noop).
     if (Error Err = SrcModule->materializeMetadata())
       return std::move(Err);
-    UpgradeDebugInfo(*SrcModule);
 
     auto &ImportGUIDs = FunctionsToImportPerModule->second;
     // Find the globals to import
@@ -698,6 +717,10 @@ Expected<bool> FunctionImporter::importFunctions(
       }
     }
 
+    // Upgrade debug info after we're done materializing all the globals and we
+    // have loaded all the required metadata!
+    UpgradeDebugInfo(*SrcModule);
+
     // Link in the specified functions.
     if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport))
       return true;
 
@@ -717,9 +740,10 @@ Expected<bool> FunctionImporter::importFunctions(
       report_fatal_error("Function Import: link error");
 
     ImportedCount += GlobalsToImport.size();
+    NumImportedModules++;
   }
 
-  NumImported += ImportedCount;
+  NumImportedFunctions += ImportedCount;
 
   DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
                << DestModule.getModuleIdentifier() << "\n");
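The computeDeadSymbols routine added in the FunctionImport.cpp hunk above is, at its core, a breadth-first liveness walk over the summary index: seed a worklist with the preserved symbols and the summary-flagged live roots, chase reference, call, and alias edges, and declare everything never reached dead. A minimal standalone sketch of that idea over a generic edge map (GUID and the Edges container here are hypothetical stand-ins, not the LLVM summary-index API):

    #include <cstdint>
    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    using GUID = uint64_t; // hypothetical stand-in for GlobalValue::GUID

    // Seed with the preserved roots, follow edges, and report everything
    // in the index that the walk never reached.
    std::unordered_set<GUID> computeDeadSymbols(
        const std::unordered_map<GUID, std::vector<GUID>> &Edges,
        const std::unordered_set<GUID> &PreservedRoots) {
      std::unordered_set<GUID> Live = PreservedRoots;
      std::vector<GUID> Worklist(Live.begin(), Live.end());
      while (!Worklist.empty()) {
        GUID G = Worklist.back();
        Worklist.pop_back();
        auto It = Edges.find(G);
        if (It == Edges.end())
          continue; // not in the index; nothing to propagate
        for (GUID Ref : It->second)
          if (Live.insert(Ref).second) // newly marked live
            Worklist.push_back(Ref);
      }
      std::unordered_set<GUID> Dead;
      for (const auto &Entry : Edges)
        if (!Live.count(Entry.first))
          Dead.insert(Entry.first);
      return Dead;
    }

    int main() {
      // Toy index: 1 -> 2 -> 3, while 4 is unreferenced.
      std::unordered_map<GUID, std::vector<GUID>> Edges = {
          {1, {2}}, {2, {3}}, {3, {}}, {4, {}}};
      auto Dead = computeDeadSymbols(Edges, /*PreservedRoots=*/{1});
      // Dead == {4}; 1, 2 and 3 are kept alive through the edge walk.
      return (Dead.size() == 1 && Dead.count(4) == 1) ? 0 : 1;
    }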
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 2948878cffc4..f4742aaf748f 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -27,9 +27,12 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndexYAML.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/TrailingObjects.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO.h"
@@ -52,6 +55,20 @@ static cl::opt<bool> AvoidReuse(
     cl::desc("Try to avoid reuse of byte array addresses using aliases"),
     cl::Hidden, cl::init(true));
 
+static cl::opt<std::string> ClSummaryAction(
+    "lowertypetests-summary-action",
+    cl::desc("What to do with the summary when running this pass"), cl::Hidden);
+
+static cl::opt<std::string> ClReadSummary(
+    "lowertypetests-read-summary",
+    cl::desc("Read summary from given YAML file before running pass"),
+    cl::Hidden);
+
+static cl::opt<std::string> ClWriteSummary(
+    "lowertypetests-write-summary",
+    cl::desc("Write summary to given YAML file after running pass"),
+    cl::Hidden);
+
 bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
   if (Offset < ByteOffset)
     return false;
@@ -66,38 +83,6 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
   return Bits.count(BitOffset);
 }
 
-bool BitSetInfo::containsValue(
-    const DataLayout &DL,
-    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V,
-    uint64_t COffset) const {
-  if (auto GV = dyn_cast<GlobalObject>(V)) {
-    auto I = GlobalLayout.find(GV);
-    if (I == GlobalLayout.end())
-      return false;
-    return containsGlobalOffset(I->second + COffset);
-  }
-
-  if (auto GEP = dyn_cast<GEPOperator>(V)) {
-    APInt APOffset(DL.getPointerSizeInBits(0), 0);
-    bool Result = GEP->accumulateConstantOffset(DL, APOffset);
-    if (!Result)
-      return false;
-    COffset += APOffset.getZExtValue();
-    return containsValue(DL, GlobalLayout, GEP->getPointerOperand(), COffset);
-  }
-
-  if (auto Op = dyn_cast<Operator>(V)) {
-    if (Op->getOpcode() == Instruction::BitCast)
-      return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
-
-    if (Op->getOpcode() == Instruction::Select)
-      return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
-             containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
-  }
-
-  return false;
-}
-
 void BitSetInfo::print(raw_ostream &OS) const {
   OS << "offset " << ByteOffset << " size " << BitSize << " align "
      << (1 << AlignLog2);
@@ -204,7 +189,7 @@ struct ByteArrayInfo {
   std::set<uint64_t> Bits;
   uint64_t BitSize;
   GlobalVariable *ByteArray;
-  Constant *Mask;
+  GlobalVariable *MaskGlobal;
 };
 
 /// A POD-like structure that we use to store a global reference together with
@@ -241,6 +226,9 @@ public:
 class LowerTypeTestsModule {
   Module &M;
 
+  // This is for testing purposes only.
+  std::unique_ptr<ModuleSummaryIndex> OwnedSummary;
+
   bool LinkerSubsectionsViaSymbols;
   Triple::ArchType Arch;
   Triple::OSType OS;
@@ -248,6 +236,7 @@ class LowerTypeTestsModule {
 
   IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
   IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
+  PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
   IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
   PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty);
   IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
@@ -259,6 +248,37 @@ class LowerTypeTestsModule {
   // Mapping from type identifiers to the call sites that test them.
   DenseMap<Metadata *, std::vector<CallInst *>> TypeTestCallSites;
 
+  /// This structure describes how to lower type tests for a particular type
+  /// identifier. It is either built directly from the global analysis (during
+  /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
+  /// identifier summaries and external symbol references (in ThinLTO backends).
+  struct TypeIdLowering {
+    TypeTestResolution::Kind TheKind;
+
+    /// All except Unsat: the start address within the combined global.
+    Constant *OffsetedGlobal;
+
+    /// ByteArray, Inline, AllOnes: log2 of the required global alignment
+    /// relative to the start address.
+    Constant *AlignLog2;
+
+    /// ByteArray, Inline, AllOnes: size of the memory region covering members
+    /// of this type identifier as a multiple of 2^AlignLog2.
+    Constant *Size;
+
+    /// ByteArray, Inline, AllOnes: range of the size expressed as a bit width.
+    unsigned SizeBitWidth;
+
+    /// ByteArray: the byte array to test the address against.
+    Constant *TheByteArray;
+
+    /// ByteArray: the bit mask to apply to bytes loaded from the byte array.
+    Constant *BitMask;
+
+    /// Inline: the bit mask to test the address against.
+    Constant *InlineBits;
+  };
+
   std::vector<ByteArrayInfo> ByteArrayInfos;
 
   Function *WeakInitializerFn = nullptr;
@@ -268,15 +288,13 @@ class LowerTypeTestsModule {
                            const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
   ByteArrayInfo *createByteArray(BitSetInfo &BSI);
   void allocateByteArrays();
-  Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+  Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
                           Value *BitOffset);
   void lowerTypeTestCalls(
       ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
       const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
-  Value *
-  lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
-                  Constant *CombinedGlobal,
-                  const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+  Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
+                           const TypeIdLowering &TIL);
   void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
                                        ArrayRef<GlobalTypeMember *> Globals);
   unsigned getJumpTableEntrySize();
@@ -302,6 +320,7 @@ class LowerTypeTestsModule {
 
 public:
   LowerTypeTestsModule(Module &M);
+  ~LowerTypeTestsModule();
   bool lower();
 };
 
@@ -380,7 +399,7 @@ ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
   BAI->Bits = BSI.Bits;
   BAI->BitSize = BSI.BitSize;
   BAI->ByteArray = ByteArrayGlobal;
-  BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
+  BAI->MaskGlobal = MaskGlobal;
   return BAI;
 }
 
@@ -399,8 +418,9 @@ void LowerTypeTestsModule::allocateByteArrays() {
     uint8_t Mask;
     BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
 
-    BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
-    cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
+    BAI->MaskGlobal->replaceAllUsesWith(
+        ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), Int8PtrTy));
+    BAI->MaskGlobal->eraseFromParent();
   }
 
   Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes);
@@ -435,101 +455,121 @@ void LowerTypeTestsModule::allocateByteArrays() {
   ByteArraySizeBytes = BAB.Bytes.size();
 }
 
-/// Build a test that bit BitOffset is set in BSI, where
-/// BitSetGlobal is a global containing the bits in BSI.
-Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
-                                              ByteArrayInfo *&BAI,
+/// Build a test that bit BitOffset is set in the type identifier that was
+/// lowered to TIL, which must be either an Inline or a ByteArray.
+Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
+                                              const TypeIdLowering &TIL,
                                               Value *BitOffset) {
-  if (BSI.BitSize <= 64) {
+  if (TIL.TheKind == TypeTestResolution::Inline) {
     // If the bit set is sufficiently small, we can avoid a load by bit testing
     // a constant.
-    IntegerType *BitsTy;
-    if (BSI.BitSize <= 32)
-      BitsTy = Int32Ty;
-    else
-      BitsTy = Int64Ty;
-
-    uint64_t Bits = 0;
-    for (auto Bit : BSI.Bits)
-      Bits |= uint64_t(1) << Bit;
-    Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
-    return createMaskedBitTest(B, BitsConst, BitOffset);
+    return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
  } else {
-    if (!BAI) {
-      ++NumByteArraysCreated;
-      BAI = createByteArray(BSI);
-    }
-
-    Constant *ByteArray = BAI->ByteArray;
-    Type *Ty = BAI->ByteArray->getValueType();
+    Constant *ByteArray = TIL.TheByteArray;
     if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
       // Each use of the byte array uses a different alias. This makes the
       // backend less likely to reuse previously computed byte array addresses,
       // improving the security of the CFI mechanism based on this pass.
-      ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
-                                      GlobalValue::PrivateLinkage, "bits_use",
-                                      ByteArray, &M);
+      ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
+                                      "bits_use", ByteArray, &M);
     }
 
-    Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
+    Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
     Value *Byte = B.CreateLoad(ByteAddr);
 
-    Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
+    Value *ByteAndMask =
+        B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
    return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
  }
}
 
+static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
+                                Value *V, uint64_t COffset) {
+  if (auto GV = dyn_cast<GlobalObject>(V)) {
+    SmallVector<MDNode *, 2> Types;
+    GV->getMetadata(LLVMContext::MD_type, Types);
+    for (MDNode *Type : Types) {
+      if (Type->getOperand(1) != TypeId)
+        continue;
+      uint64_t Offset =
+          cast<ConstantInt>(
+              cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+              ->getZExtValue();
+      if (COffset == Offset)
+        return true;
+    }
+    return false;
+  }
+
+  if (auto GEP = dyn_cast<GEPOperator>(V)) {
+    APInt APOffset(DL.getPointerSizeInBits(0), 0);
+    bool Result = GEP->accumulateConstantOffset(DL, APOffset);
+    if (!Result)
+      return false;
+    COffset += APOffset.getZExtValue();
+    return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset);
+  }
+
+  if (auto Op = dyn_cast<Operator>(V)) {
+    if (Op->getOpcode() == Instruction::BitCast)
+      return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset);
+
+    if (Op->getOpcode() == Instruction::Select)
+      return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) &&
+             isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset);
+  }
+
+  return false;
+}
+
 /// Lower a llvm.type.test call to its implementation. Returns the value to
 /// replace the call with.
-Value *LowerTypeTestsModule::lowerBitSetCall(
-    CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
-    Constant *CombinedGlobalIntAddr,
-    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
+                                               const TypeIdLowering &TIL) {
+  if (TIL.TheKind == TypeTestResolution::Unsat)
+    return ConstantInt::getFalse(M.getContext());
+
   Value *Ptr = CI->getArgOperand(0);
   const DataLayout &DL = M.getDataLayout();
-
-  if (BSI.containsValue(DL, GlobalLayout, Ptr))
+  if (isKnownTypeIdMember(TypeId, DL, Ptr, 0))
     return ConstantInt::getTrue(M.getContext());
 
-  Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
-      CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
-
   BasicBlock *InitialBB = CI->getParent();
 
   IRBuilder<> B(CI);
 
   Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
 
-  if (BSI.isSingleOffset())
+  Constant *OffsetedGlobalAsInt =
+      ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy);
+  if (TIL.TheKind == TypeTestResolution::Single)
    return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
 
   Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
 
-  Value *BitOffset;
-  if (BSI.AlignLog2 == 0) {
-    BitOffset = PtrOffset;
-  } else {
-    // We need to check that the offset both falls within our range and is
-    // suitably aligned. We can check both properties at the same time by
-    // performing a right rotate by log2(alignment) followed by an integer
-    // comparison against the bitset size. The rotate will move the lower
-    // order bits that need to be zero into the higher order bits of the
-    // result, causing the comparison to fail if they are nonzero. The rotate
-    // also conveniently gives us a bit offset to use during the load from
-    // the bitset.
-    Value *OffsetSHR =
-        B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
-    Value *OffsetSHL = B.CreateShl(
-        PtrOffset,
-        ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
-    BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
-  }
-
-  Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+  // We need to check that the offset both falls within our range and is
+  // suitably aligned. We can check both properties at the same time by
+  // performing a right rotate by log2(alignment) followed by an integer
+  // comparison against the bitset size. The rotate will move the lower
+  // order bits that need to be zero into the higher order bits of the
+  // result, causing the comparison to fail if they are nonzero. The rotate
+  // also conveniently gives us a bit offset to use during the load from
+  // the bitset.
+  Value *OffsetSHR =
+      B.CreateLShr(PtrOffset, ConstantExpr::getZExt(TIL.AlignLog2, IntPtrTy));
+  Value *OffsetSHL = B.CreateShl(
+      PtrOffset, ConstantExpr::getZExt(
+                     ConstantExpr::getSub(
+                         ConstantInt::get(Int8Ty, DL.getPointerSizeInBits(0)),
+                         TIL.AlignLog2),
+                     IntPtrTy));
+  Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+
+  Constant *BitSizeConst = ConstantExpr::getZExt(TIL.Size, IntPtrTy);
   Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
 
   // If the bit set is all ones, testing against it is unnecessary.
-  if (BSI.isAllOnes())
+  if (TIL.TheKind == TypeTestResolution::AllOnes)
     return OffsetInRange;
 
   TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
@@ -537,7 +577,7 @@ Value *LowerTypeTestsModule::lowerBitSetCall(
 
   // Now that we know that the offset is in range and aligned, load the
   // appropriate bit from the bitset.
-  Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
+  Value *Bit = createBitSetTest(ThenB, TIL, BitOffset);
 
   // The value we want is 0 if we came directly from the initial block
   // (having failed the range or alignment checks), or the loaded bit if
@@ -622,11 +662,7 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
 void LowerTypeTestsModule::lowerTypeTestCalls(
     ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
     const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
-  Constant *CombinedGlobalIntAddr =
-      ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
-  DenseMap<GlobalObject *, uint64_t> GlobalObjLayout;
-  for (auto &P : GlobalLayout)
-    GlobalObjLayout[P.first->getGlobal()] = P.second;
+  CombinedGlobalAddr = ConstantExpr::getBitCast(CombinedGlobalAddr, Int8PtrTy);
 
   // For each type identifier in this disjoint set...
   for (Metadata *TypeId : TypeIds) {
@@ -640,13 +676,43 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
       BSI.print(dbgs());
     });
 
-    ByteArrayInfo *BAI = nullptr;
+    TypeIdLowering TIL;
+    TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
+        Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
+    TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2);
+    if (BSI.isAllOnes()) {
+      TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
+                                       : TypeTestResolution::AllOnes;
+      TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32;
+      TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty,
+                                  BSI.BitSize);
+    } else if (BSI.BitSize <= 64) {
+      TIL.TheKind = TypeTestResolution::Inline;
+      TIL.SizeBitWidth = (BSI.BitSize <= 32) ? 5 : 6;
+      TIL.Size = ConstantInt::get(Int8Ty, BSI.BitSize);
+      uint64_t InlineBits = 0;
+      for (auto Bit : BSI.Bits)
+        InlineBits |= uint64_t(1) << Bit;
+      if (InlineBits == 0)
+        TIL.TheKind = TypeTestResolution::Unsat;
+      else
+        TIL.InlineBits = ConstantInt::get(
+            (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits);
+    } else {
+      TIL.TheKind = TypeTestResolution::ByteArray;
+      TIL.SizeBitWidth = (BSI.BitSize <= 256) ? 8 : 32;
+      TIL.Size = ConstantInt::get((BSI.BitSize <= 256) ? Int8Ty : Int32Ty,
+                                  BSI.BitSize);
+      ++NumByteArraysCreated;
+      ByteArrayInfo *BAI = createByteArray(BSI);
+      TIL.TheByteArray = BAI->ByteArray;
+      TIL.BitMask = BAI->MaskGlobal;
+    }
 
     // Lower each call to llvm.type.test for this type identifier.
     for (CallInst *CI : TypeTestCallSites[TypeId]) {
       ++NumTypeTestCallsLowered;
-      Value *Lowered =
-          lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalObjLayout);
+      Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
       CI->replaceAllUsesWith(Lowered);
       CI->eraseFromParent();
     }
@@ -1080,6 +1146,22 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
 
 /// Lower all type tests in this module.
 LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
+  // Handle the command-line summary arguments. This code is for testing
+  // purposes only, so we handle errors directly.
+  if (!ClSummaryAction.empty()) {
+    OwnedSummary = make_unique<ModuleSummaryIndex>();
+    if (!ClReadSummary.empty()) {
+      ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
+                            ": ");
+      auto ReadSummaryFile =
+          ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
+
+      yaml::Input In(ReadSummaryFile->getBuffer());
+      In >> *OwnedSummary;
+      ExitOnErr(errorCodeToError(In.error()));
+    }
+  }
+
   Triple TargetTriple(M.getTargetTriple());
   LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
   Arch = TargetTriple.getArch();
@@ -1087,6 +1169,20 @@ LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
   ObjectFormat = TargetTriple.getObjectFormat();
 }
 
+LowerTypeTestsModule::~LowerTypeTestsModule() {
+  if (ClSummaryAction.empty() || ClWriteSummary.empty())
+    return;
+
+  ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
+                        ": ");
+  std::error_code EC;
+  raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+  ExitOnErr(errorCodeToError(EC));
+
+  yaml::Output Out(OS);
+  Out << *OwnedSummary;
+}
+
 bool LowerTypeTestsModule::lower() {
   Function *TypeTestFunc =
       M.getFunction(Intrinsic::getName(Intrinsic::type_test));
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f863d192fc2f..b29ed3c87451 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1637,6 +1637,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
 
+  case Intrinsic::cos:
+  case Intrinsic::amdgcn_cos: {
+    Value *SrcSrc;
+    Value *Src = II->getArgOperand(0);
+    if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
+        match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
+      // cos(-x) -> cos(x)
+      // cos(fabs(x)) -> cos(x)
+      II->setArgOperand(0, SrcSrc);
+      return II;
+    }
+
+    break;
+  }
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
     // Turn PPC lvx -> load if the pointer is known aligned.
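The comment block carried along in the lowerTypeTestCall hunk above describes a nice trick: a single unsigned comparison after a right rotate checks both that an offset lies in range and that it is suitably aligned. A small self-contained illustration in plain C++ (a hypothetical helper, independent of the IR-building code above):

    #include <cassert>
    #include <cstdint>

    // Rotate PtrOffset right by AlignLog2: the low bits that must be zero
    // for a correctly aligned offset land in the high bits, so any
    // misaligned or out-of-range offset compares >= BitSize and the test
    // fails. The rotated value doubles as the bit index into the bitset.
    static bool offsetInRangeAndAligned(uint64_t PtrOffset, unsigned AlignLog2,
                                        uint64_t BitSize) {
      if (AlignLog2 == 0) // avoid the undefined 64-bit shift below
        return PtrOffset < BitSize;
      uint64_t Rotated =
          (PtrOffset >> AlignLog2) | (PtrOffset << (64 - AlignLog2));
      return Rotated < BitSize;
    }

    int main() {
      assert(offsetInRangeAndAligned(16, 3, 4));  // offset 16, align 8 -> bit 2
      assert(!offsetInRangeAndAligned(17, 3, 4)); // misaligned offset
      assert(!offsetInRangeAndAligned(40, 3, 4)); // aligned but out of range
      return 0;
    }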
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6a7cb0e45c63..1d5528398776 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -514,7 +514,8 @@ struct AddressSanitizer : public FunctionPass {
   void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                          Value *Addr, uint32_t TypeSize, bool IsWrite,
                          Value *SizeArgument, bool UseCalls, uint32_t Exp);
-  void instrumentUnusualSizeOrAlignment(Instruction *I, Value *Addr,
+  void instrumentUnusualSizeOrAlignment(Instruction *I,
+                                        Instruction *InsertBefore, Value *Addr,
                                         uint32_t TypeSize, bool IsWrite,
                                         Value *SizeArgument, bool UseCalls,
                                         uint32_t Exp);
@@ -1056,20 +1057,18 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
         return nullptr;
       *IsWrite = false;
     }
-    // Only instrument if the mask is constant for now.
-    if (isa<ConstantVector>(CI->getOperand(2 + OpOffset))) {
-      auto BasePtr = CI->getOperand(0 + OpOffset);
-      auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
-      *TypeSize = DL.getTypeStoreSizeInBits(Ty);
-      if (auto AlignmentConstant =
-              dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
-        *Alignment = (unsigned)AlignmentConstant->getZExtValue();
-      else
-        *Alignment = 1; // No alignment guarantees. We probably got Undef
-      if (MaybeMask)
-        *MaybeMask = CI->getOperand(2 + OpOffset);
-      PtrOperand = BasePtr;
-    }
+
+    auto BasePtr = CI->getOperand(0 + OpOffset);
+    auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+    *TypeSize = DL.getTypeStoreSizeInBits(Ty);
+    if (auto AlignmentConstant =
+            dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+      *Alignment = (unsigned)AlignmentConstant->getZExtValue();
+    else
+      *Alignment = 1; // No alignment guarantees. We probably got Undef
+    if (MaybeMask)
+      *MaybeMask = CI->getOperand(2 + OpOffset);
+    PtrOperand = BasePtr;
   }
 }
 
@@ -1130,24 +1129,25 @@ void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
 }
 
 static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I,
-                                Value *Addr, unsigned Alignment,
-                                unsigned Granularity, uint32_t TypeSize,
-                                bool IsWrite, Value *SizeArgument,
-                                bool UseCalls, uint32_t Exp) {
+                                Instruction *InsertBefore, Value *Addr,
+                                unsigned Alignment, unsigned Granularity,
+                                uint32_t TypeSize, bool IsWrite,
+                                Value *SizeArgument, bool UseCalls,
+                                uint32_t Exp) {
   // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
   // if the data is properly aligned.
   if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
        TypeSize == 128) &&
       (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
-    return Pass->instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr,
-                                   UseCalls, Exp);
-  Pass->instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
-                                         UseCalls, Exp);
+    return Pass->instrumentAddress(I, InsertBefore, Addr, TypeSize, IsWrite,
+                                   nullptr, UseCalls, Exp);
+  Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeSize,
+                                         IsWrite, nullptr, UseCalls, Exp);
 }
 
 static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
                                         const DataLayout &DL, Type *IntptrTy,
-                                        ConstantVector *Mask, Instruction *I,
+                                        Value *Mask, Instruction *I,
                                         Value *Addr, unsigned Alignment,
                                         unsigned Granularity, uint32_t TypeSize,
                                         bool IsWrite, Value *SizeArgument,
@@ -1157,15 +1157,30 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
   unsigned Num = VTy->getVectorNumElements();
   auto Zero = ConstantInt::get(IntptrTy, 0);
   for (unsigned Idx = 0; Idx < Num; ++Idx) {
-    // dyn_cast as we might get UndefValue
-    auto Masked = dyn_cast<ConstantInt>(Mask->getOperand(Idx));
-    if (Masked && Masked->isAllOnesValue()) {
+    Value *InstrumentedAddress = nullptr;
+    Instruction *InsertBefore = I;
+    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
+      // dyn_cast as we might get UndefValue
+      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
+        if (Masked->isNullValue())
+          // Mask is constant false, so no instrumentation needed.
+          continue;
+        // If we have a true or undef value, fall through to doInstrumentAddress
+        // with InsertBefore == I
+      }
+    } else {
       IRBuilder<> IRB(I);
-      auto InstrumentedAddress =
-          IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
-      doInstrumentAddress(Pass, I, InstrumentedAddress, Alignment, Granularity,
-                          ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp);
+      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
+      TerminatorInst *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
+      InsertBefore = ThenTerm;
     }
+
+    IRBuilder<> IRB(InsertBefore);
+    InstrumentedAddress =
+        IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+    doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
+                        Granularity, ElemTypeSize, IsWrite, SizeArgument,
+                        UseCalls, Exp);
   }
 }
 
@@ -1220,12 +1235,11 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
 
   unsigned Granularity = 1 << Mapping.Scale;
   if (MaybeMask) {
-    auto Mask = cast<ConstantVector>(MaybeMask);
-    instrumentMaskedLoadOrStore(this, DL, IntptrTy, Mask, I, Addr, Alignment,
-                                Granularity, TypeSize, IsWrite, nullptr,
-                                UseCalls, Exp);
+    instrumentMaskedLoadOrStore(this, DL, IntptrTy, MaybeMask, I, Addr,
+                                Alignment, Granularity, TypeSize, IsWrite,
+                                nullptr, UseCalls, Exp);
   } else {
-    doInstrumentAddress(this, I, Addr, Alignment, Granularity, TypeSize,
+    doInstrumentAddress(this, I, I, Addr, Alignment, Granularity, TypeSize,
                         IsWrite, nullptr, UseCalls, Exp);
   }
 }
@@ -1342,9 +1356,9 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
 // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
 // to report the actual access size.
 void AddressSanitizer::instrumentUnusualSizeOrAlignment(
-    Instruction *I, Value *Addr, uint32_t TypeSize, bool IsWrite,
-    Value *SizeArgument, bool UseCalls, uint32_t Exp) {
-  IRBuilder<> IRB(I);
+    Instruction *I, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize,
+    bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) {
+  IRBuilder<> IRB(InsertBefore);
   Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
   if (UseCalls) {
@@ -1358,8 +1372,8 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
     Value *LastByte = IRB.CreateIntToPtr(
         IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
         Addr->getType());
-    instrumentAddress(I, I, Addr, 8, IsWrite, Size, false, Exp);
-    instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false, Exp);
+    instrumentAddress(I, InsertBefore, Addr, 8, IsWrite, Size, false, Exp);
+    instrumentAddress(I, InsertBefore, LastByte, 8, IsWrite, Size, false, Exp);
   }
 }
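Note that the unusual-size path shown above probes only the first and the last byte of the access. A sketch of why that tends to suffice, against a hypothetical shadow-memory oracle (ByteIsPoisoned is a stand-in, not the real ASan runtime interface, and this assumes the unprobed middle of the access is smaller than the surrounding redzones):

    #include <cstdint>

    // ASan surrounds objects with poisoned redzones, so an odd-sized access
    // that spills out of its object will have either its first or its last
    // byte land in poisoned memory; probing the two end bytes catches the
    // overflow without a per-byte loop.
    static bool accessIsPoisoned(uintptr_t Addr, uint64_t SizeInBytes,
                                 bool (*ByteIsPoisoned)(uintptr_t)) {
      return ByteIsPoisoned(Addr) || ByteIsPoisoned(Addr + SizeInBytes - 1);
    }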
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 9485bfd7c296..0137378b828b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1572,6 +1572,13 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
 
   // Assign value numbers to the new instructions.
   for (Instruction *I : NewInsts) {
+    // Instructions that have been inserted in predecessor(s) to materialize
+    // the load address do not retain their original debug locations. Doing
+    // so could lead to confusing (but correct) source attributions.
+    // FIXME: How do we retain source locations without causing poor debugging
+    // behavior?
+    I->setDebugLoc(DebugLoc());
+
     // FIXME: We really _ought_ to insert these value numbers into their
     // parent's availability map. However, in doing so, we risk getting into
     // ordering issues. If a block hasn't been processed yet, we would be
@@ -1601,8 +1608,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
     if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range))
       NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
 
-    // Transfer DebugLoc.
-    NewLoad->setDebugLoc(LI->getDebugLoc());
+    // We do not propagate the old load's debug location, because the new
+    // load now lives in a different BB, and we want to avoid a jumpy line
+    // table.
+    // FIXME: How do we retain source locations without causing poor debugging
+    // behavior?
 
     // Add the newly created load.
     ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 1cc5c8f0da84..6ef9d0561322 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -408,6 +408,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
         CurAST->deleteValue(&I);
         I.eraseFromParent();
       }
+      Changed = true;
       continue;
     }
 
@@ -766,6 +767,14 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
   // Move the new node to the Preheader, before its terminator.
   I.moveBefore(Preheader->getTerminator());
 
+  // Do not retain debug locations when we are moving instructions to different
+  // basic blocks, because we want to avoid jumpy line tables. Calls, however,
+  // need to retain their debug locs because they may be inlined.
+  // FIXME: How do we retain source locations without causing poor debugging
+  // behavior?
+  if (!isa<CallInst>(I))
+    I.setDebugLoc(DebugLoc());
+
   if (isa<LoadInst>(I))
     ++NumMovedLoads;
   else if (isa<CallInst>(I))
@@ -911,14 +920,23 @@ bool llvm::promoteLoopAccessesToScalars(
   //
   // If at least one store is guaranteed to execute, both properties are
   // satisfied, and promotion is legal.
+  //
   // This, however, is not a necessary condition. Even if no store/load is
-  // guaranteed to execute, we can still establish these properties:
-  // (p1) by proving that hoisting the load into the preheader is
-  // safe (i.e. proving dereferenceability on all paths through the loop). We
+  // guaranteed to execute, we can still establish these properties.
+  // We can establish (p1) by proving that hoisting the load into the preheader
+  // is safe (i.e. proving dereferenceability on all paths through the loop). We
   // can use any access within the alias set to prove dereferenceability,
   // since they're all must alias.
-  // (p2) by proving the memory is thread-local, so the memory model
+  //
+  // There are two ways establish (p2):
+  // a) Prove the location is thread-local. In this case the memory model
   // requirement does not apply, and stores are safe to insert.
+  // b) Prove a store dominates every exit block. In this case, if an exit
+  // blocks is reached, the original dynamic path would have taken us through
+  // the store, so inserting a store into the exit block is safe. Note that this
+  // is different from the store being guaranteed to execute. For instance,
+  // if an exception is thrown on the first iteration of the loop, the original
+  // store is never executed, but the exit blocks are not executed either.
 
   bool DereferenceableInPH = false;
   bool SafeToInsertStore = false;
@@ -1000,6 +1018,17 @@ bool llvm::promoteLoopAccessesToScalars(
       }
     }
 
+    // If a store dominates all exit blocks, it is safe to sink.
+    // As explained above, if an exit block was executed, a dominating
+    // store must have been been executed at least once, so we are not
+    // introducing stores on paths that did not have them.
+    // Note that this only looks at explicit exit blocks. If we ever
+    // start sinking stores into unwind edges (see above), this will break.
+    if (!SafeToInsertStore)
+      SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
+        return DT->dominates(Store->getParent(), Exit);
+      });
+
     // If the store is not guaranteed to execute, we may still get
     // deref info through it.
     if (!DereferenceableInPH) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index fd167db11789..2743574ecca6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -997,7 +997,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
 /// Check if the given conditional branch is based on the comparison between
 /// a variable and zero, and if the variable is non-zero, the control yields to
 /// the loop entry. If the branch matches the behavior, the variable involved
-/// in the comparion is returned. This function will be called to see if the
+/// in the comparison is returned. This function will be called to see if the
 /// precondition and postcondition of the loop are in desirable form.
 static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
   if (!BI || !BI->isConditional())
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
index 90309d7ebba6..f64354497771 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -283,8 +283,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
   // sinked.
   for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) {
     Instruction *I = &*II++;
-    if (!L.hasLoopInvariantOperands(I) ||
-        !canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
+    if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
       continue;
     if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
       Changed = true;
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 440e36767edf..678d02e05d42 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -56,12 +56,9 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
   if (!isPerformingImport() && !isModuleExporting())
     return false;
 
-  // If we are exporting, we need to see whether this value is marked
-  // as NoRename in the summary. If we are importing, we may not have
-  // a summary in the distributed backend case (only summaries for values
-  // importes as defs, not references, are included in the index passed
-  // to the distributed backends).
   if (isPerformingImport()) {
+    assert((!GlobalsToImport->count(SGV) || !isNonRenamableLocal(*SGV)) &&
+           "Attempting to promote non-renamable local");
     // We don't know for sure yet if we are importing this value (as either
     // a reference or a def), since we are simply walking all values in the
     // module. But by necessity if we end up importing it and it is local,
@@ -77,13 +74,28 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
   assert(Summaries->second.size() == 1 && "Local has more than one summary");
   auto Linkage = Summaries->second.front()->linkage();
   if (!GlobalValue::isLocalLinkage(Linkage)) {
-    assert(!Summaries->second.front()->noRename());
+    assert(!isNonRenamableLocal(*SGV) &&
+           "Attempting to promote non-renamable local");
     return true;
   }
 
   return false;
 }
 
+#ifndef NDEBUG
+bool FunctionImportGlobalProcessing::isNonRenamableLocal(
+    const GlobalValue &GV) const {
+  if (!GV.hasLocalLinkage())
+    return false;
+  // This needs to stay in sync with the logic in buildModuleSummaryIndex.
+  if (GV.hasSection())
+    return true;
+  if (Used.count(const_cast<GlobalValue *>(&GV)))
+    return true;
+  return false;
+}
+#endif
+
 std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV,
                                                     bool DoPromote) {
   // For locals that must be promoted to global scope, ensure that
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8cde0c4cd607..31daba2248aa 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6785,22 +6785,19 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
   return Cost;
 }
 
-/// \brief Check whether the address computation for a non-consecutive memory
-/// access looks like an unlikely candidate for being merged into the indexing
-/// mode.
+/// \brief Gets Address Access SCEV after verifying that the access pattern
+/// is loop invariant except the induction variable dependence.
 ///
-/// We look for a GEP which has one index that is an induction variable and all
-/// other indices are loop invariant. If the stride of this access is also
-/// within a small bound we decide that this address computation can likely be
-/// merged into the addressing mode.
-/// In all other cases, we identify the address computation as complex.
-static bool isLikelyComplexAddressComputation(Value *Ptr,
-                                              LoopVectorizationLegality *Legal,
-                                              ScalarEvolution *SE,
-                                              const Loop *TheLoop) {
+/// This SCEV can be sent to the Target in order to estimate the address
+/// calculation cost.
+static const SCEV *getAddressAccessSCEV(
+    Value *Ptr,
+    LoopVectorizationLegality *Legal,
+    ScalarEvolution *SE,
+    const Loop *TheLoop) {
   auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
   if (!Gep)
-    return true;
+    return nullptr;
 
   // We are looking for a gep with all loop invariant indices except for one
   // which should be an induction variable.
@@ -6809,33 +6806,11 @@ static bool isLikelyComplexAddressComputation(Value *Ptr,
     Value *Opd = Gep->getOperand(i);
     if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
         !Legal->isInductionVariable(Opd))
-      return true;
+      return nullptr;
   }
 
-  // Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
-  // can likely be merged into the address computation.
-  unsigned MaxMergeDistance = 64;
-
-  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
-  if (!AddRec)
-    return true;
-
-  // Check the step is constant.
-  const SCEV *Step = AddRec->getStepRecurrence(*SE);
-  // Calculate the pointer stride and check if it is consecutive.
-  const auto *C = dyn_cast<SCEVConstant>(Step);
-  if (!C)
-    return true;
-
-  const APInt &APStepVal = C->getAPInt();
-
-  // Huge step value - give up.
-  if (APStepVal.getBitWidth() > 64)
-    return true;
-
-  int64_t StepVal = APStepVal.getSExtValue();
-
-  return StepVal > MaxMergeDistance;
+  // Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV.
+  return SE->getSCEV(Ptr);
 }
 
 static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
@@ -7063,12 +7038,12 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     unsigned Cost = 0;
     Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
 
-    // True if the memory instruction's address computation is complex.
-    bool IsComplexComputation =
-        isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
+    // Figure out whether the access is strided and get the stride value
+    // if it's known in compile time
+    const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
 
     // Get the cost of the scalar memory instruction and address computation.
-    Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
+    Cost += VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
     Cost += VF *
             TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
                                 Alignment, AS);
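Finally, condition (p2b) introduced in the LICM hunk further up reduces to one dominance query per loop exit. A compact sketch with a toy dominator relation (the precomputed Dominators sets are hypothetical, not LLVM's DominatorTree):

    #include <algorithm>
    #include <set>
    #include <vector>

    using Block = int; // toy stand-in for a basic block

    // Property (p2b): if the store's block dominates every exit block, any
    // execution that reaches an exit must already have passed through the
    // store, so materializing a store on the exit edges introduces no new
    // stores on paths that previously had none.
    static bool storeDominatesAllExits(
        const std::vector<std::set<Block>> &Dominators, // Dominators[B]: blocks dominating B
        Block StoreBB, const std::vector<Block> &ExitBlocks) {
      return std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
                         [&](Block Exit) {
                           return Dominators[Exit].count(StoreBB) != 0;
                         });
    }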