src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2022-03-17 15:34:17 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2022-03-17 15:34:17 +0000
commit	7ed7200811069c513465e0a7867ec0cb24bdb2dc (patch)
tree	dde84250cccd5c49a24528acfbf4a41f206acfd2
parent	1ff3a73c1ece7a0a4b7a8457eb4d5c47334b1526 (diff)

Vendor import of llvm-project branch release/14.x llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a. (tag: vendor/llvm-project/llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a)

Diffstat

-rw-r--r--

clang/include/clang/Basic/LangOptions.h

-rw-r--r--

clang/lib/AST/RecordLayoutBuilder.cpp

-rw-r--r--

clang/lib/Driver/ToolChains/PPCLinux.cpp

-rw-r--r--

clang/lib/Frontend/CompilerInvocation.cpp

-rw-r--r--

clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

-rw-r--r--

libcxx/include/span

-rw-r--r--

lld/COFF/Writer.cpp

-rw-r--r--

llvm/include/llvm/Analysis/InlineCost.h

-rw-r--r--

llvm/include/llvm/Transforms/Scalar.h

-rw-r--r--

llvm/include/llvm/Transforms/Scalar/LICM.h

-rw-r--r--

llvm/include/llvm/Transforms/Utils/LoopUtils.h

-rw-r--r--

llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h

-rw-r--r--

llvm/lib/Analysis/InlineCost.cpp

-rw-r--r--

llvm/lib/CodeGen/MachineSink.cpp

-rw-r--r--

llvm/lib/MC/WasmObjectWriter.cpp

-rw-r--r--

llvm/lib/Passes/PassBuilder.cpp

-rw-r--r--

llvm/lib/Passes/PassBuilderPipelines.cpp

-rw-r--r--

llvm/lib/Passes/PassRegistry.def

-rw-r--r--

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

-rwxr-xr-x

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

-rw-r--r--

llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp

-rw-r--r--

llvm/lib/Target/Mips/MipsISelLowering.cpp

-rw-r--r--

llvm/lib/Transforms/IPO/Inliner.cpp

-rw-r--r--

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

-rw-r--r--

llvm/lib/Transforms/IPO/PassManagerBuilder.cpp

-rw-r--r--

llvm/lib/Transforms/Scalar/LICM.cpp

-rw-r--r--

llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp

-rw-r--r--

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

28 files changed, 382 insertions, 160 deletions

diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 50c7f038fc6b..09afa641acf9 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h

@@ -181,10 +181,6 @@ public:

/// global-scope inline variables incorrectly.

Ver12,

- /// Attempt to be ABI-compatible with code generated by Clang 13.0.x.

- /// This causes clang to not pack non-POD members of packed structs.

- Ver13,

/// Conform to the underlying platform's C and C++ ABIs as closely

/// as we can.

Latest

diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index 709e05716a56..61a30ead165e 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp

@@ -1887,12 +1887,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,

UnfilledBitsInLastUnit = 0;

LastBitfieldStorageUnitSize = 0;

- llvm::Triple Target = Context.getTargetInfo().getTriple();

- bool FieldPacked = (Packed && (!FieldClass || FieldClass->isPOD() ||

- Context.getLangOpts().getClangABICompat() <=

- LangOptions::ClangABI::Ver13 ||

- Target.isPS4() || Target.isOSDarwin())) ||

- D->hasAttr<PackedAttr>();

+ bool FieldPacked = Packed || D->hasAttr<PackedAttr>();

AlignRequirementKind AlignRequirement = AlignRequirementKind::None;

CharUnits FieldSize;

diff --git a/clang/lib/Driver/ToolChains/PPCLinux.cpp b/clang/lib/Driver/ToolChains/PPCLinux.cpp
index e480d8bd8703..2fea262fd109 100644
--- a/clang/lib/Driver/ToolChains/PPCLinux.cpp
+++ b/clang/lib/Driver/ToolChains/PPCLinux.cpp

@@ -76,9 +76,11 @@ bool PPCLinuxToolChain::SupportIEEEFloat128(

if (Args.hasArg(options::OPT_nostdlib, options::OPT_nostdlibxx))

return true;

+ CXXStdlibType StdLib = ToolChain::GetCXXStdlibType(Args);

bool HasUnsupportedCXXLib =

- ToolChain::GetCXXStdlibType(Args) == CST_Libcxx &&

- GCCInstallation.getVersion().isOlderThan(12, 1, 0);

+ StdLib == CST_Libcxx ||

+ (StdLib == CST_Libstdcxx &&

+ GCCInstallation.getVersion().isOlderThan(12, 1, 0));

return GlibcSupportsFloat128(Linux::getDynamicLinker(Args)) &&

!(D.CCCIsCXX() && HasUnsupportedCXXLib);

diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 553a0b31c0ab..7f1ce3da7e7e 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp

@@ -3560,8 +3560,6 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,

GenerateArg(Args, OPT_fclang_abi_compat_EQ, "11.0", SA);

else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver12)

GenerateArg(Args, OPT_fclang_abi_compat_EQ, "12.0", SA);

- else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver13)

- GenerateArg(Args, OPT_fclang_abi_compat_EQ, "13.0", SA);

if (Opts.getSignReturnAddressScope() ==

LangOptions::SignReturnAddressScopeKind::All)

@@ -4064,8 +4062,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,

Opts.setClangABICompat(LangOptions::ClangABI::Ver11);

else if (Major <= 12)

Opts.setClangABICompat(LangOptions::ClangABI::Ver12);

- else if (Major <= 13)

- Opts.setClangABICompat(LangOptions::ClangABI::Ver13);

} else if (Ver != "latest") {

Diags.Report(diag::err_drv_invalid_value)

<< A->getAsString(Args) << A->getValue();

diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 1da0dfec3f23..467372c71496 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

@@ -6012,7 +6012,9 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,

(ParentDependsOnArgs && (ParentDC->isFunctionOrMethod() ||

isa<OMPDeclareReductionDecl>(ParentDC) ||

isa<OMPDeclareMapperDecl>(ParentDC))) ||

- (isa<CXXRecordDecl>(D) && cast<CXXRecordDecl>(D)->isLambda())) {

+ (isa<CXXRecordDecl>(D) && cast<CXXRecordDecl>(D)->isLambda() &&

+ cast<CXXRecordDecl>(D)->getTemplateDepth() >

+ TemplateArgs.getNumRetainedOuterLevels())) {

// D is a local of some kind. Look into the map of local

// declarations to their instantiations.

if (CurrentInstantiationScope) {

diff --git a/libcxx/include/span b/libcxx/include/span
index fd95ecca17f7..b8dbc7e01fd6 100644
--- a/libcxx/include/span
+++ b/libcxx/include/span

@@ -170,7 +170,25 @@ struct __is_std_span : false_type {};

template <class _Tp, size_t _Sz>

struct __is_std_span<span<_Tp, _Sz>> : true_type {};

-#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)

+#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)

+// This is a temporary workaround until we ship <ranges> -- we've unfortunately been

+// shipping <span> before its API was finalized, and we used to provide a constructor

+// from container types that had the requirements below. To avoid breaking code that

+// has started relying on the range-based constructor until we ship all of <ranges>,

+// we emulate the constructor requirements like this.

+template <class _Range, class _ElementType, class = void>

+struct __span_compatible_range : false_type { };

+template <class _Range, class _ElementType>

+struct __span_compatible_range<_Range, _ElementType, void_t<

+ enable_if_t<!__is_std_span<remove_cvref_t<_Range>>::value>,

+ enable_if_t<!__is_std_array<remove_cvref_t<_Range>>::value>,

+ enable_if_t<!is_array_v<remove_cvref_t<_Range>>>,

+ decltype(data(declval<_Range>())),

+ decltype(size(declval<_Range>())),

+ enable_if_t<is_convertible_v<remove_pointer_t<decltype(data(declval<_Range&>()))>(*)[], _ElementType(*)[]>>

+>> : true_type { };

+#else

template <class _Range, class _ElementType>

concept __span_compatible_range =

ranges::contiguous_range<_Range> &&

@@ -248,7 +266,22 @@ public:

_LIBCPP_INLINE_VISIBILITY

constexpr span(const array<_OtherElementType, _Extent>& __arr) noexcept : __data{__arr.data()} {}

-#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)

+#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)

+ template <class _Container, class = enable_if_t<

+ __span_compatible_range<_Container, element_type>::value

+ >>

+ _LIBCPP_INLINE_VISIBILITY

+ constexpr explicit span(_Container& __c) : __data{std::data(__c)} {

+ _LIBCPP_ASSERT(std::size(__c) == _Extent, "size mismatch in span's constructor (range)");

+ }

+ template <class _Container, class = enable_if_t<

+ __span_compatible_range<const _Container, element_type>::value

+ >>

+ _LIBCPP_INLINE_VISIBILITY

+ constexpr explicit span(const _Container& __c) : __data{std::data(__c)} {

+ _LIBCPP_ASSERT(std::size(__c) == _Extent, "size mismatch in span's constructor (range)");

+ }

+#else

template <__span_compatible_range<element_type> _Range>

_LIBCPP_INLINE_VISIBILITY

constexpr explicit span(_Range&& __r) : __data{ranges::data(__r)} {

@@ -434,7 +467,18 @@ public:

_LIBCPP_INLINE_VISIBILITY

constexpr span(const array<_OtherElementType, _Sz>& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {}

-#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)

+#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)

+ template <class _Container, class = enable_if_t<

+ __span_compatible_range<_Container, element_type>::value

+ >>

+ _LIBCPP_INLINE_VISIBILITY

+ constexpr span(_Container& __c) : __data(std::data(__c)), __size{std::size(__c)} {}

+ template <class _Container, class = enable_if_t<

+ __span_compatible_range<const _Container, element_type>::value

+ >>

+ _LIBCPP_INLINE_VISIBILITY

+ constexpr span(const _Container& __c) : __data(std::data(__c)), __size{std::size(__c)} {}

+#else

template <__span_compatible_range<element_type> _Range>

_LIBCPP_INLINE_VISIBILITY

constexpr span(_Range&& __r) : __data(ranges::data(__r)), __size{ranges::size(__r)} {}

diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 12db942f1db5..1ed2327ea630 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp

@@ -926,8 +926,14 @@ void Writer::createSections() {

// Move DISCARDABLE (or non-memory-mapped) sections to the end of file

// because the loader cannot handle holes. Stripping can remove other

// discardable ones than .reloc, which is first of them (created early).

- if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE)

+ if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) {

+ // Move discardable sections named .debug_ to the end, after other

+ // discardable sections. Stripping only removes the sections named

+ // .debug_* - thus try to avoid leaving holes after stripping.

+ if (s->name.startswith(".debug_"))

+ return 3;

return 2;

+ }

// .rsrc should come at the end of the non-discardable sections because its

// size may change by the Win32 UpdateResources() function, causing

// subsequent sections to move (see https://crbug.com/827082).

diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index f86ee5a14874..d3fa3b879125 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h

@@ -52,6 +52,9 @@ const unsigned TotalAllocaSizeRecursiveCaller = 1024;

/// Do not inline dynamic allocas that have been constant propagated to be

/// static allocas above this amount in bytes.

const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;

+const char FunctionInlineCostMultiplierAttributeName[] =

+ "function-inline-cost-multiplier";

} // namespace InlineConstants

// The cost-benefit pair computed by cost-benefit analysis.

@@ -217,6 +220,8 @@ struct InlineParams {

Optional<bool> AllowRecursiveCall = false;

};

+Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind);

/// Generate the parameters to tune the inline cost analysis based only on the

/// commandline options.

InlineParams getInlineParams();

diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index d6228700aa9a..4d6874f784ef 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h

@@ -133,7 +133,8 @@ Pass *createIndVarSimplifyPass();

Pass *createLICMPass();

Pass *createLICMPass(unsigned LicmMssaOptCap,

- unsigned LicmMssaNoAccForPromotionCap);

+ unsigned LicmMssaNoAccForPromotionCap,

+ bool AllowSpeculation);

//===----------------------------------------------------------------------===//

diff --git a/llvm/include/llvm/Transforms/Scalar/LICM.h b/llvm/include/llvm/Transforms/Scalar/LICM.h
index 751f75c0ccb2..503c8792d309 100644
--- a/llvm/include/llvm/Transforms/Scalar/LICM.h
+++ b/llvm/include/llvm/Transforms/Scalar/LICM.h

@@ -46,14 +46,18 @@ extern cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap;

class LICMPass : public PassInfoMixin<LICMPass> {

unsigned LicmMssaOptCap;

unsigned LicmMssaNoAccForPromotionCap;

+ bool LicmAllowSpeculation;

public:

LICMPass()

: LicmMssaOptCap(SetLicmMssaOptCap),

- LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}

- LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)

+ LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),

+ LicmAllowSpeculation(true) {}

+ LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,

+ bool LicmAllowSpeculation)

: LicmMssaOptCap(LicmMssaOptCap),

- LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}

+ LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),

+ LicmAllowSpeculation(LicmAllowSpeculation) {}

PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,

LoopStandardAnalysisResults &AR, LPMUpdater &U);

};

@@ -62,14 +66,18 @@ public:

class LNICMPass : public PassInfoMixin<LNICMPass> {

unsigned LicmMssaOptCap;

unsigned LicmMssaNoAccForPromotionCap;

+ bool LicmAllowSpeculation;

public:

LNICMPass()

: LicmMssaOptCap(SetLicmMssaOptCap),

- LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}

- LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)

+ LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),

+ LicmAllowSpeculation(true) {}

+ LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,

+ bool LicmAllowSpeculation)

: LicmMssaOptCap(LicmMssaOptCap),

- LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}

+ LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),

+ LicmAllowSpeculation(LicmAllowSpeculation) {}

PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,

LoopStandardAnalysisResults &AR, LPMUpdater &U);

};

diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 3a712d78df67..134f8bcfd888 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h

@@ -171,10 +171,13 @@ bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,

/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all

/// instructions of the loop and loop safety information as arguments.

/// Diagnostics is emitted via \p ORE. It returns changed status.

+/// \p AllowSpeculation is whether values should be hoisted even if they are not

+/// guaranteed to execute in the loop, but are safe to speculatively execute.

bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,

BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,

MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,

- SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool);

+ SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool,

+ bool AllowSpeculation);

/// This function deletes dead loops. The caller of this function needs to

/// guarantee that the loop is in fact dead.

@@ -204,12 +207,14 @@ void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,

/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions

/// of the loop and loop safety information as arguments.

/// Diagnostics is emitted via \p ORE. It returns changed status.

+/// \p AllowSpeculation is whether values should be hoisted even if they are not

+/// guaranteed to execute in the loop, but are safe to speculatively execute.

bool promoteLoopAccessesToScalars(

const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,

SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,

PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,

Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *,

- OptimizationRemarkEmitter *);

+ OptimizationRemarkEmitter *, bool AllowSpeculation);

/// Does a BFS from a given node to all of its children inside a given loop.

/// The returned vector of nodes includes the starting point.

diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
index fb3a7490346f..7af879638a4d 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h

@@ -23,6 +23,7 @@ class AssumptionCache;

struct SimplifyCFGOptions {

int BonusInstThreshold = 1;

bool ForwardSwitchCondToPhi = false;

+ bool ConvertSwitchRangeToICmp = false;

bool ConvertSwitchToLookupTable = false;

bool NeedCanonicalLoop = true;

bool HoistCommonInsts = false;

@@ -41,6 +42,10 @@ struct SimplifyCFGOptions {

ForwardSwitchCondToPhi = B;

return *this;

}

+ SimplifyCFGOptions &convertSwitchRangeToICmp(bool B) {

+ ConvertSwitchRangeToICmp = B;

+ return *this;

+ }

SimplifyCFGOptions &convertSwitchToLookupTable(bool B) {

ConvertSwitchToLookupTable = B;

return *this;

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d5411d916c77..cd5314e7a17a 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp

@@ -133,8 +133,6 @@ static cl::opt<bool> DisableGEPConstOperand(

cl::desc("Disables evaluation of GetElementPtr with constant operands"));

namespace {

-class InlineCostCallAnalyzer;

/// This function behaves more like CallBase::hasFnAttr: when it looks for the

/// requested attribute, it checks both the call instruction and the called

/// function (if it's available and operand bundles don't prohibit that).

@@ -151,7 +149,9 @@ Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {

return {};

}

+} // namespace

+namespace llvm {

Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {

Attribute Attr = getFnAttr(CB, AttrKind);

int AttrValue;

@@ -159,6 +159,10 @@ Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {

return None;

return AttrValue;

}

+} // namespace llvm

+namespace {

+class InlineCostCallAnalyzer;

// This struct is used to store information about inline cost of a

// particular instruction

@@ -904,6 +908,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {

getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))

Cost = *AttrCost;

+ if (Optional<int> AttrCostMult = getStringFnAttrAsInt(

+ CandidateCall,

+ InlineConstants::FunctionInlineCostMultiplierAttributeName))

+ Cost *= *AttrCostMult;

if (Optional<int> AttrThreshold =

getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))

Threshold = *AttrThreshold;

diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 0dbbc218e946..bc03776bde19 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp

@@ -18,12 +18,14 @@

#include "llvm/ADT/DenseSet.h"

#include "llvm/ADT/MapVector.h"

#include "llvm/ADT/PointerIntPair.h"

+#include "llvm/ADT/PostOrderIterator.h"

#include "llvm/ADT/SetVector.h"

#include "llvm/ADT/SmallSet.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/SparseBitVector.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/Analysis/AliasAnalysis.h"

+#include "llvm/Analysis/CFG.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"

#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"

@@ -429,6 +431,16 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {

AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

RegClassInfo.runOnMachineFunction(MF);

+ // MachineSink currently uses MachineLoopInfo, which only recognizes natural

+ // loops. As such, we could sink instructions into irreducible cycles, which

+ // would be non-profitable.

+ // WARNING: The current implementation of hasStoreBetween() is incorrect for

+ // sinking into irreducible cycles (PR53990), this bailout is currently

+ // necessary for correctness, not just profitability.

+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());

+ if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI))

+ return false;

bool EverMadeChange = false;

while (true) {

diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 636c1d238932..a016b7085a00 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp

@@ -140,36 +140,58 @@ raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) {

}

#endif

-// Write X as an (unsigned) LEB value at offset Offset in Stream, padded

+// Write Value as an (unsigned) LEB value at offset Offset in Stream, padded

// to allow patching.

-template <int W>

-void writePatchableLEB(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {

+template <typename T, int W>

+void writePatchableULEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {

uint8_t Buffer[W];

- unsigned SizeLen = encodeULEB128(X, Buffer, W);

+ unsigned SizeLen = encodeULEB128(Value, Buffer, W);

assert(SizeLen == W);

Stream.pwrite((char *)Buffer, SizeLen, Offset);

}

-// Write X as an signed LEB value at offset Offset in Stream, padded

+// Write Value as a signed LEB value at offset Offset in Stream, padded

// to allow patching.

-template <int W>

-void writePatchableSLEB(raw_pwrite_stream &Stream, int64_t X, uint64_t Offset) {

+template <typename T, int W>

+void writePatchableSLEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {

uint8_t Buffer[W];

- unsigned SizeLen = encodeSLEB128(X, Buffer, W);

+ unsigned SizeLen = encodeSLEB128(Value, Buffer, W);

assert(SizeLen == W);

Stream.pwrite((char *)Buffer, SizeLen, Offset);

}

-// Write X as a plain integer value at offset Offset in Stream.

-static void patchI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {

+static void writePatchableU32(raw_pwrite_stream &Stream, uint32_t Value,

+ uint64_t Offset) {

+ writePatchableULEB<uint32_t, 5>(Stream, Value, Offset);

+static void writePatchableS32(raw_pwrite_stream &Stream, int32_t Value,

+ uint64_t Offset) {

+ writePatchableSLEB<int32_t, 5>(Stream, Value, Offset);

+static void writePatchableU64(raw_pwrite_stream &Stream, uint64_t Value,

+ uint64_t Offset) {

+ writePatchableSLEB<uint64_t, 10>(Stream, Value, Offset);

+static void writePatchableS64(raw_pwrite_stream &Stream, int64_t Value,

+ uint64_t Offset) {

+ writePatchableSLEB<int64_t, 10>(Stream, Value, Offset);

+// Write Value as a plain integer value at offset Offset in Stream.

+static void patchI32(raw_pwrite_stream &Stream, uint32_t Value,

+ uint64_t Offset) {

uint8_t Buffer[4];

- support::endian::write32le(Buffer, X);

+ support::endian::write32le(Buffer, Value);

Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);

}

-static void patchI64(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {

+static void patchI64(raw_pwrite_stream &Stream, uint64_t Value,

+ uint64_t Offset) {

uint8_t Buffer[8];

- support::endian::write64le(Buffer, X);

+ support::endian::write64le(Buffer, Value);

Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);

}

@@ -423,8 +445,8 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) {

// Write the final section size to the payload_len field, which follows

// the section id byte.

- writePatchableLEB<5>(static_cast<raw_pwrite_stream &>(W->OS), Size,

- Section.SizeOffset);

+ writePatchableU32(static_cast<raw_pwrite_stream &>(W->OS), Size,

+ Section.SizeOffset);

}

// Emit the Wasm header.

@@ -755,7 +777,7 @@ void WasmObjectWriter::applyRelocations(

RelEntry.Offset;

LLVM_DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n");

- auto Value = getProvisionalValue(RelEntry, Layout);

+ uint64_t Value = getProvisionalValue(RelEntry, Layout);

switch (RelEntry.Type) {

case wasm::R_WASM_FUNCTION_INDEX_LEB:

@@ -764,10 +786,10 @@ void WasmObjectWriter::applyRelocations(

case wasm::R_WASM_MEMORY_ADDR_LEB:

case wasm::R_WASM_TAG_INDEX_LEB:

case wasm::R_WASM_TABLE_NUMBER_LEB:

- writePatchableLEB<5>(Stream, Value, Offset);

+ writePatchableU32(Stream, Value, Offset);

break;

case wasm::R_WASM_MEMORY_ADDR_LEB64:

- writePatchableLEB<10>(Stream, Value, Offset);

+ writePatchableU64(Stream, Value, Offset);

break;

case wasm::R_WASM_TABLE_INDEX_I32:

case wasm::R_WASM_MEMORY_ADDR_I32:

@@ -787,14 +809,14 @@ void WasmObjectWriter::applyRelocations(

case wasm::R_WASM_MEMORY_ADDR_SLEB:

case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:

case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB:

- writePatchableSLEB<5>(Stream, Value, Offset);

+ writePatchableS32(Stream, Value, Offset);

break;

case wasm::R_WASM_TABLE_INDEX_SLEB64:

case wasm::R_WASM_TABLE_INDEX_REL_SLEB64:

case wasm::R_WASM_MEMORY_ADDR_SLEB64:

case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64:

case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB64:

- writePatchableSLEB<10>(Stream, Value, Offset);

+ writePatchableS64(Stream, Value, Offset);

break;

default:

llvm_unreachable("invalid relocation type");

diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 015ca1eec4df..dedfc81f11bb 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp

@@ -679,6 +679,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {

bool Enable = !ParamName.consume_front("no-");

if (ParamName == "forward-switch-cond") {

Result.forwardSwitchCondToPhi(Enable);

+ } else if (ParamName == "switch-range-to-icmp") {

+ Result.convertSwitchRangeToICmp(Enable);

} else if (ParamName == "switch-to-lookup") {

Result.convertSwitchToLookupTable(Enable);

} else if (ParamName == "keep-loops") {

diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 93637c890c4f..e838665eb9ce 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp

@@ -259,14 +259,16 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,

FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));

// Hoisting of scalars and load expressions.

- FPM.addPass(SimplifyCFGPass());

+ FPM.addPass(

+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

FPM.addPass(InstCombinePass());

FPM.addPass(LibCallsShrinkWrapPass());

invokePeepholeEPCallbacks(FPM, Level);

- FPM.addPass(SimplifyCFGPass());

+ FPM.addPass(

+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

// Form canonically associated expression trees, and simplify the trees using

// basic mathematical properties. For example, this will form (nearly)

@@ -291,14 +293,19 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,

LPM1.addPass(LoopSimplifyCFGPass());

// Try to remove as much code from the loop header as possible,

- // to reduce amount of IR that will have to be duplicated.

+ // to reduce amount of IR that will have to be duplicated. However,

+ // do not perform speculative hoisting the first time as LICM

+ // will destroy metadata that may not need to be destroyed if run

+ // after loop rotation.

// TODO: Investigate promotion cap for O1.

- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));

+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/false));

LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,

isLTOPreLink(Phase)));

// TODO: Investigate promotion cap for O1.

- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));

+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true));

LPM1.addPass(SimpleLoopUnswitchPass());

if (EnableLoopFlatten)

LPM1.addPass(LoopFlattenPass());

@@ -335,7 +342,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,

FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),

/*UseMemorySSA=*/true,

/*UseBlockFrequencyInfo=*/true));

- FPM.addPass(SimplifyCFGPass());

+ FPM.addPass(

+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

FPM.addPass(InstCombinePass());

// The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.

// *All* loop passes must preserve it, in order to be able to use it.

@@ -373,7 +381,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,

// the simplifications and basic cleanup after all the simplifications.

// TODO: Investigate if this is too expensive.

FPM.addPass(ADCEPass());

- FPM.addPass(SimplifyCFGPass());

+ FPM.addPass(

+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

FPM.addPass(InstCombinePass());

invokePeepholeEPCallbacks(FPM, Level);

@@ -408,7 +417,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,

// Global value numbering based sinking.

if (EnableGVNSink) {

FPM.addPass(GVNSinkPass());

- FPM.addPass(SimplifyCFGPass());

+ FPM.addPass(

+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

}

if (EnableConstraintElimination)

@@ -421,7 +431,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,

FPM.addPass(JumpThreadingPass());

FPM.addPass(CorrelatedValuePropagationPass());

- FPM.addPass(SimplifyCFGPass());

+ FPM.addPass(

+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

FPM.addPass(InstCombinePass());

if (Level == OptimizationLevel::O3)

FPM.addPass(AggressiveInstCombinePass());

@@ -438,7 +449,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,

FPM.addPass(PGOMemOPSizeOpt());

FPM.addPass(TailCallElimPass());

- FPM.addPass(SimplifyCFGPass());

+ FPM.addPass(

+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

// Form canonically associated expression trees, and simplify the trees using

// basic mathematical properties. For example, this will form (nearly)

@@ -463,15 +475,20 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,

LPM1.addPass(LoopSimplifyCFGPass());

// Try to remove as much code from the loop header as possible,

- // to reduce amount of IR that will have to be duplicated.

+ // to reduce amount of IR that will have to be duplicated. However,

+ // do not perform speculative hoisting the first time as LICM

+ // will destroy metadata that may not need to be destroyed if run

+ // after loop rotation.

// TODO: Investigate promotion cap for O1.

- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));

+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/false));

// Disable header duplication in loop rotation at -Oz.

LPM1.addPass(

LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));

// TODO: Investigate promotion cap for O1.

- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));

+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true));

LPM1.addPass(

SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&

EnableO3NonTrivialUnswitching));

@@ -510,7 +527,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,

FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),

/*UseMemorySSA=*/true,

/*UseBlockFrequencyInfo=*/true));

- FPM.addPass(SimplifyCFGPass());

+ FPM.addPass(

+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

FPM.addPass(InstCombinePass());

// The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,

// LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.

@@ -567,7 +585,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,

FPM.addPass(DSEPass());

FPM.addPass(createFunctionToLoopPassAdaptor(

- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),

+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true),

/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));

FPM.addPass(CoroElidePass());

@@ -575,8 +594,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,

for (auto &C : ScalarOptimizerLateEPCallbacks)

C(FPM, Level);

- FPM.addPass(SimplifyCFGPass(

- SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));

+ FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()

+ .convertSwitchRangeToICmp(true)

+ .hoistCommonInsts(true)

+ .sinkCommonInsts(true)));

FPM.addPass(InstCombinePass());

invokePeepholeEPCallbacks(FPM, Level);

@@ -614,7 +635,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,

FunctionPassManager FPM;

FPM.addPass(SROAPass());

FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.

- FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.

+ FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(

+ true))); // Merge & remove basic blocks.

FPM.addPass(InstCombinePass()); // Combine silly sequences.

invokePeepholeEPCallbacks(FPM, Level);

@@ -928,7 +950,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,

GlobalCleanupPM.addPass(InstCombinePass());

invokePeepholeEPCallbacks(GlobalCleanupPM, Level);

- GlobalCleanupPM.addPass(SimplifyCFGPass());

+ GlobalCleanupPM.addPass(

+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),

PTO.EagerlyInvalidateAnalyses));

@@ -1007,7 +1030,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,

ExtraPasses.addPass(CorrelatedValuePropagationPass());

ExtraPasses.addPass(InstCombinePass());

LoopPassManager LPM;

- LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));

+ LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true));

LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==

OptimizationLevel::O3));

ExtraPasses.addPass(

@@ -1015,7 +1039,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,

ExtraPasses.addPass(

createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,

/*UseBlockFrequencyInfo=*/true));

- ExtraPasses.addPass(SimplifyCFGPass());

+ ExtraPasses.addPass(

+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

ExtraPasses.addPass(InstCombinePass());

FPM.addPass(std::move(ExtraPasses));

}

@@ -1031,6 +1056,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,

// before SLP vectorization.

FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()

.forwardSwitchCondToPhi(true)

+ .convertSwitchRangeToICmp(true)

.convertSwitchToLookupTable(true)

.needCanonicalLoops(false)

.hoistCommonInsts(true)

@@ -1073,7 +1099,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,

FPM.addPass(

RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());

FPM.addPass(createFunctionToLoopPassAdaptor(

- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),

+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true),

/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));

}

@@ -1202,7 +1229,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,

// LoopSink (and other loop passes since the last simplifyCFG) might have

// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.

- OptimizePM.addPass(SimplifyCFGPass());

+ OptimizePM.addPass(

+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

OptimizePM.addPass(CoroCleanupPass());

@@ -1612,7 +1640,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,

FunctionPassManager MainFPM;

MainFPM.addPass(createFunctionToLoopPassAdaptor(

- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),

+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true),

/*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));

if (RunNewGVN)

@@ -1676,8 +1705,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,

// Add late LTO optimization passes.

// Delete basic blocks, which optimization passes may have killed.

- MPM.addPass(createModuleToFunctionPassAdaptor(

- SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));

+ MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass(

+ SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(

+ true))));

// Drop bodies of available eternally objects to improve GlobalDCE.

MPM.addPass(EliminateAvailableExternallyPass());

diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 8e0af11b854d..69d8d8c43267 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def

@@ -423,6 +423,7 @@ FUNCTION_PASS_WITH_PARAMS("simplifycfg",

parseSimplifyCFGOptions,

"no-forward-switch-cond;forward-switch-cond;"

+ "no-switch-range-to-icmp;switch-range-to-icmp;"

"no-switch-to-lookup;switch-to-lookup;"

"no-keep-loops;keep-loops;"

"no-hoist-common-insts;hoist-common-insts;"

diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 4af28fc070dd..6a751da7ad55 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

@@ -531,6 +531,7 @@ void AArch64PassConfig::addIRPasses() {

if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)

addPass(createCFGSimplificationPass(SimplifyCFGOptions()

.forwardSwitchCondToPhi(true)

+ .convertSwitchRangeToICmp(true)

.convertSwitchToLookupTable(true)

.needCanonicalLoops(false)

.hoistCommonInsts(true)

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0ba75a544c04..14b4f7c56c57 100755
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

@@ -118,9 +118,10 @@ HexagonTargetLowering::initializeHVXLowering() {

setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal);

// Vector shuffle is always promoted to ByteV and a bitcast to f16 is

// generated.

- setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);

- setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);

- setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);

+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);

+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);

+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);

+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);

// Custom-lower BUILD_VECTOR for vector pairs. The standard (target-

// independent) handling of it would convert it to a load, which is

@@ -780,7 +781,6 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,

SDValue N = HalfV0;

SDValue M = HalfV1;

for (unsigned i = 0; i != NumWords/2; ++i) {

// Rotate by element count since last insertion.

if (Words[i] != Words[n] || VecHist[n] <= 1) {

Sn = DAG.getConstant(Rn, dl, MVT::i32);

diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index c6703bb8a62a..08acf81961a3 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp

@@ -345,6 +345,7 @@ void HexagonPassConfig::addIRPasses() {

if (EnableInitialCFGCleanup)

addPass(createCFGSimplificationPass(SimplifyCFGOptions()

.forwardSwitchCondToPhi(true)

+ .convertSwitchRangeToICmp(true)

.convertSwitchToLookupTable(true)

.needCanonicalLoops(false)

.hoistCommonInsts(true)

diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 0c2e129b8f1f..8534a0ad886e 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp

@@ -4732,18 +4732,19 @@ MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,

MipsTargetLowering::getRegisterByName(const char *RegName, LLT VT,

const MachineFunction &MF) const {

- // Named registers is expected to be fairly rare. For now, just support $28

- // since the linux kernel uses it.

+ // The Linux kernel uses $28 and sp.

if (Subtarget.isGP64bit()) {

- .Case("$28", Mips::GP_64)

- .Default(Register());

+ .Case("$28", Mips::GP_64)

+ .Case("sp", Mips::SP_64)

+ .Default(Register());

if (Reg)

return Reg;

} else {

- .Case("$28", Mips::GP)

- .Default(Register());

+ .Case("$28", Mips::GP)

+ .Case("sp", Mips::SP)

+ .Default(Register());

if (Reg)

return Reg;

}

diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 49babc24cb82..10abea7ebd32 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp

@@ -22,6 +22,7 @@

#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/Statistic.h"

+#include "llvm/ADT/StringExtras.h"

#include "llvm/ADT/StringRef.h"

#include "llvm/Analysis/AssumptionCache.h"

#include "llvm/Analysis/BasicAliasAnalysis.h"

@@ -92,6 +93,18 @@ static cl::opt<bool>

DisableInlinedAllocaMerging("disable-inlined-alloca-merging",

cl::init(false), cl::Hidden);

+static cl::opt<int> IntraSCCCostMultiplier(

+ "intra-scc-cost-multiplier", cl::init(2), cl::Hidden,

+ cl::desc(

+ "Cost multiplier to multiply onto inlined call sites where the "

+ "new call was previously an intra-SCC call (not relevant when the "

+ "original call was already intra-SCC). This can accumulate over "

+ "multiple inlinings (e.g. if a call site already had a cost "

+ "multiplier and one of its inlined calls was also subject to "

+ "this, the inlined call would have the original multiplier "

+ "multiplied by intra-scc-cost-multiplier). This is to prevent tons of "

+ "inlining through a child SCC which can cause terrible compile times"));

/// A flag for test, so we can print the content of the advisor when running it

/// as part of the default (e.g. -O3) pipeline.

static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing",

@@ -876,8 +889,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,

// trigger infinite inlining, much like is prevented within the inliner

// itself by the InlineHistory above, but spread across CGSCC iterations

// and thus hidden from the full inline history.

- if (CG.lookupSCC(*CG.lookup(Callee)) == C &&

- UR.InlinedInternalEdges.count({&N, C})) {

+ LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));

+ if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {

LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "

"previously split out of this SCC by inlining: "

<< F.getName() << " -> " << Callee.getName() << "\n");

@@ -897,6 +910,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,

continue;

}

+ int CBCostMult =

+ getStringFnAttrAsInt(

+ *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)

+ .getValueOr(1);

// Setup the data structure used to plumb customization into the

// `InlineFunction` routine.

InlineFunctionInfo IFI(

@@ -935,9 +953,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,

if (tryPromoteCall(*ICB))

NewCallee = ICB->getCalledFunction();

}

- if (NewCallee)

- if (!NewCallee->isDeclaration())

+ if (NewCallee) {

+ if (!NewCallee->isDeclaration()) {

Calls->push({ICB, NewHistoryID});

+ // Continually inlining through an SCC can result in huge compile

+ // times and bloated code since we arbitrarily stop at some point

+ // when the inliner decides it's not profitable to inline anymore.

+ // We attempt to mitigate this by making these calls exponentially

+ // more expensive.

+ // This doesn't apply to calls in the same SCC since if we do

+ // inline through the SCC the function will end up being

+ // self-recursive which the inliner bails out on, and inlining

+ // within an SCC is necessary for performance.

+ if (CalleeSCC != C &&

+ CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {

+ Attribute NewCBCostMult = Attribute::get(

+ M.getContext(),

+ InlineConstants::FunctionInlineCostMultiplierAttributeName,

+ itostr(CBCostMult * IntraSCCCostMultiplier));

+ ICB->addFnAttr(NewCBCostMult);

+ }

}

diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 5113c0c67acc..7205ae178d21 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp

@@ -3712,9 +3712,9 @@ struct AAKernelInfoFunction : AAKernelInfo {

// __kmpc_get_hardware_num_threads_in_block();

// WarpSize = __kmpc_get_warp_size();

// BlockSize = BlockHwSize - WarpSize;

- // if (InitCB >= BlockSize) return;

- // IsWorkerCheckBB: bool IsWorker = InitCB >= 0;

+ // IsWorkerCheckBB: bool IsWorker = InitCB != -1;

// if (IsWorker) {

+ // if (InitCB >= BlockSize) return;

// SMBeginBB: __kmpc_barrier_simple_generic(...);

// void *WorkFn;

// bool Active = __kmpc_kernel_parallel(&WorkFn);

@@ -3771,6 +3771,13 @@ struct AAKernelInfoFunction : AAKernelInfo {

ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);

InitBB->getTerminator()->eraseFromParent();

+ Instruction *IsWorker =

+ ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,

+ ConstantInt::get(KernelInitCB->getType(), -1),

+ "thread.is_worker", InitBB);

+ IsWorker->setDebugLoc(DLoc);

+ BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);

Module &M = *Kernel->getParent();

auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

FunctionCallee BlockHwSizeFn =

@@ -3780,29 +3787,22 @@ struct AAKernelInfoFunction : AAKernelInfo {

OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(

M, OMPRTL___kmpc_get_warp_size);

CallInst *BlockHwSize =

- CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);

+ CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);

OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);

BlockHwSize->setDebugLoc(DLoc);

- CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);

+ CallInst *WarpSize =

+ CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);

OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);

WarpSize->setDebugLoc(DLoc);

- Instruction *BlockSize =

- BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);

+ Instruction *BlockSize = BinaryOperator::CreateSub(

+ BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);

BlockSize->setDebugLoc(DLoc);

- Instruction *IsMainOrWorker =

- ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB,

- BlockSize, "thread.is_main_or_worker", InitBB);

+ Instruction *IsMainOrWorker = ICmpInst::Create(

+ ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize,

+ "thread.is_main_or_worker", IsWorkerCheckBB);

IsMainOrWorker->setDebugLoc(DLoc);

- BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker,

- InitBB);

- Instruction *IsWorker =

- ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,

- ConstantInt::get(KernelInitCB->getType(), -1),

- "thread.is_worker", IsWorkerCheckBB);

- IsWorker->setDebugLoc(DLoc);

- BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker,

- IsWorkerCheckBB);

+ BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB,

+ IsMainOrWorker, IsWorkerCheckBB);

// Create local storage for the work function pointer.

const DataLayout &DL = M.getDataLayout();

diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 74f68531b89a..6e5aeb9c41f6 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -365,7 +365,9 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,

MPM.add(createFunctionInliningPass(IP));

MPM.add(createSROAPass());

MPM.add(createEarlyCSEPass()); // Catch trivial redundancies

- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs

+ MPM.add(createCFGSimplificationPass(

+ SimplifyCFGOptions().convertSwitchRangeToICmp(

+ true))); // Merge & remove BBs

MPM.add(createInstructionCombiningPass()); // Combine silly seq's

addExtensionsToPM(EP_Peephole, MPM);

}

@@ -404,7 +406,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(

MPM.add(createGVNHoistPass());

if (EnableGVNSink) {

MPM.add(createGVNSinkPass());

- MPM.add(createCFGSimplificationPass());

+ MPM.add(createCFGSimplificationPass(

+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

}

@@ -418,7 +421,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(

MPM.add(createJumpThreadingPass()); // Thread jumps.

MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals

}

- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs

+ MPM.add(

+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(

+ true))); // Merge & remove BBs

// Combine silly seq's

if (OptLevel > 2)

MPM.add(createAggressiveInstCombinerPass());

@@ -434,7 +439,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(

// TODO: Investigate the cost/benefit of tail call elimination on debugging.

if (OptLevel > 1)

MPM.add(createTailCallEliminationPass()); // Eliminate tail calls

- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs

+ MPM.add(

+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(

+ true))); // Merge & remove BBs

MPM.add(createReassociatePass()); // Reassociate expressions

// The matrix extension can introduce large vector operations early, which can

@@ -451,13 +458,18 @@ void PassManagerBuilder::addFunctionSimplificationPasses(

MPM.add(createLoopSimplifyCFGPass());

}

// Try to remove as much code from the loop header as possible,

- // to reduce amount of IR that will have to be duplicated.

+ // to reduce amount of IR that will have to be duplicated. However,

+ // do not perform speculative hoisting the first time as LICM

+ // will destroy metadata that may not need to be destroyed if run

+ // after loop rotation.

// TODO: Investigate promotion cap for O1.

- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));

+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/false));

// Rotate Loop - disable header duplication at -Oz

MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));

// TODO: Investigate promotion cap for O1.

- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));

+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true));

if (EnableSimpleLoopUnswitch)

MPM.add(createSimpleLoopUnswitchLegacyPass());

else

@@ -465,7 +477,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(

// FIXME: We break the loop pass pipeline here in order to do full

// simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the

// need for this.

- MPM.add(createCFGSimplificationPass());

+ MPM.add(createCFGSimplificationPass(

+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

MPM.add(createInstructionCombiningPass());

// We resume loop passes creating a second loop pipeline here.

if (EnableLoopFlatten) {

@@ -521,7 +534,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(

// TODO: Investigate if this is too expensive at O1.

if (OptLevel > 1) {

MPM.add(createDeadStoreEliminationPass()); // Delete dead stores

- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));

+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true));

}

addExtensionsToPM(EP_ScalarOptimizerLate, MPM);

@@ -580,9 +594,11 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,

PM.add(createEarlyCSEPass());

PM.add(createCorrelatedValuePropagationPass());

PM.add(createInstructionCombiningPass());

- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));

+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true));

PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));

- PM.add(createCFGSimplificationPass());

+ PM.add(createCFGSimplificationPass(

+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

PM.add(createInstructionCombiningPass());

}

@@ -597,6 +613,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,

// before SLP vectorization.

PM.add(createCFGSimplificationPass(SimplifyCFGOptions()

.forwardSwitchCondToPhi(true)

+ .convertSwitchRangeToICmp(true)

.convertSwitchToLookupTable(true)

.needCanonicalLoops(false)

.hoistCommonInsts(true)

@@ -641,7 +658,8 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,

// unrolled loop is a inner loop, then the prologue will be inside the

// outer loop. LICM pass can help to promote the runtime check out if the

// checked value is loop invariant.

- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));

+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true));

}

PM.add(createWarnMissedTransformationsPass());

@@ -772,7 +790,9 @@ void PassManagerBuilder::populateModulePassManager(

MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE

addExtensionsToPM(EP_Peephole, MPM);

- MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE

+ MPM.add(

+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(

+ true))); // Clean up after IPCP & DAE

// For SamplePGO in ThinLTO compile phase, we do not want to do indirect

// call promotion as it will change the CFG too much to make the 2nd

@@ -886,7 +906,8 @@ void PassManagerBuilder::populateModulePassManager(

// later might get benefit of no-alias assumption in clone loop.

if (UseLoopVersioningLICM) {

MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM

- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));

+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true));

}

// We add a fresh GlobalsModRef run at this point. This is particularly

@@ -972,7 +993,8 @@ void PassManagerBuilder::populateModulePassManager(

// LoopSink (and other loop passes since the last simplifyCFG) might have

// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.

- MPM.add(createCFGSimplificationPass());

+ MPM.add(createCFGSimplificationPass(

+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

addExtensionsToPM(EP_OptimizerLast, MPM);

@@ -1120,7 +1142,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {

// Run a few AA driven optimizations here and now, to cleanup the code.

PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.

- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));

+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,

+ /*AllowSpeculation=*/true));

PM.add(NewGVN ? createNewGVNPass()

: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.

PM.add(createMemCpyOptPass()); // Remove dead memcpys.

diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 7fb1a25bdf13..6372ce19f8ee 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp

@@ -149,13 +149,11 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,

BlockFrequencyInfo *BFI, const Loop *CurLoop,

ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,

OptimizationRemarkEmitter *ORE);

-static bool isSafeToExecuteUnconditionally(Instruction &Inst,

- const DominatorTree *DT,

- const TargetLibraryInfo *TLI,

- const Loop *CurLoop,

- const LoopSafetyInfo *SafetyInfo,

- OptimizationRemarkEmitter *ORE,

- const Instruction *CtxI = nullptr);

+static bool isSafeToExecuteUnconditionally(

+ Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,

+ const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,

+ OptimizationRemarkEmitter *ORE, const Instruction *CtxI,

+ bool AllowSpeculation);

static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,

AliasSetTracker *CurAST, Loop *CurLoop,

AAResults *AA);

@@ -188,21 +186,26 @@ struct LoopInvariantCodeMotion {

OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);

LoopInvariantCodeMotion(unsigned LicmMssaOptCap,

- unsigned LicmMssaNoAccForPromotionCap)

+ unsigned LicmMssaNoAccForPromotionCap,

+ bool LicmAllowSpeculation)

: LicmMssaOptCap(LicmMssaOptCap),

- LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}

+ LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),

+ LicmAllowSpeculation(LicmAllowSpeculation) {}

private:

unsigned LicmMssaOptCap;

unsigned LicmMssaNoAccForPromotionCap;

+ bool LicmAllowSpeculation;

};

struct LegacyLICMPass : public LoopPass {

static char ID; // Pass identification, replacement for typeid

LegacyLICMPass(

unsigned LicmMssaOptCap = SetLicmMssaOptCap,

- unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)

- : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {

+ unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,

+ bool LicmAllowSpeculation = true)

+ : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,

+ LicmAllowSpeculation) {

initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());

}

@@ -265,7 +268,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,

// but ORE cannot be preserved (see comment before the pass definition).

OptimizationRemarkEmitter ORE(L.getHeader()->getParent());

- LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);

+ LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,

+ LicmAllowSpeculation);

if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,

&AR.SE, AR.MSSA, &ORE))

return PreservedAnalyses::all();

@@ -290,7 +294,8 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,

// but ORE cannot be preserved (see comment before the pass definition).

OptimizationRemarkEmitter ORE(LN.getParent());

- LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);

+ LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,

+ LicmAllowSpeculation);

Loop &OutermostLoop = LN.getOutermostLoop();

bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,

@@ -321,8 +326,10 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,

Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }

Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,

- unsigned LicmMssaNoAccForPromotionCap) {

- return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);

+ unsigned LicmMssaNoAccForPromotionCap,

+ bool LicmAllowSpeculation) {

+ return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,

+ LicmAllowSpeculation);

}

llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,

@@ -418,7 +425,8 @@ bool LoopInvariantCodeMotion::runOnLoop(

Flags.setIsSink(false);

if (Preheader)

Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,

- &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode);

+ &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,

+ LicmAllowSpeculation);

// Now that all loop invariants have been removed from the loop, promote any

// memory references to scalars that we can.

@@ -460,8 +468,8 @@ bool LoopInvariantCodeMotion::runOnLoop(

for (const SmallSetVector<Value *, 8> &PointerMustAliases :

collectPromotionCandidates(MSSA, AA, L)) {

LocalPromoted |= promoteLoopAccessesToScalars(

- PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,

- LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE);

+ PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,

+ DT, TLI, L, &MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);

}

Promoted |= LocalPromoted;

} while (LocalPromoted);

@@ -825,7 +833,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,

MemorySSAUpdater *MSSAU, ScalarEvolution *SE,

ICFLoopSafetyInfo *SafetyInfo,

SinkAndHoistLICMFlags &Flags,

- OptimizationRemarkEmitter *ORE, bool LoopNestMode) {

+ OptimizationRemarkEmitter *ORE, bool LoopNestMode,

+ bool AllowSpeculation) {

// Verify inputs.

assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&

CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&

@@ -877,7 +886,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,

true, &Flags, ORE) &&

isSafeToExecuteUnconditionally(

I, DT, TLI, CurLoop, SafetyInfo, ORE,

- CurLoop->getLoopPreheader()->getTerminator())) {

+ CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {

hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,

MSSAU, SE, ORE);

HoistedInstructions.push_back(&I);

@@ -1774,14 +1783,12 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,

/// Only sink or hoist an instruction if it is not a trapping instruction,

/// or if the instruction is known not to trap when moved to the preheader.

/// or if it is a trapping instruction and is guaranteed to execute.

-static bool isSafeToExecuteUnconditionally(Instruction &Inst,

- const DominatorTree *DT,

- const TargetLibraryInfo *TLI,

- const Loop *CurLoop,

- const LoopSafetyInfo *SafetyInfo,

- OptimizationRemarkEmitter *ORE,

- const Instruction *CtxI) {

- if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))

+static bool isSafeToExecuteUnconditionally(

+ Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,

+ const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,

+ OptimizationRemarkEmitter *ORE, const Instruction *CtxI,

+ bool AllowSpeculation) {

+ if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))

return true;

bool GuaranteedToExecute =

@@ -1949,7 +1956,7 @@ bool llvm::promoteLoopAccessesToScalars(

SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,

LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,

Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,

- OptimizationRemarkEmitter *ORE) {

+ OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {

// Verify inputs.

assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&

SafetyInfo != nullptr &&

@@ -2054,9 +2061,9 @@ bool llvm::promoteLoopAccessesToScalars(

// to execute does as well. Thus we can increase our guaranteed

// alignment as well.

if (!DereferenceableInPH || (InstAlignment > Alignment))

- if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,

- SafetyInfo, ORE,

- Preheader->getTerminator())) {

+ if (isSafeToExecuteUnconditionally(

+ *Load, DT, TLI, CurLoop, SafetyInfo, ORE,

+ Preheader->getTerminator(), AllowSpeculation)) {

DereferenceableInPH = true;

Alignment = std::max(Alignment, InstAlignment);

}

diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index ee17da1875e5..b8972751066d 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp

@@ -59,6 +59,11 @@ static cl::opt<bool> UserKeepLoops(

"keep-loops", cl::Hidden, cl::init(true),

cl::desc("Preserve canonical loop structure (default = true)"));

+static cl::opt<bool> UserSwitchRangeToICmp(

+ "switch-range-to-icmp", cl::Hidden, cl::init(false),

+ cl::desc(

+ "Convert switches into an integer range comparison (default = false)"));

static cl::opt<bool> UserSwitchToLookup(

"switch-to-lookup", cl::Hidden, cl::init(false),

cl::desc("Convert switches to lookup tables (default = false)"));

@@ -311,6 +316,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {

Options.BonusInstThreshold = UserBonusInstThreshold;

if (UserForwardSwitchCond.getNumOccurrences())

Options.ForwardSwitchCondToPhi = UserForwardSwitchCond;

+ if (UserSwitchRangeToICmp.getNumOccurrences())

+ Options.ConvertSwitchRangeToICmp = UserSwitchRangeToICmp;

if (UserSwitchToLookup.getNumOccurrences())

Options.ConvertSwitchToLookupTable = UserSwitchToLookup;

if (UserKeepLoops.getNumOccurrences())

@@ -337,6 +344,8 @@ void SimplifyCFGPass::printPipeline(

OS << "<";

OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";

OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;";

+ OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-")

+ << "switch-range-to-icmp;";

OS << (Options.ConvertSwitchToLookupTable ? "" : "no-")

<< "switch-to-lookup;";

OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 335ac03ccb52..8c4e1b381b4d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp

@@ -6211,7 +6211,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {

}

// Try to transform the switch into an icmp and a branch.

- if (TurnSwitchRangeIntoICmp(SI, Builder))

+ // The conversion from switch to comparison may lose information on

+ // impossible switch values, so disable it early in the pipeline.

+ if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))

return requestResimplify();

// Remove unreachable cases.