author     Dimitry Andric <dim@FreeBSD.org>    2019-01-19 10:01:25 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2019-01-19 10:01:25 +0000
commit     d8e91e46262bc44006913e6796843909f1ac7bcd (patch)
tree       7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Analysis/VectorUtils.cpp
parent     b7eb8e35e481a74962664b63dfb09483b200209a (diff)
Vendor import of llvm trunk r351319 (just before the release_80 branch point)
tag: vendor/llvm/llvm-trunk-r351319
Notes:
    svn path=/vendor/llvm/dist/; revision=343171
    svn path=/vendor/llvm/llvm-trunk-r351319/; revision=343172; tag=vendor/llvm/llvm-trunk-r351319
Diffstat (limited to 'lib/Analysis/VectorUtils.cpp')
-rw-r--r--   lib/Analysis/VectorUtils.cpp | 530
1 file changed, 513 insertions, 17 deletions
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index d73d24736439..5656a19d7e0d 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -25,16 +26,30 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
+#define DEBUG_TYPE "vectorutils"
+
using namespace llvm;
using namespace llvm::PatternMatch;
-/// Identify if the intrinsic is trivially vectorizable.
-/// This method returns true if the intrinsic's argument types are all
-/// scalars for the scalar form of the intrinsic and all vectors for
-/// the vector form of the intrinsic.
+/// Maximum factor for an interleaved memory access.
+static cl::opt<unsigned> MaxInterleaveGroupFactor(
+ "max-interleave-group-factor", cl::Hidden,
+ cl::desc("Maximum factor for an interleaved access group (default = 8)"),
+ cl::init(8));
+
+/// Return true if all of the intrinsic's arguments and return type are scalars
+/// for the scalar form of the intrinsic and vectors for the vector form of the
+/// intrinsic.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::sqrt:
+ case Intrinsic::bswap: // Begin integer bit-manipulation.
+ case Intrinsic::bitreverse:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::exp:
@@ -45,6 +60,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::fabs:
case Intrinsic::minnum:
case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
@@ -52,15 +69,15 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
- case Intrinsic::bswap:
- case Intrinsic::bitreverse:
- case Intrinsic::ctpop:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
case Intrinsic::powi:
+ case Intrinsic::canonicalize:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat:
return true;
default:
return false;
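
For context, the sketch below is not part of this commit; the helper name canWidenLaneForLane is made up for illustration. It shows how a transform might consult this predicate before widening a call: when it returns true, every scalar operand and the result of the call simply become vectors of the chosen width.

  // Sketch only, assuming the LLVM headers named below.
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/Intrinsics.h"

  static bool canWidenLaneForLane(const llvm::CallInst &CI) {
    llvm::Function *Callee = CI.getCalledFunction();
    if (!Callee)
      return false;
    // True exactly for the intrinsics listed in the switch above.
    return llvm::isTriviallyVectorizable(Callee->getIntrinsicID());
  }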
@@ -270,9 +287,10 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
}
// Extract a value from a vector add operation with a constant zero.
- Value *Val = nullptr; Constant *Con = nullptr;
- if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
- if (Constant *Elt = Con->getAggregateElement(EltNo))
+ // TODO: Use getBinOpIdentity() to generalize this.
+ Value *Val; Constant *C;
+ if (match(V, m_Add(m_Value(Val), m_Constant(C))))
+ if (Constant *Elt = C->getAggregateElement(EltNo))
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
@@ -450,16 +468,100 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
return MinBWs;
}
+/// Add all access groups in @p AccGroups to @p List.
+template <typename ListT>
+static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
+ // Interpret an access group as a list containing itself.
+ if (AccGroups->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
+ List.insert(AccGroups);
+ return;
+ }
+
+ for (auto &AccGroupListOp : AccGroups->operands()) {
+ auto *Item = cast<MDNode>(AccGroupListOp.get());
+ assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+ List.insert(Item);
+ }
+}
+
+MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
+ if (!AccGroups1)
+ return AccGroups2;
+ if (!AccGroups2)
+ return AccGroups1;
+ if (AccGroups1 == AccGroups2)
+ return AccGroups1;
+
+ SmallSetVector<Metadata *, 4> Union;
+ addToAccessGroupList(Union, AccGroups1);
+ addToAccessGroupList(Union, AccGroups2);
+
+ if (Union.size() == 0)
+ return nullptr;
+ if (Union.size() == 1)
+ return cast<MDNode>(Union.front());
+
+ LLVMContext &Ctx = AccGroups1->getContext();
+ return MDNode::get(Ctx, Union.getArrayRef());
+}
+
+MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
+ const Instruction *Inst2) {
+ bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
+ bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
+
+ if (!MayAccessMem1 && !MayAccessMem2)
+ return nullptr;
+ if (!MayAccessMem1)
+ return Inst2->getMetadata(LLVMContext::MD_access_group);
+ if (!MayAccessMem2)
+ return Inst1->getMetadata(LLVMContext::MD_access_group);
+
+ MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
+ MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
+ if (!MD1 || !MD2)
+ return nullptr;
+ if (MD1 == MD2)
+ return MD1;
+
+ // Use a set for a scalable 'contains' check.
+ SmallPtrSet<Metadata *, 4> AccGroupSet2;
+ addToAccessGroupList(AccGroupSet2, MD2);
+
+ SmallVector<Metadata *, 4> Intersection;
+ if (MD1->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
+ if (AccGroupSet2.count(MD1))
+ Intersection.push_back(MD1);
+ } else {
+ for (const MDOperand &Node : MD1->operands()) {
+ auto *Item = cast<MDNode>(Node.get());
+ assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+ if (AccGroupSet2.count(Item))
+ Intersection.push_back(Item);
+ }
+ }
+
+ if (Intersection.size() == 0)
+ return nullptr;
+ if (Intersection.size() == 1)
+ return cast<MDNode>(Intersection.front());
+
+ LLVMContext &Ctx = Inst1->getContext();
+ return MDNode::get(Ctx, Intersection);
+}
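
A minimal usage sketch follows (the helper copyCommonAccessGroups and its arguments are hypothetical, not part of this diff): uniteAccessGroups() keeps every access group found in either argument, while intersectAccessGroups() keeps only the groups both instructions belong to, returning nullptr when there is none.

  // Sketch only: give a widened instruction the llvm.access.group metadata
  // that is common to the two instructions it replaces.
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Metadata.h"

  static void copyCommonAccessGroups(llvm::Instruction *Widened,
                                     const llvm::Instruction *I1,
                                     const llvm::Instruction *I2) {
    // Instructions that do not touch memory do not constrain the result
    // (see intersectAccessGroups() above); a null result clears the metadata.
    llvm::MDNode *Common = llvm::intersectAccessGroups(I1, I2);
    Widened->setMetadata(llvm::LLVMContext::MD_access_group, Common);
  }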
+
/// \returns \p I after propagating metadata from \p VL.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
Instruction *I0 = cast<Instruction>(VL[0]);
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
I0->getAllMetadataOtherThanDebugLoc(Metadata);
- for (auto Kind :
- {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
- LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load}) {
+ for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
+ LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_access_group}) {
MDNode *MD = I0->getMetadata(Kind);
for (int J = 1, E = VL.size(); MD && J != E; ++J) {
@@ -480,6 +582,9 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
case LLVMContext::MD_invariant_load:
MD = MDNode::intersect(MD, IMD);
break;
+ case LLVMContext::MD_access_group:
+ MD = intersectAccessGroups(Inst, IJ);
+ break;
default:
llvm_unreachable("unhandled metadata");
}
@@ -491,6 +596,36 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
return Inst;
}
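
A short sketch of the intended call pattern (the helper tagWideInstruction and its arguments are illustrative, not part of this diff): a wide instruction that stands in for a whole bundle of scalars keeps only the metadata that is valid for every member, which with this change now includes llvm.access.group.

  // Sketch only: merge the bundle's metadata onto the replacement instruction.
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/Instruction.h"

  static void tagWideInstruction(llvm::Instruction *Wide,
                                 llvm::ArrayRef<llvm::Value *> Bundle) {
    // tbaa, alias.scope, noalias, fpmath, nontemporal, invariant.load and
    // access_group from all members of Bundle are combined and attached.
    llvm::propagateMetadata(Wide, Bundle);
  }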
+Constant *
+llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
+ const InterleaveGroup<Instruction> &Group) {
+ // All 1's means mask is not needed.
+ if (Group.getNumMembers() == Group.getFactor())
+ return nullptr;
+
+ // TODO: support reversed access.
+ assert(!Group.isReverse() && "Reversed group not supported.");
+
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < Group.getFactor(); ++j) {
+ unsigned HasMember = Group.getMember(j) ? 1 : 0;
+ Mask.push_back(Builder.getInt1(HasMember));
+ }
+
+ return ConstantVector::get(Mask);
+}
+
+Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
+ unsigned ReplicationFactor, unsigned VF) {
+ SmallVector<Constant *, 16> MaskVec;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < ReplicationFactor; j++)
+ MaskVec.push_back(Builder.getInt32(i));
+
+ return ConstantVector::get(MaskVec);
+}
+
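As a worked example (the helper below and its parameter values are illustrative, not part of this commit), the mask contents follow directly from the two loops above:

  // Sketch only: concrete shuffle masks produced by the helpers above.
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/IRBuilder.h"

  static llvm::Constant *exampleReplicatedMask(llvm::LLVMContext &Ctx) {
    llvm::IRBuilder<> Builder(Ctx);
    // For ReplicationFactor = 3 and VF = 4 this is the i32 vector
    // <0,0,0, 1,1,1, 2,2,2, 3,3,3>: each of the 4 lanes repeated 3 times.
    return llvm::createReplicatedMask(Builder, /*ReplicationFactor=*/3, /*VF=*/4);
  }
  // createBitMaskForGaps(), by comparison, yields an i1 mask with one bit per
  // slot of the interleave pattern -- e.g. <1,0,1,0, 1,0,1,0> for VF = 2 and a
  // factor-4 group populated only at indices 0 and 2 -- and nullptr when the
  // group has no gaps.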
Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
unsigned NumVecs) {
SmallVector<Constant *, 16> Mask;
@@ -575,3 +710,364 @@ Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
return ResList[0];
}
+
+bool InterleavedAccessInfo::isStrided(int Stride) {
+ unsigned Factor = std::abs(Stride);
+ return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
+}
+
+void InterleavedAccessInfo::collectConstStrideAccesses(
+ MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
+ const ValueToValueMap &Strides) {
+ auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
+
+ // Since it's desired that the load/store instructions be maintained in
+ // "program order" for the interleaved access analysis, we have to visit the
+ // blocks in the loop in reverse postorder (i.e., in a topological order).
+ // Such an ordering will ensure that any load/store that may be executed
+ // before a second load/store will precede the second load/store in
+ // AccessStrideInfo.
+ LoopBlocksDFS DFS(TheLoop);
+ DFS.perform(LI);
+ for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
+ for (auto &I : *BB) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ auto *SI = dyn_cast<StoreInst>(&I);
+ if (!LI && !SI)
+ continue;
+
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ // We don't check wrapping here because we don't know yet if Ptr will be
+ // part of a full group or a group with gaps. Checking wrapping for all
+ // pointers (even those that end up in groups with no gaps) will be overly
+ // conservative. For full groups, wrapping should be ok since if we would
+ // wrap around the address space we would do a memory access at nullptr
+ // even without the transformation. The wrapping checks are therefore
+ // deferred until after we've formed the interleaved groups.
+ int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+ /*Assume=*/true, /*ShouldCheckWrap=*/false);
+
+ const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+
+ // An alignment of 0 means target ABI alignment.
+ unsigned Align = getLoadStoreAlignment(&I);
+ if (!Align)
+ Align = DL.getABITypeAlignment(PtrTy->getElementType());
+
+ AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
+ }
+}
+
+// Analyze interleaved accesses and collect them into interleaved load and
+// store groups.
+//
+// When generating code for an interleaved load group, we effectively hoist all
+// loads in the group to the location of the first load in program order. When
+// generating code for an interleaved store group, we sink all stores to the
+// location of the last store. This code motion can change the order of load
+// and store instructions and may break dependences.
+//
+// The code generation strategy mentioned above ensures that we won't violate
+// any write-after-read (WAR) dependences.
+//
+// E.g., for the WAR dependence: a = A[i]; // (1)
+// A[i] = b; // (2)
+//
+// The store group of (2) is always inserted at or below (2), and the load
+// group of (1) is always inserted at or above (1). Thus, the instructions will
+// never be reordered. All other dependences are checked to ensure the
+// correctness of the instruction reordering.
+//
+// The algorithm visits all memory accesses in the loop in bottom-up program
+// order. Program order is established by traversing the blocks in the loop in
+// reverse postorder when collecting the accesses.
+//
+// We visit the memory accesses in bottom-up order because it can simplify the
+// construction of store groups in the presence of write-after-write (WAW)
+// dependences.
+//
+// E.g., for the WAW dependence: A[i] = a; // (1)
+// A[i] = b; // (2)
+// A[i + 1] = c; // (3)
+//
+// We will first create a store group with (3) and (2). (1) can't be added to
+// this group because it and (2) are dependent. However, (1) can be grouped
+// with other accesses that may precede it in program order. Note that a
+// bottom-up order does not imply that WAW dependences should not be checked.
+void InterleavedAccessInfo::analyzeInterleaving(
+ bool EnablePredicatedInterleavedMemAccesses) {
+ LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
+ const ValueToValueMap &Strides = LAI->getSymbolicStrides();
+
+ // Holds all accesses with a constant stride.
+ MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
+ collectConstStrideAccesses(AccessStrideInfo, Strides);
+
+ if (AccessStrideInfo.empty())
+ return;
+
+ // Collect the dependences in the loop.
+ collectDependences();
+
+ // Holds all interleaved store groups temporarily.
+ SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
+ // Holds all interleaved load groups temporarily.
+ SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
+
+ // Search in bottom-up program order for pairs of accesses (A and B) that can
+ // form interleaved load or store groups. In the algorithm below, access A
+ // precedes access B in program order. We initialize a group for B in the
+ // outer loop of the algorithm, and then in the inner loop, we attempt to
+ // insert each A into B's group if:
+ //
+ // 1. A and B have the same stride,
+ // 2. A and B have the same memory object size, and
+ // 3. A belongs in B's group according to its distance from B.
+ //
+ // Special care is taken to ensure group formation will not break any
+ // dependences.
+ for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
+ BI != E; ++BI) {
+ Instruction *B = BI->first;
+ StrideDescriptor DesB = BI->second;
+
+ // Initialize a group for B if it has an allowable stride. Even if we don't
+ // create a group for B, we continue with the bottom-up algorithm to ensure
+ // we don't break any of B's dependences.
+ InterleaveGroup<Instruction> *Group = nullptr;
+ if (isStrided(DesB.Stride) &&
+ (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
+ Group = getInterleaveGroup(B);
+ if (!Group) {
+ LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
+ << '\n');
+ Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
+ }
+ if (B->mayWriteToMemory())
+ StoreGroups.insert(Group);
+ else
+ LoadGroups.insert(Group);
+ }
+
+ for (auto AI = std::next(BI); AI != E; ++AI) {
+ Instruction *A = AI->first;
+ StrideDescriptor DesA = AI->second;
+
+ // Our code motion strategy implies that we can't have dependences
+ // between accesses in an interleaved group and other accesses located
+ // between the first and last member of the group. Note that this also
+ // means that a group can't have more than one member at a given offset.
+ // The accesses in a group can have dependences with other accesses, but
+ // we must ensure we don't extend the boundaries of the group such that
+ // we encompass those dependent accesses.
+ //
+ // For example, assume we have the sequence of accesses shown below in a
+ // stride-2 loop:
+ //
+ // (1, 2) is a group | A[i] = a; // (1)
+ // | A[i-1] = b; // (2) |
+ // A[i-3] = c; // (3)
+ // A[i] = d; // (4) | (2, 4) is not a group
+ //
+ // Because accesses (2) and (3) are dependent, we can group (2) with (1)
+ // but not with (4). If we did, the dependent access (3) would be within
+ // the boundaries of the (2, 4) group.
+ if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
+ // If a dependence exists and A is already in a group, we know that A
+ // must be a store since A precedes B and WAR dependences are allowed.
+ // Thus, A would be sunk below B. We release A's group to prevent this
+ // illegal code motion. A will then be free to form another group with
+ // instructions that precede it.
+ if (isInterleaved(A)) {
+ InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
+ StoreGroups.remove(StoreGroup);
+ releaseGroup(StoreGroup);
+ }
+
+ // If a dependence exists and A is not already in a group (or it was
+ // and we just released it), B might be hoisted above A (if B is a
+ // load) or another store might be sunk below A (if B is a store). In
+ // either case, we can't add additional instructions to B's group. B
+ // will only form a group with instructions that it precedes.
+ break;
+ }
+
+ // At this point, we've checked for illegal code motion. If either A or B
+ // isn't strided, there's nothing left to do.
+ if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
+ continue;
+
+ // Ignore A if it's already in a group or isn't the same kind of memory
+ // operation as B.
+ // Note that mayReadFromMemory() isn't mutually exclusive with
+ // mayWriteToMemory() in the case of atomic loads. We shouldn't see those
+ // here; canVectorizeMemory() should have returned false, except for the
+ // case where we asked for optimization remarks.
+ if (isInterleaved(A) ||
+ (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
+ (A->mayWriteToMemory() != B->mayWriteToMemory()))
+ continue;
+
+ // Check rules 1 and 2. Ignore A if its stride or size is different from
+ // that of B.
+ if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
+ continue;
+
+ // Ignore A if the memory objects of A and B don't belong to the same
+ // address space.
+ if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
+ continue;
+
+ // Calculate the distance from A to B.
+ const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
+ if (!DistToB)
+ continue;
+ int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
+
+ // Check rule 3. Ignore A if its distance to B is not a multiple of the
+ // size.
+ if (DistanceToB % static_cast<int64_t>(DesB.Size))
+ continue;
+
+ // All members of a predicated interleave-group must have the same predicate,
+ // and currently must reside in the same BB.
+ BasicBlock *BlockA = A->getParent();
+ BasicBlock *BlockB = B->getParent();
+ if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
+ (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
+ continue;
+
+ // The index of A is the index of B plus A's distance to B in multiples
+ // of the size.
+ int IndexA =
+ Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
+
+ // Try to insert A into B's group.
+ if (Group->insertMember(A, IndexA, DesA.Align)) {
+ LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
+ << " into the interleave group with" << *B
+ << '\n');
+ InterleaveGroupMap[A] = Group;
+
+ // Set the first load in program order as the insert position.
+ if (A->mayReadFromMemory())
+ Group->setInsertPos(A);
+ }
+ } // Iteration over A accesses.
+ } // Iteration over B accesses.
+
+ // Remove interleaved store groups with gaps.
+ for (auto *Group : StoreGroups)
+ if (Group->getNumMembers() != Group->getFactor()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved store group due "
+ "to gaps.\n");
+ releaseGroup(Group);
+ }
+ // Remove interleaved groups with gaps (currently only loads) whose memory
+ // accesses may wrap around. We have to revisit the getPtrStride analysis,
+ // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
+ // not check wrapping (see documentation there).
+ // FORNOW we use Assume=false;
+ // TODO: Change to Assume=true while making sure we don't exceed the threshold
+ // of runtime SCEV assumption checks (thereby potentially failing to
+ // vectorize altogether).
+ // Additional optional optimizations:
+ // TODO: If we are peeling the loop and we know that the first pointer doesn't
+ // wrap then we can deduce that all pointers in the group don't wrap.
+ // This means that we can forcefully peel the loop in order to only have to
+ // check the first pointer for no-wrap. Once we change to Assume=true, we'll
+ // only need at most one runtime check per interleaved group.
+ for (auto *Group : LoadGroups) {
+ // Case 1: A full group. We can skip the checks; for full groups, if the wide
+ // load would wrap around the address space we would do a memory access at
+ // nullptr even without the transformation.
+ if (Group->getNumMembers() == Group->getFactor())
+ continue;
+
+ // Case 2: If the first and last members of the group don't wrap, this implies
+ // that all the pointers in the group don't wrap.
+ // So we check only group member 0 (which is always guaranteed to exist)
+ // and group member Factor - 1; if the latter doesn't exist we rely on
+ // peeling (if it is a non-reversed access -- see Case 3).
+ Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
+ if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "first group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
+ if (LastMember) {
+ Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
+ if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "last group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ }
+ } else {
+ // Case 3: A non-reversed interleaved load group with gaps: We need
+ // to execute at least one scalar epilogue iteration. This will ensure
+ // we don't speculatively access memory out-of-bounds. We only need
+ // to look for a member at index factor - 1, since every group must have
+ // a member at index zero.
+ if (Group->isReverse()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "a reverse access with gaps.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ LLVM_DEBUG(
+ dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
+ RequiresScalarEpilogue = true;
+ }
+ }
+}
+
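For orientation, the kind of source loop whose accesses the analysis above groups looks like the following (illustrative C++ only, not taken from this commit; the usual aliasing/runtime checks are assumed to succeed):

  // Sketch only: B[i] and B[i+1] form a factor-2 interleaved load group, and
  // A[i] and A[i+1] form a factor-2 interleaved store group. Both groups are
  // full (2 members, factor 2), so no gap mask or scalar epilogue is required.
  void sumDiffPairs(float *A, const float *B, int n) {
    for (int i = 0; i < n; i += 2) {
      float x = B[i];
      float y = B[i + 1];
      A[i] = x + y;
      A[i + 1] = x - y;
    }
  }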
+void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
+ // If no group had triggered the requirement to create an epilogue loop,
+ // there is nothing to do.
+ if (!requiresScalarEpilogue())
+ return;
+
+ // Avoid releasing a Group twice.
+ SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
+ for (auto &I : InterleaveGroupMap) {
+ InterleaveGroup<Instruction> *Group = I.second;
+ if (Group->requiresScalarEpilogue())
+ DelSet.insert(Group);
+ }
+ for (auto *Ptr : DelSet) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Invalidate candidate interleaved group due to gaps that "
+ "require a scalar epilogue (not allowed under optsize) and cannot "
+ "be masked (not enabled). \n");
+ releaseGroup(Ptr);
+ }
+
+ RequiresScalarEpilogue = false;
+}
+
+template <typename InstT>
+void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
+ llvm_unreachable("addMetadata can only be used for Instruction");
+}
+
+namespace llvm {
+template <>
+void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
+ SmallVector<Value *, 4> VL;
+ std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
+ [](std::pair<int, Instruction *> p) { return p.second; });
+ propagateMetadata(NewInst, VL);
+}
+}