author     Dimitry Andric <dim@FreeBSD.org>    2019-01-19 10:01:25 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2019-01-19 10:01:25 +0000
commit     d8e91e46262bc44006913e6796843909f1ac7bcd (patch)
tree       7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Analysis/VectorUtils.cpp
parent     b7eb8e35e481a74962664b63dfb09483b200209a (diff)
Vendor import of llvm trunk r351319 (just before the release_80 branch point)
tag: vendor/llvm/llvm-trunk-r351319
Notes:
    svn path=/vendor/llvm/dist/; revision=343171
    svn path=/vendor/llvm/llvm-trunk-r351319/; revision=343172; tag=vendor/llvm/llvm-trunk-r351319
Diffstat (limited to 'lib/Analysis/VectorUtils.cpp')
-rw-r--r--   lib/Analysis/VectorUtils.cpp | 530
1 file changed, 513 insertions, 17 deletions
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index d73d24736439..5656a19d7e0d 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -25,16 +26,30 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
+#define DEBUG_TYPE "vectorutils"
+
using namespace llvm;
using namespace llvm::PatternMatch;
-/// Identify if the intrinsic is trivially vectorizable.
-/// This method returns true if the intrinsic's argument types are all
-/// scalars for the scalar form of the intrinsic and all vectors for
-/// the vector form of the intrinsic.
+/// Maximum factor for an interleaved memory access.
+static cl::opt<unsigned> MaxInterleaveGroupFactor(
+ "max-interleave-group-factor", cl::Hidden,
+ cl::desc("Maximum factor for an interleaved access group (default = 8)"),
+ cl::init(8));
+
+/// Return true if all of the intrinsic's arguments and return type are scalars
+/// for the scalar form of the intrinsic and vectors for the vector form of the
+/// intrinsic.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::sqrt:
+ case Intrinsic::bswap: // Begin integer bit-manipulation.
+ case Intrinsic::bitreverse:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::exp:
@@ -45,6 +60,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::fabs:
case Intrinsic::minnum:
case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
@@ -52,15 +69,15 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
- case Intrinsic::bswap:
- case Intrinsic::bitreverse:
- case Intrinsic::ctpop:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
case Intrinsic::powi:
+ case Intrinsic::canonicalize:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat:
return true;
default:
return false;
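
For context, the sketch below is not part of this commit; the helper name canWidenLaneForLane is made up for illustration. It shows how a transform might consult this predicate before widening a call: when it returns true, every scalar operand and the result of the call simply become vectors of the chosen width.

  // Sketch only, assuming the LLVM headers named below.
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/Intrinsics.h"

  static bool canWidenLaneForLane(const llvm::CallInst &CI) {
    llvm::Function *Callee = CI.getCalledFunction();
    if (!Callee)
      return false;
    // True exactly for the intrinsics listed in the switch above.
    return llvm::isTriviallyVectorizable(Callee->getIntrinsicID());
  }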
@@ -270,9 +287,10 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
}
// Extract a value from a vector add operation with a constant zero.
- Value *Val = nullptr; Constant *Con = nullptr;
- if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
- if (Constant *Elt = Con->getAggregateElement(EltNo))
+ // TODO: Use getBinOpIdentity() to generalize this.
+ Value *Val; Constant *C;
+ if (match(V, m_Add(m_Value(Val), m_Constant(C))))
+ if (Constant *Elt = C->getAggregateElement(EltNo))
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
@@ -450,16 +468,100 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
return MinBWs;
}
+/// Add all access groups in @p AccGroups to @p List.
+template <typename ListT>
+static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
+ // Interpret an access group as a list containing itself.
+ if (AccGroups->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
+ List.insert(AccGroups);
+ return;
+ }
+
+ for (auto &AccGroupListOp : AccGroups->operands()) {
+ auto *Item = cast<MDNode>(AccGroupListOp.get());
+ assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+ List.insert(Item);
+ }
+}
+
+MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
+ if (!AccGroups1)
+ return AccGroups2;
+ if (!AccGroups2)
+ return AccGroups1;
+ if (AccGroups1 == AccGroups2)
+ return AccGroups1;
+
+ SmallSetVector<Metadata *, 4> Union;
+ addToAccessGroupList(Union, AccGroups1);
+ addToAccessGroupList(Union, AccGroups2);
+
+ if (Union.size() == 0)
+ return nullptr;
+ if (Union.size() == 1)
+ return cast<MDNode>(Union.front());
+
+ LLVMContext &Ctx = AccGroups1->getContext();
+ return MDNode::get(Ctx, Union.getArrayRef());
+}
+
+MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
+ const Instruction *Inst2) {
+ bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
+ bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
+
+ if (!MayAccessMem1 && !MayAccessMem2)
+ return nullptr;
+ if (!MayAccessMem1)
+ return Inst2->getMetadata(LLVMContext::MD_access_group);
+ if (!MayAccessMem2)
+ return Inst1->getMetadata(LLVMContext::MD_access_group);
+
+ MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
+ MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
+ if (!MD1 || !MD2)
+ return nullptr;
+ if (MD1 == MD2)
+ return MD1;
+
+ // Use a set for a scalable 'contains' check.
+ SmallPtrSet<Metadata *, 4> AccGroupSet2;
+ addToAccessGroupList(AccGroupSet2, MD2);
+
+ SmallVector<Metadata *, 4> Intersection;
+ if (MD1->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
+ if (AccGroupSet2.count(MD1))
+ Intersection.push_back(MD1);
+ } else {
+ for (const MDOperand &Node : MD1->operands()) {
+ auto *Item = cast<MDNode>(Node.get());
+ assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+ if (AccGroupSet2.count(Item))
+ Intersection.push_back(Item);
+ }
+ }
+
+ if (Intersection.size() == 0)
+ return nullptr;
+ if (Intersection.size() == 1)
+ return cast<MDNode>(Intersection.front());
+
+ LLVMContext &Ctx = Inst1->getContext();
+ return MDNode::get(Ctx, Intersection);
+}
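
A minimal usage sketch follows (the helper copyCommonAccessGroups and its arguments are hypothetical, not part of this diff): uniteAccessGroups() keeps every access group found in either argument, while intersectAccessGroups() keeps only the groups both instructions belong to, returning nullptr when there is none.

  // Sketch only: give a widened instruction the llvm.access.group metadata
  // that is common to the two instructions it replaces.
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Metadata.h"

  static void copyCommonAccessGroups(llvm::Instruction *Widened,
                                     const llvm::Instruction *I1,
                                     const llvm::Instruction *I2) {
    // Instructions that do not touch memory do not constrain the result
    // (see intersectAccessGroups() above); a null result clears the metadata.
    llvm::MDNode *Common = llvm::intersectAccessGroups(I1, I2);
    Widened->setMetadata(llvm::LLVMContext::MD_access_group, Common);
  }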
+
/// \returns \p I after propagating metadata from \p VL.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
Instruction *I0 = cast<Instruction>(VL[0]);
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
I0->getAllMetadataOtherThanDebugLoc(Metadata);
- for (auto Kind :
- {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
- LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load}) {
+ for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
+ LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_access_group}) {
MDNode *MD = I0->getMetadata(Kind);
for (int J = 1, E = VL.size(); MD && J != E; ++J) {
@@ -480,6 +582,9 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
case LLVMContext::MD_invariant_load:
MD = MDNode::intersect(MD, IMD);
break;
+ case LLVMContext::MD_access_group:
+ MD = intersectAccessGroups(Inst, IJ);
+ break;
default:
llvm_unreachable("unhandled metadata");
}
@@ -491,6 +596,36 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
return Inst;
}
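
A short sketch of the intended call pattern (the helper tagWideInstruction and its arguments are illustrative, not part of this diff): a wide instruction that stands in for a whole bundle of scalars keeps only the metadata that is valid for every member, which with this change now includes llvm.access.group.

  // Sketch only: merge the bundle's metadata onto the replacement instruction.
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/Instruction.h"

  static void tagWideInstruction(llvm::Instruction *Wide,
                                 llvm::ArrayRef<llvm::Value *> Bundle) {
    // tbaa, alias.scope, noalias, fpmath, nontemporal, invariant.load and
    // access_group from all members of Bundle are combined and attached.
    llvm::propagateMetadata(Wide, Bundle);
  }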
+Constant *
+llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
+ const InterleaveGroup<Instruction> &Group) {
+ // All 1's means mask is not needed.
+ if (Group.getNumMembers() == Group.getFactor())
+ return nullptr;
+
+ // TODO: support reversed access.
+ assert(!Group.isReverse() && "Reversed group not supported.");
+
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < Group.getFactor(); ++j) {
+ unsigned HasMember = Group.getMember(j) ? 1 : 0;
+ Mask.push_back(Builder.getInt1(HasMember));
+ }
+
+ return ConstantVector::get(Mask);
+}
+
+Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
+ unsigned ReplicationFactor, unsigned VF) {
+ SmallVector<Constant *, 16> MaskVec;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < ReplicationFactor; j++)
+ MaskVec.push_back(Builder.getInt32(i));
+
+ return ConstantVector::get(MaskVec);
+}
+
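As a worked example (the helper below and its parameter values are illustrative, not part of this commit), the mask contents follow directly from the two loops above:

  // Sketch only: concrete shuffle masks produced by the helpers above.
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/IRBuilder.h"

  static llvm::Constant *exampleReplicatedMask(llvm::LLVMContext &Ctx) {
    llvm::IRBuilder<> Builder(Ctx);
    // For ReplicationFactor = 3 and VF = 4 this is the i32 vector
    // <0,0,0, 1,1,1, 2,2,2, 3,3,3>: each of the 4 lanes repeated 3 times.
    return llvm::createReplicatedMask(Builder, /*ReplicationFactor=*/3, /*VF=*/4);
  }
  // createBitMaskForGaps(), by comparison, yields an i1 mask with one bit per
  // slot of the interleave pattern -- e.g. <1,0,1,0, 1,0,1,0> for VF = 2 and a
  // factor-4 group populated only at indices 0 and 2 -- and nullptr when the
  // group has no gaps.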
Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
unsigned NumVecs) {
SmallVector<Constant *, 16> Mask;
@@ -575,3 +710,364 @@ Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
return ResList[0];
}
+
+bool InterleavedAccessInfo::isStrided(int Stride) {
+ unsigned Factor = std::abs(Stride);
+ return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
+}
+
+void InterleavedAccessInfo::collectConstStrideAccesses(
+ MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
+ const ValueToValueMap &Strides) {
+ auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
+
+ // Since it's desired that the load/store instructions be maintained in
+ // "program order" for the interleaved access analysis, we have to visit the
+ // blocks in the loop in reverse postorder (i.e., in a topological order).
+ // Such an ordering will ensure that any load/store that may be executed
+ // before a second load/store will precede the second load/store in
+ // AccessStrideInfo.
+ LoopBlocksDFS DFS(TheLoop);
+ DFS.perform(LI);
+ for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
+ for (auto &I : *BB) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ auto *SI = dyn_cast<StoreInst>(&I);
+ if (!LI && !SI)
+ continue;
+
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ // We don't check wrapping here because we don't know yet if Ptr will be
+ // part of a full group or a group with gaps. Checking wrapping for all
+ // pointers (even those that end up in groups with no gaps) will be overly
+ // conservative. For full groups, wrapping should be ok since if we would
+ // wrap around the address space we would do a memory access at nullptr
+ // even without the transformation. The wrapping checks are therefore
+ // deferred until after we've formed the interleaved groups.
+ int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+ /*Assume=*/true, /*ShouldCheckWrap=*/false);
+
+ const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+
+ // An alignment of 0 means target ABI alignment.
+ unsigned Align = getLoadStoreAlignment(&I);
+ if (!Align)
+ Align = DL.getABITypeAlignment(PtrTy->getElementType());
+
+ AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
+ }
+}
+
+// Analyze interleaved accesses and collect them into interleaved load and
+// store groups.
+//
+// When generating code for an interleaved load group, we effectively hoist all
+// loads in the group to the location of the first load in program order. When
+// generating code for an interleaved store group, we sink all stores to the
+// location of the last store. This code motion can change the order of load
+// and store instructions and may break dependences.
+//
+// The code generation strategy mentioned above ensures that we won't violate
+// any write-after-read (WAR) dependences.
+//
+// E.g., for the WAR dependence: a = A[i]; // (1)
+// A[i] = b; // (2)
+//
+// The store group of (2) is always inserted at or below (2), and the load
+// group of (1) is always inserted at or above (1). Thus, the instructions will
+// never be reordered. All other dependences are checked to ensure the
+// correctness of the instruction reordering.
+//
+// The algorithm visits all memory accesses in the loop in bottom-up program
+// order. Program order is established by traversing the blocks in the loop in
+// reverse postorder when collecting the accesses.
+//
+// We visit the memory accesses in bottom-up order because it can simplify the
+// construction of store groups in the presence of write-after-write (WAW)
+// dependences.
+//
+// E.g., for the WAW dependence: A[i] = a; // (1)
+// A[i] = b; // (2)
+// A[i + 1] = c; // (3)
+//
+// We will first create a store group with (3) and (2). (1) can't be added to
+// this group because it and (2) are dependent. However, (1) can be grouped
+// with other accesses that may precede it in program order. Note that a
+// bottom-up order does not imply that WAW dependences should not be checked.
+void InterleavedAccessInfo::analyzeInterleaving(
+ bool EnablePredicatedInterleavedMemAccesses) {
+ LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
+ const ValueToValueMap &Strides = LAI->getSymbolicStrides();
+
+ // Holds all accesses with a constant stride.
+ MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
+ collectConstStrideAccesses(AccessStrideInfo, Strides);
+
+ if (AccessStrideInfo.empty())
+ return;
+
+ // Collect the dependences in the loop.
+ collectDependences();
+
+ // Holds all interleaved store groups temporarily.
+ SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
+ // Holds all interleaved load groups temporarily.
+ SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
+
+ // Search in bottom-up program order for pairs of accesses (A and B) that can
+ // form interleaved load or store groups. In the algorithm below, access A
+ // precedes access B in program order. We initialize a group for B in the
+ // outer loop of the algorithm, and then in the inner loop, we attempt to
+ // insert each A into B's group if:
+ //
+ // 1. A and B have the same stride,
+ // 2. A and B have the same memory object size, and
+ // 3. A belongs in B's group according to its distance from B.
+ //
+ // Special care is taken to ensure group formation will not break any
+ // dependences.
+ for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
+ BI != E; ++BI) {
+ Instruction *B = BI->first;
+ StrideDescriptor DesB = BI->second;
+
+ // Initialize a group for B if it has an allowable stride. Even if we don't
+ // create a group for B, we continue with the bottom-up algorithm to ensure
+ // we don't break any of B's dependences.
+ InterleaveGroup<Instruction> *Group = nullptr;
+ if (isStrided(DesB.Stride) &&
+ (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
+ Group = getInterleaveGroup(B);
+ if (!Group) {
+ LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
+ << '\n');
+ Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
+ }
+ if (B->mayWriteToMemory())
+ StoreGroups.insert(Group);
+ else
+ LoadGroups.insert(Group);
+ }
+
+ for (auto AI = std::next(BI); AI != E; ++AI) {
+ Instruction *A = AI->first;
+ StrideDescriptor DesA = AI->second;
+
+ // Our code motion strategy implies that we can't have dependences
+ // between accesses in an interleaved group and other accesses located
+ // between the first and last member of the group. Note that this also
+ // means that a group can't have more than one member at a given offset.
+ // The accesses in a group can have dependences with other accesses, but
+ // we must ensure we don't extend the boundaries of the group such that
+ // we encompass those dependent accesses.
+ //
+ // For example, assume we have the sequence of accesses shown below in a
+ // stride-2 loop:
+ //
+ // (1, 2) is a group | A[i] = a; // (1)
+ // | A[i-1] = b; // (2) |
+ // A[i-3] = c; // (3)
+ // A[i] = d; // (4) | (2, 4) is not a group
+ //
+ // Because accesses (2) and (3) are dependent, we can group (2) with (1)
+ // but not with (4). If we did, the dependent access (3) would be within
+ // the boundaries of the (2, 4) group.
+ if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
+ // If a dependence exists and A is already in a group, we know that A
+ // must be a store since A precedes B and WAR dependences are allowed.
+ // Thus, A would be sunk below B. We release A's group to prevent this
+ // illegal code motion. A will then be free to form another group with
+ // instructions that precede it.
+ if (isInterleaved(A)) {
+ InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
+ StoreGroups.remove(StoreGroup);
+ releaseGroup(StoreGroup);
+ }
+
+ // If a dependence exists and A is not already in a group (or it was
+ // and we just released it), B might be hoisted above A (if B is a
+ // load) or another store might be sunk below A (if B is a store). In
+ // either case, we can't add additional instructions to B's group. B
+ // will only form a group with instructions that it precedes.
+ break;
+ }
+
+ // At this point, we've checked for illegal code motion. If either A or B
+ // isn't strided, there's nothing left to do.
+ if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
+ continue;
+
+ // Ignore A if it's already in a group or isn't the same kind of memory
+ // operation as B.
+ // Note that mayReadFromMemory() isn't mutually exclusive with
+ // mayWriteToMemory() in the case of atomic loads. We shouldn't see those
+ // here; canVectorizeMemory() should have returned false, except for the
+ // case where we asked for optimization remarks.
+ if (isInterleaved(A) ||
+ (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
+ (A->mayWriteToMemory() != B->mayWriteToMemory()))
+ continue;
+
+ // Check rules 1 and 2. Ignore A if its stride or size is different from
+ // that of B.
+ if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
+ continue;
+
+ // Ignore A if the memory objects of A and B don't belong to the same
+ // address space.
+ if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
+ continue;
+
+ // Calculate the distance from A to B.
+ const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
+ if (!DistToB)
+ continue;
+ int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
+
+ // Check rule 3. Ignore A if its distance to B is not a multiple of the
+ // size.
+ if (DistanceToB % static_cast<int64_t>(DesB.Size))
+ continue;
+
+ // All members of a predicated interleave-group must have the same predicate,
+ // and currently must reside in the same BB.
+ BasicBlock *BlockA = A->getParent();
+ BasicBlock *BlockB = B->getParent();
+ if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
+ (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
+ continue;
+
+ // The index of A is the index of B plus A's distance to B in multiples
+ // of the size.
+ int IndexA =
+ Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
+
+ // Try to insert A into B's group.
+ if (Group->insertMember(A, IndexA, DesA.Align)) {
+ LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
+ << " into the interleave group with" << *B
+ << '\n');
+ InterleaveGroupMap[A] = Group;
+
+ // Set the first load in program order as the insert position.
+ if (A->mayReadFromMemory())
+ Group->setInsertPos(A);
+ }
+ } // Iteration over A accesses.
+ } // Iteration over B accesses.
+
+ // Remove interleaved store groups with gaps.
+ for (auto *Group : StoreGroups)
+ if (Group->getNumMembers() != Group->getFactor()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved store group due "
+ "to gaps.\n");
+ releaseGroup(Group);
+ }
+ // Remove interleaved groups with gaps (currently only loads) whose memory
+ // accesses may wrap around. We have to revisit the getPtrStride analysis,
+ // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
+ // not check wrapping (see documentation there).
+ // FORNOW we use Assume=false;
+ // TODO: Change to Assume=true while making sure we don't exceed the threshold
+ // of runtime SCEV assumption checks (thereby potentially failing to
+ // vectorize altogether).
+ // Additional optional optimizations:
+ // TODO: If we are peeling the loop and we know that the first pointer doesn't
+ // wrap then we can deduce that all pointers in the group don't wrap.
+ // This means that we can forcefully peel the loop in order to only have to
+ // check the first pointer for no-wrap. Once we change to Assume=true, we'll
+ // only need at most one runtime check per interleaved group.
+ for (auto *Group : LoadGroups) {
+ // Case 1: A full group. We can skip the checks; for full groups, if the wide
+ // load would wrap around the address space we would do a memory access at
+ // nullptr even without the transformation.
+ if (Group->getNumMembers() == Group->getFactor())
+ continue;
+
+ // Case 2: If the first and last members of the group don't wrap, this implies
+ // that all the pointers in the group don't wrap.
+ // So we check only group member 0 (which is always guaranteed to exist)
+ // and group member Factor - 1; if the latter doesn't exist we rely on
+ // peeling (if it is a non-reversed access -- see Case 3).
+ Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
+ if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "first group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
+ if (LastMember) {
+ Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
+ if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "last group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ }
+ } else {
+ // Case 3: A non-reversed interleaved load group with gaps: We need
+ // to execute at least one scalar epilogue iteration. This will ensure
+ // we don't speculatively access memory out-of-bounds. We only need
+ // to look for a member at index factor - 1, since every group must have
+ // a member at index zero.
+ if (Group->isReverse()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "a reverse access with gaps.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ LLVM_DEBUG(
+ dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
+ RequiresScalarEpilogue = true;
+ }
+ }
+}
+
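For orientation, the kind of source loop whose accesses the analysis above groups looks like the following (illustrative C++ only, not taken from this commit; the usual aliasing/runtime checks are assumed to succeed):

  // Sketch only: B[i] and B[i+1] form a factor-2 interleaved load group, and
  // A[i] and A[i+1] form a factor-2 interleaved store group. Both groups are
  // full (2 members, factor 2), so no gap mask or scalar epilogue is required.
  void sumDiffPairs(float *A, const float *B, int n) {
    for (int i = 0; i < n; i += 2) {
      float x = B[i];
      float y = B[i + 1];
      A[i] = x + y;
      A[i + 1] = x - y;
    }
  }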
+void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
+ // If no group had triggered the requirement to create an epilogue loop,
+ // there is nothing to do.
+ if (!requiresScalarEpilogue())
+ return;
+
+ // Avoid releasing a Group twice.
+ SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
+ for (auto &I : InterleaveGroupMap) {
+ InterleaveGroup<Instruction> *Group = I.second;
+ if (Group->requiresScalarEpilogue())
+ DelSet.insert(Group);
+ }
+ for (auto *Ptr : DelSet) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Invalidate candidate interleaved group due to gaps that "
+ "require a scalar epilogue (not allowed under optsize) and cannot "
+ "be masked (not enabled). \n");
+ releaseGroup(Ptr);
+ }
+
+ RequiresScalarEpilogue = false;
+}
+
+template <typename InstT>
+void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
+ llvm_unreachable("addMetadata can only be used for Instruction");
+}
+
+namespace llvm {
+template <>
+void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
+ SmallVector<Value *, 4> VL;
+ std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
+ [](std::pair<int, Instruction *> p) { return p.second; });
+ propagateMetadata(NewInst, VL);
+}
+}