| author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
| commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
| tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Analysis/VectorUtils.cpp | |
| parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) | |
| download | src-d8e91e46262bc44006913e6796843909f1ac7bcd.tar.gz, src-d8e91e46262bc44006913e6796843909f1ac7bcd.zip | |
Vendor import of llvm trunk r351319 (just before the release_80 branch)

Tag: vendor/llvm/llvm-trunk-r351319
Notes:
svn path=/vendor/llvm/dist/; revision=343171
svn path=/vendor/llvm/llvm-trunk-r351319/; revision=343172; tag=vendor/llvm/llvm-trunk-r351319
Diffstat (limited to 'lib/Analysis/VectorUtils.cpp')
-rw-r--r-- | lib/Analysis/VectorUtils.cpp | 530 |
1 file changed, 513 insertions, 17 deletions
```diff
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index d73d24736439..5656a19d7e0d 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/EquivalenceClasses.h"
 #include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -25,16 +26,30 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Value.h"
 
+#define DEBUG_TYPE "vectorutils"
+
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
-/// Identify if the intrinsic is trivially vectorizable.
-/// This method returns true if the intrinsic's argument types are all
-/// scalars for the scalar form of the intrinsic and all vectors for
-/// the vector form of the intrinsic.
+/// Maximum factor for an interleaved memory access.
+static cl::opt<unsigned> MaxInterleaveGroupFactor(
+    "max-interleave-group-factor", cl::Hidden,
+    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
+    cl::init(8));
+
+/// Return true if all of the intrinsic's arguments and return type are scalars
+/// for the scalar form of the intrinsic and vectors for the vector form of the
+/// intrinsic.
 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   switch (ID) {
-  case Intrinsic::sqrt:
+  case Intrinsic::bswap: // Begin integer bit-manipulation.
+  case Intrinsic::bitreverse:
+  case Intrinsic::ctpop:
+  case Intrinsic::ctlz:
+  case Intrinsic::cttz:
+  case Intrinsic::fshl:
+  case Intrinsic::fshr:
+  case Intrinsic::sqrt: // Begin floating-point.
   case Intrinsic::sin:
   case Intrinsic::cos:
   case Intrinsic::exp:
@@ -45,6 +60,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::fabs:
   case Intrinsic::minnum:
   case Intrinsic::maxnum:
+  case Intrinsic::minimum:
+  case Intrinsic::maximum:
   case Intrinsic::copysign:
   case Intrinsic::floor:
   case Intrinsic::ceil:
@@ -52,15 +69,15 @@
   case Intrinsic::rint:
   case Intrinsic::nearbyint:
   case Intrinsic::round:
-  case Intrinsic::bswap:
-  case Intrinsic::bitreverse:
-  case Intrinsic::ctpop:
   case Intrinsic::pow:
   case Intrinsic::fma:
   case Intrinsic::fmuladd:
-  case Intrinsic::ctlz:
-  case Intrinsic::cttz:
   case Intrinsic::powi:
+  case Intrinsic::canonicalize:
+  case Intrinsic::sadd_sat:
+  case Intrinsic::ssub_sat:
+  case Intrinsic::uadd_sat:
+  case Intrinsic::usub_sat:
     return true;
   default:
     return false;
@@ -270,9 +287,10 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
   }
 
   // Extract a value from a vector add operation with a constant zero.
-  Value *Val = nullptr; Constant *Con = nullptr;
-  if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
-    if (Constant *Elt = Con->getAggregateElement(EltNo))
+  // TODO: Use getBinOpIdentity() to generalize this.
+  Value *Val; Constant *C;
+  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
+    if (Constant *Elt = C->getAggregateElement(EltNo))
       if (Elt->isNullValue())
         return findScalarElement(Val, EltNo);
 
@@ -450,16 +468,100 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
   return MinBWs;
 }
 
+/// Add all access groups in @p AccGroups to @p List.
+template <typename ListT>
+static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
+  // Interpret an access group as a list containing itself.
+  if (AccGroups->getNumOperands() == 0) {
+    assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
+    List.insert(AccGroups);
+    return;
+  }
+
+  for (auto &AccGroupListOp : AccGroups->operands()) {
+    auto *Item = cast<MDNode>(AccGroupListOp.get());
+    assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+    List.insert(Item);
+  }
+}
+
+MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
+  if (!AccGroups1)
+    return AccGroups2;
+  if (!AccGroups2)
+    return AccGroups1;
+  if (AccGroups1 == AccGroups2)
+    return AccGroups1;
+
+  SmallSetVector<Metadata *, 4> Union;
+  addToAccessGroupList(Union, AccGroups1);
+  addToAccessGroupList(Union, AccGroups2);
+
+  if (Union.size() == 0)
+    return nullptr;
+  if (Union.size() == 1)
+    return cast<MDNode>(Union.front());
+
+  LLVMContext &Ctx = AccGroups1->getContext();
+  return MDNode::get(Ctx, Union.getArrayRef());
+}
+
+MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
+                                    const Instruction *Inst2) {
+  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
+  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
+
+  if (!MayAccessMem1 && !MayAccessMem2)
+    return nullptr;
+  if (!MayAccessMem1)
+    return Inst2->getMetadata(LLVMContext::MD_access_group);
+  if (!MayAccessMem2)
+    return Inst1->getMetadata(LLVMContext::MD_access_group);
+
+  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
+  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
+  if (!MD1 || !MD2)
+    return nullptr;
+  if (MD1 == MD2)
+    return MD1;
+
+  // Use set for scalable 'contains' check.
+  SmallPtrSet<Metadata *, 4> AccGroupSet2;
+  addToAccessGroupList(AccGroupSet2, MD2);
+
+  SmallVector<Metadata *, 4> Intersection;
+  if (MD1->getNumOperands() == 0) {
+    assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
+    if (AccGroupSet2.count(MD1))
+      Intersection.push_back(MD1);
+  } else {
+    for (const MDOperand &Node : MD1->operands()) {
+      auto *Item = cast<MDNode>(Node.get());
+      assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+      if (AccGroupSet2.count(Item))
+        Intersection.push_back(Item);
+    }
+  }
+
+  if (Intersection.size() == 0)
+    return nullptr;
+  if (Intersection.size() == 1)
+    return cast<MDNode>(Intersection.front());
+
+  LLVMContext &Ctx = Inst1->getContext();
+  return MDNode::get(Ctx, Intersection);
+}
+
 /// \returns \p I after propagating metadata from \p VL.
 Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
   Instruction *I0 = cast<Instruction>(VL[0]);
   SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
   I0->getAllMetadataOtherThanDebugLoc(Metadata);
 
-  for (auto Kind :
-       {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
-        LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
-        LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load}) {
+  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+                    LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
+                    LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
+                    LLVMContext::MD_access_group}) {
     MDNode *MD = I0->getMetadata(Kind);
 
     for (int J = 1, E = VL.size(); MD && J != E; ++J) {
@@ -480,6 +582,9 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
       case LLVMContext::MD_invariant_load:
         MD = MDNode::intersect(MD, IMD);
         break;
+      case LLVMContext::MD_access_group:
+        MD = intersectAccessGroups(Inst, IJ);
+        break;
       default:
         llvm_unreachable("unhandled metadata");
       }
@@ -491,6 +596,36 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
   return Inst;
 }
 
+Constant *
+llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
+                           const InterleaveGroup<Instruction> &Group) {
+  // All 1's means mask is not needed.
+  if (Group.getNumMembers() == Group.getFactor())
+    return nullptr;
+
+  // TODO: support reversed access.
+  assert(!Group.isReverse() && "Reversed group not supported.");
+
+  SmallVector<Constant *, 16> Mask;
+  for (unsigned i = 0; i < VF; i++)
+    for (unsigned j = 0; j < Group.getFactor(); ++j) {
+      unsigned HasMember = Group.getMember(j) ? 1 : 0;
+      Mask.push_back(Builder.getInt1(HasMember));
+    }
+
+  return ConstantVector::get(Mask);
+}
+
+Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
+                                     unsigned ReplicationFactor, unsigned VF) {
+  SmallVector<Constant *, 16> MaskVec;
+  for (unsigned i = 0; i < VF; i++)
+    for (unsigned j = 0; j < ReplicationFactor; j++)
+      MaskVec.push_back(Builder.getInt32(i));
+
+  return ConstantVector::get(MaskVec);
+}
+
 Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                                      unsigned NumVecs) {
   SmallVector<Constant *, 16> Mask;
@@ -575,3 +710,364 @@ Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
 
   return ResList[0];
 }
+
+bool InterleavedAccessInfo::isStrided(int Stride) {
+  unsigned Factor = std::abs(Stride);
+  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
+}
+
+void InterleavedAccessInfo::collectConstStrideAccesses(
+    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
+    const ValueToValueMap &Strides) {
+  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
+
+  // Since it's desired that the load/store instructions be maintained in
+  // "program order" for the interleaved access analysis, we have to visit the
+  // blocks in the loop in reverse postorder (i.e., in a topological order).
+  // Such an ordering will ensure that any load/store that may be executed
+  // before a second load/store will precede the second load/store in
+  // AccessStrideInfo.
+  LoopBlocksDFS DFS(TheLoop);
+  DFS.perform(LI);
+  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
+    for (auto &I : *BB) {
+      auto *LI = dyn_cast<LoadInst>(&I);
+      auto *SI = dyn_cast<StoreInst>(&I);
+      if (!LI && !SI)
+        continue;
+
+      Value *Ptr = getLoadStorePointerOperand(&I);
+      // We don't check wrapping here because we don't know yet if Ptr will be
+      // part of a full group or a group with gaps. Checking wrapping for all
+      // pointers (even those that end up in groups with no gaps) will be overly
+      // conservative. For full groups, wrapping should be ok since if we would
+      // wrap around the address space we would do a memory access at nullptr
+      // even without the transformation. The wrapping checks are therefore
+      // deferred until after we've formed the interleaved groups.
+      int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+                                    /*Assume=*/true, /*ShouldCheckWrap=*/false);
+
+      const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+      PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+      uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+
+      // An alignment of 0 means target ABI alignment.
+      unsigned Align = getLoadStoreAlignment(&I);
+      if (!Align)
+        Align = DL.getABITypeAlignment(PtrTy->getElementType());
+
+      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
+    }
+}
+
+// Analyze interleaved accesses and collect them into interleaved load and
+// store groups.
+//
+// When generating code for an interleaved load group, we effectively hoist all
+// loads in the group to the location of the first load in program order. When
+// generating code for an interleaved store group, we sink all stores to the
+// location of the last store. This code motion can change the order of load
+// and store instructions and may break dependences.
+//
+// The code generation strategy mentioned above ensures that we won't violate
+// any write-after-read (WAR) dependences.
+//
+// E.g., for the WAR dependence:  a = A[i];      // (1)
+//                                A[i] = b;      // (2)
+//
+// The store group of (2) is always inserted at or below (2), and the load
+// group of (1) is always inserted at or above (1). Thus, the instructions will
+// never be reordered. All other dependences are checked to ensure the
+// correctness of the instruction reordering.
+//
+// The algorithm visits all memory accesses in the loop in bottom-up program
+// order. Program order is established by traversing the blocks in the loop in
+// reverse postorder when collecting the accesses.
+//
+// We visit the memory accesses in bottom-up order because it can simplify the
+// construction of store groups in the presence of write-after-write (WAW)
+// dependences.
+//
+// E.g., for the WAW dependence:  A[i] = a;      // (1)
+//                                A[i] = b;      // (2)
+//                                A[i + 1] = c;  // (3)
+//
+// We will first create a store group with (3) and (2). (1) can't be added to
+// this group because it and (2) are dependent. However, (1) can be grouped
+// with other accesses that may precede it in program order. Note that a
+// bottom-up order does not imply that WAW dependences should not be checked.
+void InterleavedAccessInfo::analyzeInterleaving(
+    bool EnablePredicatedInterleavedMemAccesses) {
+  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
+  const ValueToValueMap &Strides = LAI->getSymbolicStrides();
+
+  // Holds all accesses with a constant stride.
+  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
+  collectConstStrideAccesses(AccessStrideInfo, Strides);
+
+  if (AccessStrideInfo.empty())
+    return;
+
+  // Collect the dependences in the loop.
+  collectDependences();
+
+  // Holds all interleaved store groups temporarily.
+  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
+  // Holds all interleaved load groups temporarily.
+  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
+
+  // Search in bottom-up program order for pairs of accesses (A and B) that can
+  // form interleaved load or store groups. In the algorithm below, access A
+  // precedes access B in program order. We initialize a group for B in the
+  // outer loop of the algorithm, and then in the inner loop, we attempt to
+  // insert each A into B's group if:
+  //
+  //  1. A and B have the same stride,
+  //  2. A and B have the same memory object size, and
+  //  3. A belongs in B's group according to its distance from B.
+  //
+  // Special care is taken to ensure group formation will not break any
+  // dependences.
+  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
+       BI != E; ++BI) {
+    Instruction *B = BI->first;
+    StrideDescriptor DesB = BI->second;
+
+    // Initialize a group for B if it has an allowable stride. Even if we don't
+    // create a group for B, we continue with the bottom-up algorithm to ensure
+    // we don't break any of B's dependences.
+    InterleaveGroup<Instruction> *Group = nullptr;
+    if (isStrided(DesB.Stride) &&
+        (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
+      Group = getInterleaveGroup(B);
+      if (!Group) {
+        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
+                          << '\n');
+        Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
+      }
+      if (B->mayWriteToMemory())
+        StoreGroups.insert(Group);
+      else
+        LoadGroups.insert(Group);
+    }
+
+    for (auto AI = std::next(BI); AI != E; ++AI) {
+      Instruction *A = AI->first;
+      StrideDescriptor DesA = AI->second;
+
+      // Our code motion strategy implies that we can't have dependences
+      // between accesses in an interleaved group and other accesses located
+      // between the first and last member of the group. Note that this also
+      // means that a group can't have more than one member at a given offset.
+      // The accesses in a group can have dependences with other accesses, but
+      // we must ensure we don't extend the boundaries of the group such that
+      // we encompass those dependent accesses.
+      //
+      // For example, assume we have the sequence of accesses shown below in a
+      // stride-2 loop:
+      //
+      //  (1, 2) is a group | A[i]   = a;  // (1)
+      //                    | A[i-1] = b;  // (2) |
+      //                      A[i-3] = c;  // (3)
+      //                      A[i]   = d;  // (4) | (2, 4) is not a group
+      //
+      // Because accesses (2) and (3) are dependent, we can group (2) with (1)
+      // but not with (4). If we did, the dependent access (3) would be within
+      // the boundaries of the (2, 4) group.
+      if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
+        // If a dependence exists and A is already in a group, we know that A
+        // must be a store since A precedes B and WAR dependences are allowed.
+        // Thus, A would be sunk below B. We release A's group to prevent this
+        // illegal code motion. A will then be free to form another group with
+        // instructions that precede it.
+        if (isInterleaved(A)) {
+          InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
+          StoreGroups.remove(StoreGroup);
+          releaseGroup(StoreGroup);
+        }
+
+        // If a dependence exists and A is not already in a group (or it was
+        // and we just released it), B might be hoisted above A (if B is a
+        // load) or another store might be sunk below A (if B is a store). In
+        // either case, we can't add additional instructions to B's group. B
+        // will only form a group with instructions that it precedes.
+        break;
+      }
+
+      // At this point, we've checked for illegal code motion. If either A or B
+      // isn't strided, there's nothing left to do.
+      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
+        continue;
+
+      // Ignore A if it's already in a group or isn't the same kind of memory
+      // operation as B.
+      // Note that mayReadFromMemory() isn't mutually exclusive to
+      // mayWriteToMemory in the case of atomic loads. We shouldn't see those
+      // here, canVectorizeMemory() should have returned false - except for the
+      // case we asked for optimization remarks.
+      if (isInterleaved(A) ||
+          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
+          (A->mayWriteToMemory() != B->mayWriteToMemory()))
+        continue;
+
+      // Check rules 1 and 2. Ignore A if its stride or size is different from
+      // that of B.
+      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
+        continue;
+
+      // Ignore A if the memory object of A and B don't belong to the same
+      // address space
+      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
+        continue;
+
+      // Calculate the distance from A to B.
+      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
+          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
+      if (!DistToB)
+        continue;
+      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
+
+      // Check rule 3. Ignore A if its distance to B is not a multiple of the
+      // size.
+      if (DistanceToB % static_cast<int64_t>(DesB.Size))
+        continue;
+
+      // All members of a predicated interleave-group must have the same predicate,
+      // and currently must reside in the same BB.
+      BasicBlock *BlockA = A->getParent();
+      BasicBlock *BlockB = B->getParent();
+      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
+          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
+        continue;
+
+      // The index of A is the index of B plus A's distance to B in multiples
+      // of the size.
+      int IndexA =
+          Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
+
+      // Try to insert A into B's group.
+      if (Group->insertMember(A, IndexA, DesA.Align)) {
+        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
+                          << "    into the interleave group with" << *B
+                          << '\n');
+        InterleaveGroupMap[A] = Group;
+
+        // Set the first load in program order as the insert position.
+        if (A->mayReadFromMemory())
+          Group->setInsertPos(A);
+      }
+    } // Iteration over A accesses.
+  }   // Iteration over B accesses.
+
+  // Remove interleaved store groups with gaps.
+  for (auto *Group : StoreGroups)
+    if (Group->getNumMembers() != Group->getFactor()) {
+      LLVM_DEBUG(
+          dbgs() << "LV: Invalidate candidate interleaved store group due "
+                    "to gaps.\n");
+      releaseGroup(Group);
+    }
+  // Remove interleaved groups with gaps (currently only loads) whose memory
+  // accesses may wrap around. We have to revisit the getPtrStride analysis,
+  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
+  // not check wrapping (see documentation there).
+  // FORNOW we use Assume=false;
+  // TODO: Change to Assume=true but making sure we don't exceed the threshold
+  // of runtime SCEV assumptions checks (thereby potentially failing to
+  // vectorize altogether).
+  // Additional optional optimizations:
+  // TODO: If we are peeling the loop and we know that the first pointer doesn't
+  // wrap then we can deduce that all pointers in the group don't wrap.
+  // This means that we can forcefully peel the loop in order to only have to
+  // check the first pointer for no-wrap. When we'll change to use Assume=true
+  // we'll only need at most one runtime check per interleaved group.
+  for (auto *Group : LoadGroups) {
+    // Case 1: A full group. Can Skip the checks; For full groups, if the wide
+    // load would wrap around the address space we would do a memory access at
+    // nullptr even without the transformation.
+    if (Group->getNumMembers() == Group->getFactor())
+      continue;
+
+    // Case 2: If first and last members of the group don't wrap this implies
+    // that all the pointers in the group don't wrap.
+    // So we check only group member 0 (which is always guaranteed to exist),
+    // and group member Factor - 1; If the latter doesn't exist we rely on
+    // peeling (if it is a non-reveresed accsess -- see Case 3).
+    Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
+    if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
+                      /*ShouldCheckWrap=*/true)) {
+      LLVM_DEBUG(
+          dbgs() << "LV: Invalidate candidate interleaved group due to "
+                    "first group member potentially pointer-wrapping.\n");
+      releaseGroup(Group);
+      continue;
+    }
+    Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
+    if (LastMember) {
+      Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
+      if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
+                        /*ShouldCheckWrap=*/true)) {
+        LLVM_DEBUG(
+            dbgs() << "LV: Invalidate candidate interleaved group due to "
+                      "last group member potentially pointer-wrapping.\n");
+        releaseGroup(Group);
+      }
+    } else {
+      // Case 3: A non-reversed interleaved load group with gaps: We need
+      // to execute at least one scalar epilogue iteration. This will ensure
+      // we don't speculatively access memory out-of-bounds. We only need
+      // to look for a member at index factor - 1, since every group must have
+      // a member at index zero.
+      if (Group->isReverse()) {
+        LLVM_DEBUG(
+            dbgs() << "LV: Invalidate candidate interleaved group due to "
+                      "a reverse access with gaps.\n");
+        releaseGroup(Group);
+        continue;
+      }
+      LLVM_DEBUG(
+          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
+      RequiresScalarEpilogue = true;
+    }
+  }
+}
+
+void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
+  // If no group had triggered the requirement to create an epilogue loop,
+  // there is nothing to do.
+  if (!requiresScalarEpilogue())
+    return;
+
+  // Avoid releasing a Group twice.
+  SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
+  for (auto &I : InterleaveGroupMap) {
+    InterleaveGroup<Instruction> *Group = I.second;
+    if (Group->requiresScalarEpilogue())
+      DelSet.insert(Group);
+  }
+  for (auto *Ptr : DelSet) {
+    LLVM_DEBUG(
+        dbgs()
+        << "LV: Invalidate candidate interleaved group due to gaps that "
+           "require a scalar epilogue (not allowed under optsize) and cannot "
+           "be masked (not enabled). \n");
+    releaseGroup(Ptr);
+  }
+
+  RequiresScalarEpilogue = false;
+}
+
+template <typename InstT>
+void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
+  llvm_unreachable("addMetadata can only be used for Instruction");
+}
+
+namespace llvm {
+template <>
+void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
+  SmallVector<Value *, 4> VL;
+  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
+                 [](std::pair<int, Instruction *> p) { return p.second; });
+  propagateMetadata(NewInst, VL);
+}
+}
```
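The `uniteAccessGroups`/`intersectAccessGroups` helpers added above operate on `!llvm.access.group` metadata, where a single access group is a distinct, operand-less `MDNode` and a set of groups is a list node naming them. The following is a minimal sketch of the union semantics, not part of the import: the function name `accessGroupUnionSketch` is invented here, while the APIs are assumed to be those declared in `llvm/Analysis/VectorUtils.h` by this commit.

```cpp
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include <cassert>

using namespace llvm;

void accessGroupUnionSketch() {
  LLVMContext Ctx;
  // An access group is a distinct MDNode with no operands.
  MDNode *G1 = MDNode::getDistinct(Ctx, {});
  MDNode *G2 = MDNode::getDistinct(Ctx, {});

  // Uniting a group with null or with itself collapses to that group.
  assert(uniteAccessGroups(G1, nullptr) == G1);
  assert(uniteAccessGroups(G1, G1) == G1);

  // Uniting two distinct groups yields a list node containing both.
  MDNode *Both = uniteAccessGroups(G1, G2);
  assert(Both->getNumOperands() == 2);
}
```

`intersectAccessGroups` is the conservative dual used by `propagateMetadata`: it takes the two instructions rather than raw metadata, so an instruction that cannot touch memory does not constrain the result, and it keeps only the groups common to both.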
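The new mask helpers build constant vectors from simple index patterns: `createReplicatedMask(Builder, RF, VF)` repeats each of the `VF` lane indices `RF` times, and `createBitMaskForGaps` emits one `i1` per lane that is 1 exactly where the group has a member. A plain-integer sketch of the replicated-mask pattern (standalone, hypothetical helper name, no LLVM dependency):

```cpp
#include <cstdio>
#include <vector>

// Mirrors the index pattern of createReplicatedMask(): each of the VF
// source lanes is repeated ReplicationFactor times.
std::vector<int> replicatedMaskIndices(unsigned ReplicationFactor, unsigned VF) {
  std::vector<int> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)
      Mask.push_back(static_cast<int>(i));
  return Mask;
}

int main() {
  // ReplicationFactor = 3, VF = 4 -> 0 0 0 1 1 1 2 2 2 3 3 3
  for (int Idx : replicatedMaskIndices(3, 4))
    std::printf("%d ", Idx);
  std::printf("\n");
  return 0;
}
```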
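As context for `analyzeInterleaving` above, consider a hypothetical stride-2 loop (illustrative only, not taken from the commit). The two loads form a complete factor-2 group, while the store leaves a gap at index 1: per the code above, a store group with a gap is released, whereas a load group with a gap must additionally pass the pointer-wrapping checks and then either be masked via `createBitMaskForGaps` or force a scalar epilogue iteration.

```cpp
// Hypothetical example loop, not part of the import.
void interleavedExample(int *p, const int *q, int n) {
  for (int i = 0; i < n; ++i) {
    int a = q[2 * i];     // load group member at index 0
    int b = q[2 * i + 1]; // load group member at index 1 -> full group
    p[2 * i] = a + b;     // store group member at index 0 only;
                          // p[2*i + 1] is never written, so index 1 is a gap
  }
}
```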