diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 174 |
1 files changed, 140 insertions, 34 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 90598937affc..d12624ffb824 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -414,6 +414,10 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex, static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder) { + // Shufflevectors can only be created for fixed-width vectors. + if (!isa<FixedVectorType>(ExtElt->getOperand(0)->getType())) + return nullptr; + // If the extract can be constant-folded, this code is unsimplified. Defer // to other passes to handle that. Value *X = ExtElt->getVectorOperand(); @@ -1249,14 +1253,20 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() || VT != Op0->getType()) return false; - auto *SVI0A = dyn_cast<ShuffleVectorInst>(Op0->getOperand(0)); - auto *SVI0B = dyn_cast<ShuffleVectorInst>(Op0->getOperand(1)); - auto *SVI1A = dyn_cast<ShuffleVectorInst>(Op1->getOperand(0)); - auto *SVI1B = dyn_cast<ShuffleVectorInst>(Op1->getOperand(1)); + auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0)); + auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1)); + auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0)); + auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1)); + SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B}); auto checkSVNonOpUses = [&](Instruction *I) { if (!I || I->getOperand(0)->getType() != VT) return true; - return any_of(I->users(), [&](User *U) { return U != Op0 && U != Op1; }); + return any_of(I->users(), [&](User *U) { + return U != Op0 && U != Op1 && + !(isa<ShuffleVectorInst>(U) && + (InputShuffles.contains(cast<Instruction>(U)) || + isInstructionTriviallyDead(cast<Instruction>(U)))); + }); }; if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) || checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B)) @@ -1271,6 +1281,9 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { auto *SV = dyn_cast<ShuffleVectorInst>(U); if (!SV || SV->getType() != VT) return false; + if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) || + (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1)) + return false; if (!llvm::is_contained(Shuffles, SV)) Shuffles.push_back(SV); } @@ -1283,13 +1296,25 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { if (FromReduction && Shuffles.size() > 1) return false; + // Add any shuffle uses for the shuffles we have found, to include them in our + // cost calculations. + if (!FromReduction) { + for (ShuffleVectorInst *SV : Shuffles) { + for (auto U : SV->users()) { + ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U); + if (SSV && isa<UndefValue>(SSV->getOperand(1))) + Shuffles.push_back(SSV); + } + } + } + // For each of the output shuffles, we try to sort all the first vector // elements to the beginning, followed by the second array elements at the // end. If the binops are legalized to smaller vectors, this may reduce total // number of binops. We compute the ReconstructMask mask needed to convert // back to the original lane order. - SmallVector<int> V1, V2; - SmallVector<SmallVector<int>> ReconstructMasks; + SmallVector<std::pair<int, int>> V1, V2; + SmallVector<SmallVector<int>> OrigReconstructMasks; int MaxV1Elt = 0, MaxV2Elt = 0; unsigned NumElts = VT->getNumElements(); for (ShuffleVectorInst *SVN : Shuffles) { @@ -1300,6 +1325,16 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { // case we need to commute the mask). Value *SVOp0 = SVN->getOperand(0); Value *SVOp1 = SVN->getOperand(1); + if (isa<UndefValue>(SVOp1)) { + auto *SSV = cast<ShuffleVectorInst>(SVOp0); + SVOp0 = SSV->getOperand(0); + SVOp1 = SSV->getOperand(1); + for (unsigned I = 0, E = Mask.size(); I != E; I++) { + if (Mask[I] >= static_cast<int>(SSV->getShuffleMask().size())) + return false; + Mask[I] = Mask[I] < 0 ? Mask[I] : SSV->getMaskValue(Mask[I]); + } + } if (SVOp0 == Op1 && SVOp1 == Op0) { std::swap(SVOp0, SVOp1); ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); @@ -1316,21 +1351,25 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { ReconstructMask.push_back(-1); } else if (Mask[I] < static_cast<int>(NumElts)) { MaxV1Elt = std::max(MaxV1Elt, Mask[I]); - auto It = find(V1, Mask[I]); + auto It = find_if(V1, [&](const std::pair<int, int> &A) { + return Mask[I] == A.first; + }); if (It != V1.end()) ReconstructMask.push_back(It - V1.begin()); else { ReconstructMask.push_back(V1.size()); - V1.push_back(Mask[I]); + V1.emplace_back(Mask[I], V1.size()); } } else { MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts); - auto It = find(V2, Mask[I] - NumElts); + auto It = find_if(V2, [&](const std::pair<int, int> &A) { + return Mask[I] - static_cast<int>(NumElts) == A.first; + }); if (It != V2.end()) ReconstructMask.push_back(NumElts + It - V2.begin()); else { ReconstructMask.push_back(NumElts + V2.size()); - V2.push_back(Mask[I] - NumElts); + V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size()); } } } @@ -1339,7 +1378,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { // result. In-order can help simplify the shuffle away. if (FromReduction) sort(ReconstructMask); - ReconstructMasks.push_back(ReconstructMask); + OrigReconstructMasks.push_back(std::move(ReconstructMask)); } // If the Maximum element used from V1 and V2 are not larger than the new @@ -1351,16 +1390,68 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { MaxV2Elt == static_cast<int>(V2.size()) - 1)) return false; + // GetBaseMaskValue takes one of the inputs, which may either be a shuffle, a + // shuffle of another shuffle, or not a shuffle (that is treated like a + // identity shuffle). + auto GetBaseMaskValue = [&](Instruction *I, int M) { + auto *SV = dyn_cast<ShuffleVectorInst>(I); + if (!SV) + return M; + if (isa<UndefValue>(SV->getOperand(1))) + if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0))) + if (InputShuffles.contains(SSV)) + return SSV->getMaskValue(SV->getMaskValue(M)); + return SV->getMaskValue(M); + }; + + // Attempt to sort the inputs my ascending mask values to make simpler input + // shuffles and push complex shuffles down to the uses. We sort on the first + // of the two input shuffle orders, to try and get at least one input into a + // nice order. + auto SortBase = [&](Instruction *A, std::pair<int, int> X, + std::pair<int, int> Y) { + int MXA = GetBaseMaskValue(A, X.first); + int MYA = GetBaseMaskValue(A, Y.first); + return MXA < MYA; + }; + stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) { + return SortBase(SVI0A, A, B); + }); + stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) { + return SortBase(SVI1A, A, B); + }); + // Calculate our ReconstructMasks from the OrigReconstructMasks and the + // modified order of the input shuffles. + SmallVector<SmallVector<int>> ReconstructMasks; + for (auto Mask : OrigReconstructMasks) { + SmallVector<int> ReconstructMask; + for (int M : Mask) { + auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) { + auto It = find_if(V, [M](auto A) { return A.second == M; }); + assert(It != V.end() && "Expected all entries in Mask"); + return std::distance(V.begin(), It); + }; + if (M < 0) + ReconstructMask.push_back(-1); + else if (M < static_cast<int>(NumElts)) { + ReconstructMask.push_back(FindIndex(V1, M)); + } else { + ReconstructMask.push_back(NumElts + FindIndex(V2, M)); + } + } + ReconstructMasks.push_back(std::move(ReconstructMask)); + } + // Calculate the masks needed for the new input shuffles, which get padded // with undef SmallVector<int> V1A, V1B, V2A, V2B; for (unsigned I = 0; I < V1.size(); I++) { - V1A.push_back(SVI0A->getMaskValue(V1[I])); - V1B.push_back(SVI0B->getMaskValue(V1[I])); + V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first)); + V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first)); } for (unsigned I = 0; I < V2.size(); I++) { - V2A.push_back(SVI1A->getMaskValue(V2[I])); - V2B.push_back(SVI1B->getMaskValue(V2[I])); + V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first)); + V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first)); } while (V1A.size() < NumElts) { V1A.push_back(UndefMaskElem); @@ -1371,9 +1462,14 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { V2B.push_back(UndefMaskElem); } - auto AddShuffleCost = [&](InstructionCost C, ShuffleVectorInst *SV) { - return C + - TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, SV->getShuffleMask()); + auto AddShuffleCost = [&](InstructionCost C, Instruction *I) { + auto *SV = dyn_cast<ShuffleVectorInst>(I); + if (!SV) + return C; + return C + TTI.getShuffleCost(isa<UndefValue>(SV->getOperand(1)) + ? TTI::SK_PermuteSingleSrc + : TTI::SK_PermuteTwoSrc, + VT, SV->getShuffleMask()); }; auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) { return C + TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, Mask); @@ -1386,9 +1482,6 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { TTI.getArithmeticInstrCost(Op1->getOpcode(), VT); CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(), InstructionCost(0), AddShuffleCost); - // This set helps us only cost each unique shuffle once. - SmallPtrSet<ShuffleVectorInst *, 4> InputShuffles( - {SVI0A, SVI0B, SVI1A, SVI1B}); CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(), InstructionCost(0), AddShuffleCost); @@ -1408,22 +1501,35 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(), InstructionCost(0), AddShuffleMaskCost); + LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n"); + LLVM_DEBUG(dbgs() << " CostBefore: " << CostBefore + << " vs CostAfter: " << CostAfter << "\n"); if (CostBefore <= CostAfter) return false; // The cost model has passed, create the new instructions. - Builder.SetInsertPoint(SVI0A); - Value *NSV0A = Builder.CreateShuffleVector(SVI0A->getOperand(0), - SVI0A->getOperand(1), V1A); - Builder.SetInsertPoint(SVI0B); - Value *NSV0B = Builder.CreateShuffleVector(SVI0B->getOperand(0), - SVI0B->getOperand(1), V1B); - Builder.SetInsertPoint(SVI1A); - Value *NSV1A = Builder.CreateShuffleVector(SVI1A->getOperand(0), - SVI1A->getOperand(1), V2A); - Builder.SetInsertPoint(SVI1B); - Value *NSV1B = Builder.CreateShuffleVector(SVI1B->getOperand(0), - SVI1B->getOperand(1), V2B); + auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * { + auto *SV = dyn_cast<ShuffleVectorInst>(I); + if (!SV) + return I; + if (isa<UndefValue>(SV->getOperand(1))) + if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0))) + if (InputShuffles.contains(SSV)) + return SSV->getOperand(Op); + return SV->getOperand(Op); + }; + Builder.SetInsertPoint(SVI0A->getNextNode()); + Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0), + GetShuffleOperand(SVI0A, 1), V1A); + Builder.SetInsertPoint(SVI0B->getNextNode()); + Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0), + GetShuffleOperand(SVI0B, 1), V1B); + Builder.SetInsertPoint(SVI1A->getNextNode()); + Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0), + GetShuffleOperand(SVI1A, 1), V2A); + Builder.SetInsertPoint(SVI1B->getNextNode()); + Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0), + GetShuffleOperand(SVI1B, 1), V2B); Builder.SetInsertPoint(Op0); Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(), NSV0A, NSV0B); |