diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp | 467 |
1 files changed, 0 insertions, 467 deletions
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp deleted file mode 100644 index e5ab24e52df6..000000000000 --- a/contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp +++ /dev/null @@ -1,467 +0,0 @@ -//===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// This file implements SLP analysis based on VPlan. The analysis is based on -/// the ideas described in -/// -/// Look-ahead SLP: auto-vectorization in the presence of commutative -/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha, -/// Luís F. W. Góes -/// -//===----------------------------------------------------------------------===// - -#include "VPlan.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/VectorUtils.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GraphWriter.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include <cassert> -#include <iterator> -#include <string> -#include <vector> - -using namespace llvm; - -#define DEBUG_TYPE "vplan-slp" - -// Number of levels to look ahead when re-ordering multi node operands. -static unsigned LookaheadMaxDepth = 5; - -VPInstruction *VPlanSlp::markFailed() { - // FIXME: Currently this is used to signal we hit instructions we cannot - // trivially SLP'ize. - CompletelySLP = false; - return nullptr; -} - -void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) { - if (all_of(Operands, [](VPValue *V) { - return cast<VPInstruction>(V)->getUnderlyingInstr(); - })) { - unsigned BundleSize = 0; - for (VPValue *V : Operands) { - Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType(); - assert(!T->isVectorTy() && "Only scalar types supported for now"); - BundleSize += T->getScalarSizeInBits(); - } - WidestBundleBits = std::max(WidestBundleBits, BundleSize); - } - - auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New); - assert(Res.second && - "Already created a combined instruction for the operand bundle"); - (void)Res; -} - -bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const { - // Currently we only support VPInstructions. - if (!all_of(Operands, [](VPValue *Op) { - return Op && isa<VPInstruction>(Op) && - cast<VPInstruction>(Op)->getUnderlyingInstr(); - })) { - LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n"); - return false; - } - - // Check if opcodes and type width agree for all instructions in the bundle. - // FIXME: Differing widths/opcodes can be handled by inserting additional - // instructions. - // FIXME: Deal with non-primitive types. - const Instruction *OriginalInstr = - cast<VPInstruction>(Operands[0])->getUnderlyingInstr(); - unsigned Opcode = OriginalInstr->getOpcode(); - unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits(); - if (!all_of(Operands, [Opcode, Width](VPValue *Op) { - const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr(); - return I->getOpcode() == Opcode && - I->getType()->getPrimitiveSizeInBits() == Width; - })) { - LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n"); - return false; - } - - // For now, all operands must be defined in the same BB. - if (any_of(Operands, [this](VPValue *Op) { - return cast<VPInstruction>(Op)->getParent() != &this->BB; - })) { - LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n"); - return false; - } - - if (any_of(Operands, - [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) { - LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n"); - return false; - } - - // For loads, check that there are no instructions writing to memory in - // between them. - // TODO: we only have to forbid instructions writing to memory that could - // interfere with any of the loads in the bundle - if (Opcode == Instruction::Load) { - unsigned LoadsSeen = 0; - VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent(); - for (auto &I : *Parent) { - auto *VPI = cast<VPInstruction>(&I); - if (VPI->getOpcode() == Instruction::Load && - std::find(Operands.begin(), Operands.end(), VPI) != Operands.end()) - LoadsSeen++; - - if (LoadsSeen == Operands.size()) - break; - if (LoadsSeen > 0 && VPI->mayWriteToMemory()) { - LLVM_DEBUG( - dbgs() << "VPSLP: instruction modifying memory between loads\n"); - return false; - } - } - - if (!all_of(Operands, [](VPValue *Op) { - return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr()) - ->isSimple(); - })) { - LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n"); - return false; - } - } - - if (Opcode == Instruction::Store) - if (!all_of(Operands, [](VPValue *Op) { - return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr()) - ->isSimple(); - })) { - LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n"); - return false; - } - - return true; -} - -static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values, - unsigned OperandIndex) { - SmallVector<VPValue *, 4> Operands; - for (VPValue *V : Values) { - auto *U = cast<VPUser>(V); - Operands.push_back(U->getOperand(OperandIndex)); - } - return Operands; -} - -static bool areCommutative(ArrayRef<VPValue *> Values) { - return Instruction::isCommutative( - cast<VPInstruction>(Values[0])->getOpcode()); -} - -static SmallVector<SmallVector<VPValue *, 4>, 4> -getOperands(ArrayRef<VPValue *> Values) { - SmallVector<SmallVector<VPValue *, 4>, 4> Result; - auto *VPI = cast<VPInstruction>(Values[0]); - - switch (VPI->getOpcode()) { - case Instruction::Load: - llvm_unreachable("Loads terminate a tree, no need to get operands"); - case Instruction::Store: - Result.push_back(getOperands(Values, 0)); - break; - default: - for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I) - Result.push_back(getOperands(Values, I)); - break; - } - - return Result; -} - -/// Returns the opcode of Values or ~0 if they do not all agree. -static Optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) { - unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode(); - if (any_of(Values, [Opcode](VPValue *V) { - return cast<VPInstruction>(V)->getOpcode() != Opcode; - })) - return None; - return {Opcode}; -} - -/// Returns true if A and B access sequential memory if they are loads or -/// stores or if they have identical opcodes otherwise. -static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B, - VPInterleavedAccessInfo &IAI) { - if (A->getOpcode() != B->getOpcode()) - return false; - - if (A->getOpcode() != Instruction::Load && - A->getOpcode() != Instruction::Store) - return true; - auto *GA = IAI.getInterleaveGroup(A); - auto *GB = IAI.getInterleaveGroup(B); - - return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B); -} - -/// Implements getLAScore from Listing 7 in the paper. -/// Traverses and compares operands of V1 and V2 to MaxLevel. -static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel, - VPInterleavedAccessInfo &IAI) { - if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2)) - return 0; - - if (MaxLevel == 0) - return (unsigned)areConsecutiveOrMatch(cast<VPInstruction>(V1), - cast<VPInstruction>(V2), IAI); - - unsigned Score = 0; - for (unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands(); I < EV1; ++I) - for (unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J) - Score += getLAScore(cast<VPUser>(V1)->getOperand(I), - cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI); - return Score; -} - -std::pair<VPlanSlp::OpMode, VPValue *> -VPlanSlp::getBest(OpMode Mode, VPValue *Last, - SmallPtrSetImpl<VPValue *> &Candidates, - VPInterleavedAccessInfo &IAI) { - assert((Mode == OpMode::Load || Mode == OpMode::Opcode) && - "Currently we only handle load and commutative opcodes"); - LLVM_DEBUG(dbgs() << " getBest\n"); - - SmallVector<VPValue *, 4> BestCandidates; - LLVM_DEBUG(dbgs() << " Candidates for " - << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " "); - for (auto *Candidate : Candidates) { - auto *LastI = cast<VPInstruction>(Last); - auto *CandidateI = cast<VPInstruction>(Candidate); - if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) { - LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr() - << " "); - BestCandidates.push_back(Candidate); - } - } - LLVM_DEBUG(dbgs() << "\n"); - - if (BestCandidates.empty()) - return {OpMode::Failed, nullptr}; - - if (BestCandidates.size() == 1) - return {Mode, BestCandidates[0]}; - - VPValue *Best = nullptr; - unsigned BestScore = 0; - for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) { - unsigned PrevScore = ~0u; - bool AllSame = true; - - // FIXME: Avoid visiting the same operands multiple times. - for (auto *Candidate : BestCandidates) { - unsigned Score = getLAScore(Last, Candidate, Depth, IAI); - if (PrevScore == ~0u) - PrevScore = Score; - if (PrevScore != Score) - AllSame = false; - PrevScore = Score; - - if (Score > BestScore) { - BestScore = Score; - Best = Candidate; - } - } - if (!AllSame) - break; - } - LLVM_DEBUG(dbgs() << "Found best " - << *cast<VPInstruction>(Best)->getUnderlyingInstr() - << "\n"); - Candidates.erase(Best); - - return {Mode, Best}; -} - -SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() { - SmallVector<MultiNodeOpTy, 4> FinalOrder; - SmallVector<OpMode, 4> Mode; - FinalOrder.reserve(MultiNodeOps.size()); - Mode.reserve(MultiNodeOps.size()); - - LLVM_DEBUG(dbgs() << "Reordering multinode\n"); - - for (auto &Operands : MultiNodeOps) { - FinalOrder.push_back({Operands.first, {Operands.second[0]}}); - if (cast<VPInstruction>(Operands.second[0])->getOpcode() == - Instruction::Load) - Mode.push_back(OpMode::Load); - else - Mode.push_back(OpMode::Opcode); - } - - for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) { - LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n"); - SmallPtrSet<VPValue *, 4> Candidates; - LLVM_DEBUG(dbgs() << " Candidates "); - for (auto Ops : MultiNodeOps) { - LLVM_DEBUG( - dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr() - << " "); - Candidates.insert(Ops.second[Lane]); - } - LLVM_DEBUG(dbgs() << "\n"); - - for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) { - LLVM_DEBUG(dbgs() << " Checking " << Op << "\n"); - if (Mode[Op] == OpMode::Failed) - continue; - - VPValue *Last = FinalOrder[Op].second[Lane - 1]; - std::pair<OpMode, VPValue *> Res = - getBest(Mode[Op], Last, Candidates, IAI); - if (Res.second) - FinalOrder[Op].second.push_back(Res.second); - else - // TODO: handle this case - FinalOrder[Op].second.push_back(markFailed()); - } - } - - return FinalOrder; -} - -void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) { - dbgs() << " Ops: "; - for (auto Op : Values) - if (auto *Instr = cast_or_null<VPInstruction>(Op)->getUnderlyingInstr()) - dbgs() << *Instr << " | "; - else - dbgs() << " nullptr | "; - dbgs() << "\n"; -} - -VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) { - assert(!Values.empty() && "Need some operands!"); - - // If we already visited this instruction bundle, re-use the existing node - auto I = BundleToCombined.find(to_vector<4>(Values)); - if (I != BundleToCombined.end()) { -#ifndef NDEBUG - // Check that the resulting graph is a tree. If we re-use a node, this means - // its values have multiple users. We only allow this, if all users of each - // value are the same instruction. - for (auto *V : Values) { - auto UI = V->user_begin(); - auto *FirstUser = *UI++; - while (UI != V->user_end()) { - assert(*UI == FirstUser && "Currently we only support SLP trees."); - UI++; - } - } -#endif - return I->second; - } - - // Dump inputs - LLVM_DEBUG({ - dbgs() << "buildGraph: "; - dumpBundle(Values); - }); - - if (!areVectorizable(Values)) - return markFailed(); - - assert(getOpcode(Values) && "Opcodes for all values must match"); - unsigned ValuesOpcode = getOpcode(Values).getValue(); - - SmallVector<VPValue *, 4> CombinedOperands; - if (areCommutative(Values)) { - bool MultiNodeRoot = !MultiNodeActive; - MultiNodeActive = true; - for (auto &Operands : getOperands(Values)) { - LLVM_DEBUG({ - dbgs() << " Visiting Commutative"; - dumpBundle(Operands); - }); - - auto OperandsOpcode = getOpcode(Operands); - if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) { - LLVM_DEBUG(dbgs() << " Same opcode, continue building\n"); - CombinedOperands.push_back(buildGraph(Operands)); - } else { - LLVM_DEBUG(dbgs() << " Adding multinode Ops\n"); - // Create dummy VPInstruction, which will we replace later by the - // re-ordered operand. - VPInstruction *Op = new VPInstruction(0, {}); - CombinedOperands.push_back(Op); - MultiNodeOps.emplace_back(Op, Operands); - } - } - - if (MultiNodeRoot) { - LLVM_DEBUG(dbgs() << "Reorder \n"); - MultiNodeActive = false; - - auto FinalOrder = reorderMultiNodeOps(); - - MultiNodeOps.clear(); - for (auto &Ops : FinalOrder) { - VPInstruction *NewOp = buildGraph(Ops.second); - Ops.first->replaceAllUsesWith(NewOp); - for (unsigned i = 0; i < CombinedOperands.size(); i++) - if (CombinedOperands[i] == Ops.first) - CombinedOperands[i] = NewOp; - delete Ops.first; - Ops.first = NewOp; - } - LLVM_DEBUG(dbgs() << "Found final order\n"); - } - } else { - LLVM_DEBUG(dbgs() << " NonCommuntative\n"); - if (ValuesOpcode == Instruction::Load) - for (VPValue *V : Values) - CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0)); - else - for (auto &Operands : getOperands(Values)) - CombinedOperands.push_back(buildGraph(Operands)); - } - - unsigned Opcode; - switch (ValuesOpcode) { - case Instruction::Load: - Opcode = VPInstruction::SLPLoad; - break; - case Instruction::Store: - Opcode = VPInstruction::SLPStore; - break; - default: - Opcode = ValuesOpcode; - break; - } - - if (!CompletelySLP) - return markFailed(); - - assert(CombinedOperands.size() > 0 && "Need more some operands"); - auto *VPI = new VPInstruction(Opcode, CombinedOperands); - VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr()); - - LLVM_DEBUG(dbgs() << "Create VPInstruction "; VPI->print(dbgs()); - cast<VPInstruction>(Values[0])->print(dbgs()); dbgs() << "\n"); - addCombined(Values, VPI); - return VPI; -} |