diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2024-01-09 19:58:18 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-01-09 19:58:18 +0000 |
commit | aca2e42c67292825f835f094eb0c4df5ce6013db (patch) | |
tree | 9cfb7eeef35545100c4f7219e794e6a0306ea6a6 /llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | |
parent | 77dbea07356e1ab2f37a777d4d1ddc5dd3e301c2 (diff) |
Vendor import of llvm-project main llvmorg-18-init-16595-g7c00a5be5cde. (tag: vendor/llvm-project/llvmorg-18-init-16595-g7c00a5be5cde)
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 88 |
1 files changed, 85 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 76961629aece..1f844bce2310 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include <cassert> @@ -119,7 +120,9 @@ bool VPRecipeBase::mayHaveSideEffects() const { return false; case VPInstructionSC: switch (cast<VPInstruction>(this)->getOpcode()) { + case Instruction::Or: case Instruction::ICmp: + case Instruction::Select: case VPInstruction::Not: case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: @@ -401,6 +404,84 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); return CondBr; } + case VPInstruction::ComputeReductionResult: { + if (Part != 0) + return State.get(this, 0); + + // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary + // and will be removed by breaking up the recipe further. + auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0)); + auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); + // Get its reduction variable descriptor. 
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + + RecurKind RK = RdxDesc.getRecurrenceKind(); + + State.setDebugLocFrom(getDebugLoc()); + + VPValue *LoopExitingDef = getOperand(1); + Type *PhiTy = OrigPhi->getType(); + VectorParts RdxParts(State.UF); + for (unsigned Part = 0; Part < State.UF; ++Part) + RdxParts[Part] = State.get(LoopExitingDef, Part); + + // If the vector reduction can be performed in a smaller type, we truncate + // then extend the loop exit value to enable InstCombine to evaluate the + // entire expression in the smaller type. + // TODO: Handle this in truncateToMinBW. + if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { + Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF); + for (unsigned Part = 0; Part < State.UF; ++Part) + RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); + } + // Reduce all of the unrolled parts into a single vector. + Value *ReducedPartRdx = RdxParts[0]; + unsigned Op = RecurrenceDescriptor::getOpcode(RK); + + if (PhiR->isOrdered()) { + ReducedPartRdx = RdxParts[State.UF - 1]; + } else { + // Floating-point operations should have some FMF to enable the reduction. + IRBuilderBase::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); + for (unsigned Part = 1; Part < State.UF; ++Part) { + Value *RdxPart = RdxParts[Part]; + if (Op != Instruction::ICmp && Op != Instruction::FCmp) + ReducedPartRdx = Builder.CreateBinOp( + (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); + else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) { + TrackingVH<Value> ReductionStartValue = + RdxDesc.getRecurrenceStartValue(); + ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK, + ReducedPartRdx, RdxPart); + } else + ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); + } + } + + // Create the reduction after the loop. 
Note that inloop reductions create + // the target reduction in the loop using a Reduction recipe. + if (State.VF.isVector() && !PhiR->isInLoop()) { + ReducedPartRdx = + createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi); + // If the reduction can be performed in a smaller type, we need to extend + // the reduction to the wider type before we branch to the original loop. + if (PhiTy != RdxDesc.getRecurrenceType()) + ReducedPartRdx = RdxDesc.isSigned() + ? Builder.CreateSExt(ReducedPartRdx, PhiTy) + : Builder.CreateZExt(ReducedPartRdx, PhiTy); + } + + // If there were stores of the reduction value to a uniform memory address + // inside the loop, create the final store here. + if (StoreInst *SI = RdxDesc.IntermediateStore) { + auto *NewSI = Builder.CreateAlignedStore( + ReducedPartRdx, SI->getPointerOperand(), SI->getAlign()); + propagateMetadata(NewSI, SI); + } + + return ReducedPartRdx; + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -477,6 +558,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::BranchOnCount: O << "branch-on-count"; break; + case VPInstruction::ComputeReductionResult: + O << "compute-reduction-result"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -1225,9 +1309,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) { ? DL.getIndexType(IndexedTy->getPointerTo()) : Builder.getInt32Ty(); Value *Ptr = State.get(getOperand(0), VPIteration(0, 0)); - bool InBounds = false; - if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts())) - InBounds = GEP->isInBounds(); + bool InBounds = isInBounds(); if (IsReverse) { // If the address is consecutive but reversed, then the // wide store needs to start at the last vector element. |