aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2024-01-09 19:58:18 +0000
committerDimitry Andric <dim@FreeBSD.org>2024-01-09 19:58:18 +0000
commitaca2e42c67292825f835f094eb0c4df5ce6013db (patch)
tree9cfb7eeef35545100c4f7219e794e6a0306ea6a6 /llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
parent77dbea07356e1ab2f37a777d4d1ddc5dd3e301c2 (diff)
Vendor import of llvm-project main llvmorg-18-init-16595-g7c00a5be5cde.vendor/llvm-project/llvmorg-18-init-16595-g7c00a5be5cde
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp88
1 files changed, 85 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 76961629aece..1f844bce2310 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
@@ -119,7 +120,9 @@ bool VPRecipeBase::mayHaveSideEffects() const {
return false;
case VPInstructionSC:
switch (cast<VPInstruction>(this)->getOpcode()) {
+ case Instruction::Or:
case Instruction::ICmp:
+ case Instruction::Select:
case VPInstruction::Not:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
@@ -401,6 +404,84 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
return CondBr;
}
+ case VPInstruction::ComputeReductionResult: {
+ if (Part != 0)
+ return State.get(this, 0);
+
+ // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
+ // and will be removed by breaking up the recipe further.
+ auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
+ auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
+ // Get its reduction variable descriptor.
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+
+ RecurKind RK = RdxDesc.getRecurrenceKind();
+
+ State.setDebugLocFrom(getDebugLoc());
+
+ VPValue *LoopExitingDef = getOperand(1);
+ Type *PhiTy = OrigPhi->getType();
+ VectorParts RdxParts(State.UF);
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ RdxParts[Part] = State.get(LoopExitingDef, Part);
+
+ // If the vector reduction can be performed in a smaller type, we truncate
+ // then extend the loop exit value to enable InstCombine to evaluate the
+ // entire expression in the smaller type.
+ // TODO: Handle this in truncateToMinBW.
+ if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
+ Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
+ }
+ // Reduce all of the unrolled parts into a single vector.
+ Value *ReducedPartRdx = RdxParts[0];
+ unsigned Op = RecurrenceDescriptor::getOpcode(RK);
+
+ if (PhiR->isOrdered()) {
+ ReducedPartRdx = RdxParts[State.UF - 1];
+ } else {
+ // Floating-point operations should have some FMF to enable the reduction.
+ IRBuilderBase::FastMathFlagGuard FMFG(Builder);
+ Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
+ for (unsigned Part = 1; Part < State.UF; ++Part) {
+ Value *RdxPart = RdxParts[Part];
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ ReducedPartRdx = Builder.CreateBinOp(
+ (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
+ else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
+ TrackingVH<Value> ReductionStartValue =
+ RdxDesc.getRecurrenceStartValue();
+ ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
+ ReducedPartRdx, RdxPart);
+ } else
+ ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
+ }
+ }
+
+ // Create the reduction after the loop. Note that inloop reductions create
+ // the target reduction in the loop using a Reduction recipe.
+ if (State.VF.isVector() && !PhiR->isInLoop()) {
+ ReducedPartRdx =
+ createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
+ // If the reduction can be performed in a smaller type, we need to extend
+ // the reduction to the wider type before we branch to the original loop.
+ if (PhiTy != RdxDesc.getRecurrenceType())
+ ReducedPartRdx = RdxDesc.isSigned()
+ ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
+ : Builder.CreateZExt(ReducedPartRdx, PhiTy);
+ }
+
+ // If there were stores of the reduction value to a uniform memory address
+ // inside the loop, create the final store here.
+ if (StoreInst *SI = RdxDesc.IntermediateStore) {
+ auto *NewSI = Builder.CreateAlignedStore(
+ ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
+ propagateMetadata(NewSI, SI);
+ }
+
+ return ReducedPartRdx;
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -477,6 +558,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::BranchOnCount:
O << "branch-on-count";
break;
+ case VPInstruction::ComputeReductionResult:
+ O << "compute-reduction-result";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -1225,9 +1309,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) {
? DL.getIndexType(IndexedTy->getPointerTo())
: Builder.getInt32Ty();
Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
- bool InBounds = false;
- if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
- InBounds = GEP->isInBounds();
+ bool InBounds = isInBounds();
if (IsReverse) {
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.