src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2024-01-09 19:58:18 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2024-01-09 19:58:18 +0000
commit	aca2e42c67292825f835f094eb0c4df5ce6013db (patch)
tree	9cfb7eeef35545100c4f7219e794e6a0306ea6a6 /llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
parent	77dbea07356e1ab2f37a777d4d1ddc5dd3e301c2 (diff)

Vendor import of llvm-project main llvmorg-18-init-16595-g7c00a5be5cde.

Tag: vendor/llvm-project/llvmorg-18-init-16595-g7c00a5be5cde

Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp')

-rw-r--r--

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

1 file changed, 85 insertions, 3 deletions

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 76961629aece..1f844bce2310 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

@@ -28,6 +28,7 @@

#include "llvm/Support/Debug.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

+#include "llvm/Transforms/Utils/LoopUtils.h"

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

#include <cassert>

@@ -119,7 +120,9 @@ bool VPRecipeBase::mayHaveSideEffects() const {

return false;

case VPInstructionSC:

switch (cast<VPInstruction>(this)->getOpcode()) {

+ case Instruction::Or:

case Instruction::ICmp:

+ case Instruction::Select:

case VPInstruction::Not:

case VPInstruction::CalculateTripCountMinusVF:

case VPInstruction::CanonicalIVIncrementForPart:

@@ -401,6 +404,84 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,

Builder.GetInsertBlock()->getTerminator()->eraseFromParent();

return CondBr;

}

+ case VPInstruction::ComputeReductionResult: {

+ if (Part != 0)

+ return State.get(this, 0);

+ // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary

+ // and will be removed by breaking up the recipe further.

+ auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));

+ auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());

+ // Get its reduction variable descriptor.

+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

+ RecurKind RK = RdxDesc.getRecurrenceKind();

+ State.setDebugLocFrom(getDebugLoc());

+ VPValue *LoopExitingDef = getOperand(1);

+ Type *PhiTy = OrigPhi->getType();

+ VectorParts RdxParts(State.UF);

+ for (unsigned Part = 0; Part < State.UF; ++Part)

+ RdxParts[Part] = State.get(LoopExitingDef, Part);

+ // If the vector reduction can be performed in a smaller type, we truncate

+ // then extend the loop exit value to enable InstCombine to evaluate the

+ // entire expression in the smaller type.

+ // TODO: Handle this in truncateToMinBW.

+ if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {

+ Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);

+ for (unsigned Part = 0; Part < State.UF; ++Part)

+ RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);

+ }

+ // Reduce all of the unrolled parts into a single vector.

+ Value *ReducedPartRdx = RdxParts[0];

+ unsigned Op = RecurrenceDescriptor::getOpcode(RK);

+ if (PhiR->isOrdered()) {

+ ReducedPartRdx = RdxParts[State.UF - 1];

+ } else {

+ // Floating-point operations should have some FMF to enable the reduction.

+ IRBuilderBase::FastMathFlagGuard FMFG(Builder);

+ Builder.setFastMathFlags(RdxDesc.getFastMathFlags());

+ for (unsigned Part = 1; Part < State.UF; ++Part) {

+ Value *RdxPart = RdxParts[Part];

+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)

+ ReducedPartRdx = Builder.CreateBinOp(

+ (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");

+ else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {

+ TrackingVH<Value> ReductionStartValue =

+ RdxDesc.getRecurrenceStartValue();

+ ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,

+ ReducedPartRdx, RdxPart);

+ } else

+ ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);

+ }

+ // Create the reduction after the loop. Note that inloop reductions create

+ // the target reduction in the loop using a Reduction recipe.

+ if (State.VF.isVector() && !PhiR->isInLoop()) {

+ ReducedPartRdx =

+ createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);

+ // If the reduction can be performed in a smaller type, we need to extend

+ // the reduction to the wider type before we branch to the original loop.

+ if (PhiTy != RdxDesc.getRecurrenceType())

+ ReducedPartRdx = RdxDesc.isSigned()

+ ? Builder.CreateSExt(ReducedPartRdx, PhiTy)

+ : Builder.CreateZExt(ReducedPartRdx, PhiTy);

+ }

+ // If there were stores of the reduction value to a uniform memory address

+ // inside the loop, create the final store here.

+ if (StoreInst *SI = RdxDesc.IntermediateStore) {

+ auto *NewSI = Builder.CreateAlignedStore(

+ ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());

+ propagateMetadata(NewSI, SI);

+ }

+ return ReducedPartRdx;

+ }

default:

llvm_unreachable("Unsupported opcode for instruction");

}

@@ -477,6 +558,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,

case VPInstruction::BranchOnCount:

O << "branch-on-count";

break;

+ case VPInstruction::ComputeReductionResult:

+ O << "compute-reduction-result";

+ break;

default:

O << Instruction::getOpcodeName(getOpcode());

}

@@ -1225,9 +1309,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) {

? DL.getIndexType(IndexedTy->getPointerTo())

: Builder.getInt32Ty();

Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));

- bool InBounds = false;

- if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))

- InBounds = GEP->isInBounds();

+ bool InBounds = isInBounds();

if (IsReverse) {

// If the address is consecutive but reversed, then the

// wide store needs to start at the last vector element.