Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp  328
1 file changed, 275 insertions, 53 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 303cf11d4f30..51c52aad3594 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -20,11 +20,12 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -149,7 +150,7 @@ bool matchAArch64MulConstCombine(
APInt ConstValue = Const->Value.sext(Ty.getSizeInBits());
// The following code is ported from AArch64ISelLowering.
// Multiplication of a power of two plus/minus one can be done more
- // cheaply as as shift+add/sub. For now, this is true unilaterally. If
+ // cheaply as shift+add/sub. For now, this is true unilaterally. If
// future CPUs have a cheaper MADD instruction, this may need to be
// gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
// 64-bit is 5 cycles, so this is always a win.
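// As a concrete illustration of the lowering described above: x * 5 can be
// done as (x << 2) + x and x * 7 as (x << 3) - x, so a multiply by a power
// of two plus/minus one needs only one shift and one add/sub in place of a
// MUL/MADD.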
@@ -339,26 +340,65 @@ void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
Store.eraseFromParent();
}
-class AArch64PostLegalizerCombinerImpl : public GIMatchTableExecutor {
+bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::tuple<Register, Register, Register> &MatchInfo) {
+ const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ if (!DstTy.isVector())
+ return false;
+
+ Register AO1, AO2, BVO1, BVO2;
+ if (!mi_match(MI, MRI,
+ m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)),
+ m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))
+ return false;
+
+ auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
+ auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
+ if (!BV1 || !BV2)
+ return false;
+
+ for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
+ auto ValAndVReg1 =
+ getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);
+ auto ValAndVReg2 =
+ getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);
+ if (!ValAndVReg1 || !ValAndVReg2 ||
+ ValAndVReg1->Value != ~ValAndVReg2->Value)
+ return false;
+ }
+
+ MatchInfo = {AO1, AO2, BVO1};
+ return true;
+}
+
+void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ std::tuple<Register, Register, Register> &MatchInfo) {
+ B.setInstrAndDebugLoc(MI);
+ B.buildInstr(
+ AArch64::G_BSP, {MI.getOperand(0).getReg()},
+ {std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});
+ MI.eraseFromParent();
+}
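+
+ // A sketch of the fold above, assuming <4 x s32> operands and constant
+ // build vectors %mask and %notmask whose lanes are bitwise complements:
+ // %or = G_OR (G_AND %a, %mask), (G_AND %b, %notmask)
+ // becomes
+ // %or = G_BSP %mask, %a, %b
+ // i.e. bits of %a are taken where %mask is set and bits of %b elsewhere,
+ // which can later be selected to the BSL/BIT/BIF bit-select instructions.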
+
+class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
- CombinerHelper &Helper;
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;
-
const AArch64Subtarget &STI;
- MachineRegisterInfo &MRI;
- GISelChangeObserver &Observer;
- MachineIRBuilder &B;
- MachineFunction &MF;
public:
AArch64PostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
- const AArch64Subtarget &STI, GISelChangeObserver &Observer,
- MachineIRBuilder &B, CombinerHelper &Helper);
+ const AArch64Subtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI);
static const char *getName() { return "AArch64PostLegalizerCombiner"; }
- bool tryCombineAll(MachineInstr &I) const;
+ bool tryCombineAll(MachineInstr &I) const override;
private:
#define GET_GICOMBINER_CLASS_MEMBERS
@@ -371,49 +411,20 @@ private:
#undef GET_GICOMBINER_IMPL
AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
- const AArch64Subtarget &STI, GISelChangeObserver &Observer,
- MachineIRBuilder &B, CombinerHelper &Helper)
- : Helper(Helper), RuleConfig(RuleConfig), STI(STI), MRI(*B.getMRI()),
- Observer(Observer), B(B), MF(B.getMF()),
+ const AArch64Subtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
+ Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
+ RuleConfig(RuleConfig), STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
-class AArch64PostLegalizerCombinerInfo : public CombinerInfo {
- GISelKnownBits *KB;
- MachineDominatorTree *MDT;
-
-public:
- AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;
-
- AArch64PostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
- GISelKnownBits *KB,
- MachineDominatorTree *MDT)
- : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
- /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
- KB(KB), MDT(MDT) {
- if (!RuleConfig.parseCommandLineOption())
- report_fatal_error("Invalid rule identifier");
- }
-
- bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
-};
-
-bool AArch64PostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
- const auto &STI = MI.getMF()->getSubtarget<AArch64Subtarget>();
- const auto *LI = STI.getLegalizerInfo();
- CombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT, LI);
- AArch64PostLegalizerCombinerImpl Impl(RuleConfig, STI, Observer, B, Helper);
- Impl.setupMF(*MI.getMF(), KB);
- return Impl.tryCombineAll(MI);
-}
-
class AArch64PostLegalizerCombiner : public MachineFunctionPass {
public:
static char ID;
@@ -429,6 +440,23 @@ public:
private:
bool IsOptNone;
+ AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;
+
+
+ struct StoreInfo {
+ GStore *St = nullptr;
+ // The G_PTR_ADD that's used by the store. We keep this to cache the
+ // MachineInstr def.
+ GPtrAdd *Ptr = nullptr;
+ // The signed offset to the Ptr instruction.
+ int64_t Offset = 0;
+ LLT StoredType;
+ };
+ bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
+ CSEMIRBuilder &MIB);
+
+ bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
+ CSEMIRBuilder &MIB);
};
} // end anonymous namespace
@@ -450,6 +478,9 @@ void AArch64PostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AArch64PostLegalizerCombiner::AArch64PostLegalizerCombiner(bool IsOptNone)
: MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeAArch64PostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
}
bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
@@ -462,17 +493,208 @@ bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
auto *TPC = &getAnalysis<TargetPassConfig>();
const Function &F = MF.getFunction();
bool EnableOpt =
- MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
+
+ const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
+ const auto *LI = ST.getLegalizerInfo();
+
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
MachineDominatorTree *MDT =
IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
- AArch64PostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
- F.hasMinSize(), KB, MDT);
GISelCSEAnalysisWrapper &Wrapper =
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
- Combiner C(PCInfo, TPC);
- return C.combineMachineInstrs(MF, CSEInfo);
+
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
+ F.hasMinSize());
+ AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo,
+ RuleConfig, ST, MDT, LI);
+ bool Changed = Impl.combineMachineInstrs();
+
+ auto MIB = CSEMIRBuilder(MF);
+ MIB.setCSEInfo(CSEInfo);
+ Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);
+ return Changed;
+}
+
+bool AArch64PostLegalizerCombiner::tryOptimizeConsecStores(
+ SmallVectorImpl<StoreInfo> &Stores, CSEMIRBuilder &MIB) {
+ if (Stores.size() <= 2)
+ return false;
+
+ // Profitability checks:
+ int64_t BaseOffset = Stores[0].Offset;
+ unsigned NumPairsExpected = Stores.size() / 2;
+ unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);
+ // Size savings will depend on whether we can fold the offset as an
+ // immediate of an ADD.
+ auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
+ if (!TLI.isLegalAddImmediate(BaseOffset))
+ TotalInstsExpected++;
+ int SavingsExpected = Stores.size() - TotalInstsExpected;
+ if (SavingsExpected <= 0)
+ return false;
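+ // (Worked example: 5 stores give NumPairsExpected = 2 and
+ // TotalInstsExpected = 3 (2 STPs + 1 leftover store), or 4 if an extra ADD
+ // is needed for the base, so SavingsExpected is 2 or 1 and we proceed.
+ // 3 stores with a non-foldable base offset give 1 + 1 + 1 = 3 instructions
+ // for 3 stores, so SavingsExpected is 0 and we bail out.)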
+
+ auto &MRI = MIB.getMF().getRegInfo();
+
+ // We have a series of consecutive stores. Factor out the common base
+ // pointer and rewrite the offsets.
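+ // For instance, for stores addressed at %base + 4128, + 4144 and + 4160 (as
+ // in the example in optimizeConsecutiveMemOpAddressing below), the result
+ // of the first G_PTR_ADD becomes the new base and the stores are rewritten
+ // to use offsets 0, 16 and 32 from it, which STP addressing can reach.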
+ Register NewBase = Stores[0].Ptr->getReg(0);
+ for (auto &SInfo : Stores) {
+ // Compute a new pointer with the new base ptr and adjusted offset.
+ MIB.setInstrAndDebugLoc(*SInfo.St);
+ auto NewOff = MIB.buildConstant(LLT::scalar(64), SInfo.Offset - BaseOffset);
+ auto NewPtr = MIB.buildPtrAdd(MRI.getType(SInfo.St->getPointerReg()),
+ NewBase, NewOff);
+ if (MIB.getObserver())
+ MIB.getObserver()->changingInstr(*SInfo.St);
+ SInfo.St->getOperand(1).setReg(NewPtr.getReg(0));
+ if (MIB.getObserver())
+ MIB.getObserver()->changedInstr(*SInfo.St);
+ }
+ LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
+ << " stores into a base pointer and offsets.\n");
+ return true;
+}
+
+static cl::opt<bool>
+ EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",
+ cl::init(true), cl::Hidden,
+ cl::desc("Enable consecutive memop optimization "
+ "in AArch64PostLegalizerCombiner"));
+
+bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(
+ MachineFunction &MF, CSEMIRBuilder &MIB) {
+ // This combine needs to run after all reassociations/folds on pointer
+ // addressing have been done, specifically those that combine two G_PTR_ADDs
+ // with constant offsets into a single G_PTR_ADD with a combined offset.
+ // The goal of this optimization is to undo that combine in the case where
+ // doing so has prevented the formation of pair stores due to illegal
+ // addressing modes of STP. The reason that we do it here is because
+ // it's much easier to undo the transformation of a series of consecutive
+ // mem ops, than it is to detect when doing it would be a bad idea looking
+ // at a single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.
+ //
+ // An example:
+ // G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)
+ // %off1:_(s64) = G_CONSTANT i64 4128
+ // %p1:_(p0) = G_PTR_ADD %base:_, %off1:_(s64)
+ // G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)
+ // %off2:_(s64) = G_CONSTANT i64 4144
+ // %p2:_(p0) = G_PTR_ADD %base:_, %off2:_(s64)
+ // G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)
+ // %off3:_(s64) = G_CONSTANT i64 4160
+ // %p3:_(p0) = G_PTR_ADD %base:_, %off3:_(s64)
+ // G_STORE %11:_(<2 x s64>), %p3:_(p0) :: (store (<2 x s64>), align 1)
+ bool Changed = false;
+ auto &MRI = MF.getRegInfo();
+
+ if (!EnableConsecutiveMemOpOpt)
+ return Changed;
+
+ SmallVector<StoreInfo, 8> Stores;
+ // If we see a load, then we keep track of any values defined by it.
+ // In the following example, STP formation will fail anyway because
+ // the latter store is using a load result that appears after the
+ // prior store. In this situation, if we factor out the offset then
+ // we increase code size for no benefit.
+ // G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))
+ // %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))
+ // G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))
+ SmallVector<Register> LoadValsSinceLastStore;
+
+ auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {
+ // Check if this store is consecutive to the last one.
+ if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||
+ (Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
+ New.Offset) ||
+ Last.StoredType != New.StoredType)
+ return false;
+
+ // Check if this store is using a load result that appears after the
+ // last store. If so, bail out.
+ if (any_of(LoadValsSinceLastStore, [&](Register LoadVal) {
+ return New.St->getValueReg() == LoadVal;
+ }))
+ return false;
+
+ // Check if the current offset would be too large for STP.
+ // If not, then STP formation should be able to handle it, so we don't
+ // need to do anything.
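+ // (STP encodes a signed 7-bit immediate scaled by the access size, so the
+ // largest positive offsets are 63 * 4 = 252, 63 * 8 = 504 and 63 * 16 = 1008
+ // bytes for 32-, 64- and 128-bit stores respectively.)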
+ int64_t MaxLegalOffset;
+ switch (New.StoredType.getSizeInBits()) {
+ case 32:
+ MaxLegalOffset = 252;
+ break;
+ case 64:
+ MaxLegalOffset = 504;
+ break;
+ case 128:
+ MaxLegalOffset = 1008;
+ break;
+ default:
+ llvm_unreachable("Unexpected stored type size");
+ }
+ if (New.Offset < MaxLegalOffset)
+ return false;
+
+ // If factoring it out still wouldn't help then don't bother.
+ return New.Offset - Stores[0].Offset <= MaxLegalOffset;
+ };
+
+ auto resetState = [&]() {
+ Stores.clear();
+ LoadValsSinceLastStore.clear();
+ };
+
+ for (auto &MBB : MF) {
+ // We're looking inside a single BB at a time since the memset pattern
+ // should only be in a single block.
+ resetState();
+ for (auto &MI : MBB) {
+ if (auto *St = dyn_cast<GStore>(&MI)) {
+ Register PtrBaseReg;
+ APInt Offset;
+ LLT StoredValTy = MRI.getType(St->getValueReg());
+ unsigned ValSize = StoredValTy.getSizeInBits();
+ if (ValSize < 32 || ValSize != St->getMMO().getSizeInBits())
+ continue;
+
+ Register PtrReg = St->getPointerReg();
+ if (mi_match(
+ PtrReg, MRI,
+ m_OneNonDBGUse(m_GPtrAdd(m_Reg(PtrBaseReg), m_ICst(Offset))))) {
+ GPtrAdd *PtrAdd = cast<GPtrAdd>(MRI.getVRegDef(PtrReg));
+ StoreInfo New = {St, PtrAdd, Offset.getSExtValue(), StoredValTy};
+
+ if (Stores.empty()) {
+ Stores.push_back(New);
+ continue;
+ }
+
+ // Check if this store is a valid continuation of the sequence.
+ auto &Last = Stores.back();
+ if (storeIsValid(Last, New)) {
+ Stores.push_back(New);
+ LoadValsSinceLastStore.clear(); // Reset the load value tracking.
+ } else {
+ // The store isn't a valid continuation of the prior sequence,
+ // so try to optimize what we have so far and start a new sequence.
+ Changed |= tryOptimizeConsecStores(Stores, MIB);
+ resetState();
+ Stores.push_back(New);
+ }
+ }
+ } else if (auto *Ld = dyn_cast<GLoad>(&MI)) {
+ LoadValsSinceLastStore.push_back(Ld->getDstReg());
+ }
+ }
+ Changed |= tryOptimizeConsecStores(Stores, MIB);
+ resetState();
+ }
+
+ return Changed;
}
char AArch64PostLegalizerCombiner::ID = 0;