author     Dimitry Andric <dim@FreeBSD.org>  2023-07-26 19:03:47 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2023-07-26 19:04:23 +0000
commit     7fa27ce4a07f19b07799a767fc29416f3b625afb (patch)
tree       27825c83636c4de341eb09a74f49f5d38a15d165 /llvm/lib/CodeGen
parent     e3b557809604d036af6e00c60f012c2025b59a5e (diff)
download   src-7fa27ce4a07f19b07799a767fc29416f3b625afb.tar.gz
           src-7fa27ce4a07f19b07799a767fc29416f3b625afb.zip

Vendor import of llvm-project main llvmorg-17-init-19304-gd0b54bb50e51,
the last commit before the upstream release/17.x branch was created.
(tag: vendor/llvm-project/llvmorg-17-init-19304-gd0b54bb50e51)

Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--  llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 18
-rw-r--r--  llvm/lib/CodeGen/AggressiveAntiDepBreaker.h | 3
-rw-r--r--  llvm/lib/CodeGen/Analysis.cpp | 63
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 18
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 250
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 57
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 55
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 3
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 251
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 57
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 289
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 26
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 18
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 7
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 4
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 19
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 22
-rw-r--r--  llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp | 738
-rw-r--r--  llvm/lib/CodeGen/AtomicExpandPass.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/BasicBlockSections.cpp | 18
-rw-r--r--  llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 92
-rw-r--r--  llvm/lib/CodeGen/BranchFolding.cpp | 87
-rw-r--r--  llvm/lib/CodeGen/BranchFolding.h | 16
-rw-r--r--  llvm/lib/CodeGen/BranchRelaxation.cpp | 19
-rw-r--r--  llvm/lib/CodeGen/BreakFalseDeps.cpp | 30
-rw-r--r--  llvm/lib/CodeGen/CFIInstrInserter.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/CalcSpillWeights.cpp | 19
-rw-r--r--  llvm/lib/CodeGen/CallBrPrepare.cpp | 231
-rw-r--r--  llvm/lib/CodeGen/CallingConvLower.cpp | 29
-rw-r--r--  llvm/lib/CodeGen/CodeGen.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/CodeGenCommonISel.cpp | 18
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 438
-rw-r--r--  llvm/lib/CodeGen/CommandFlags.cpp | 65
-rw-r--r--  llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp | 1670
-rw-r--r--  llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 37
-rw-r--r--  llvm/lib/CodeGen/DFAPacketizer.cpp | 36
-rw-r--r--  llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 40
-rw-r--r--  llvm/lib/CodeGen/DetectDeadLanes.cpp | 215
-rw-r--r--  llvm/lib/CodeGen/DwarfEHPrepare.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/EarlyIfConversion.cpp | 86
-rw-r--r--  llvm/lib/CodeGen/ExecutionDomainFix.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/ExpandMemCmp.cpp | 18
-rw-r--r--  llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 73
-rw-r--r--  llvm/lib/CodeGen/ExpandReductions.cpp | 34
-rw-r--r--  llvm/lib/CodeGen/ExpandVectorPredication.cpp | 49
-rw-r--r--  llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/GCRootLowering.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 535
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp | 68
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 154
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 13
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp | 60
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 28
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 571
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 322
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 45
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/GlobalMerge.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/HardwareLoops.cpp | 171
-rw-r--r--  llvm/lib/CodeGen/IfConversion.cpp | 50
-rw-r--r--  llvm/lib/CodeGen/ImplicitNullChecks.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/InlineSpiller.cpp | 117
-rw-r--r--  llvm/lib/CodeGen/InterferenceCache.cpp | 15
-rw-r--r--  llvm/lib/CodeGen/InterferenceCache.h | 5
-rw-r--r--  llvm/lib/CodeGen/InterleavedAccessPass.cpp | 152
-rw-r--r--  llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/KCFI.cpp | 111
-rw-r--r--  llvm/lib/CodeGen/LLVMTargetMachine.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 46
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 8
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 14
-rw-r--r--  llvm/lib/CodeGen/LiveInterval.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/LiveIntervals.cpp | 27
-rw-r--r--  llvm/lib/CodeGen/LivePhysRegs.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/LiveRangeEdit.cpp | 17
-rw-r--r--  llvm/lib/CodeGen/LiveRangeShrink.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/LiveRegMatrix.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/LiveVariables.cpp | 76
-rw-r--r--  llvm/lib/CodeGen/LowLevelType.cpp | 101
-rw-r--r--  llvm/lib/CodeGen/LowLevelTypeUtils.cpp | 85
-rw-r--r--  llvm/lib/CodeGen/MIRFSDiscriminator.cpp | 80
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MILexer.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MILexer.h | 1
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MIParser.cpp | 21
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 74
-rw-r--r--  llvm/lib/CodeGen/MIRPrinter.cpp | 34
-rw-r--r--  llvm/lib/CodeGen/MIRSampleProfile.cpp | 95
-rw-r--r--  llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 47
-rw-r--r--  llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp | 42
-rw-r--r--  llvm/lib/CodeGen/MachineBasicBlock.cpp | 97
-rw-r--r--  llvm/lib/CodeGen/MachineBlockPlacement.cpp | 31
-rw-r--r--  llvm/lib/CodeGen/MachineCSE.cpp | 24
-rw-r--r--  llvm/lib/CodeGen/MachineCheckDebugify.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/MachineCombiner.cpp | 98
-rw-r--r--  llvm/lib/CodeGen/MachineCopyPropagation.cpp | 494
-rw-r--r--  llvm/lib/CodeGen/MachineDebugify.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/MachineFrameInfo.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/MachineFunction.cpp | 54
-rw-r--r--  llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 126
-rw-r--r--  llvm/lib/CodeGen/MachineInstr.cpp | 100
-rw-r--r--  llvm/lib/CodeGen/MachineInstrBundle.cpp | 37
-rw-r--r--  llvm/lib/CodeGen/MachineLICM.cpp | 55
-rw-r--r--  llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 104
-rw-r--r--  llvm/lib/CodeGen/MachineLoopInfo.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachineModuleInfo.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/MachineOperand.cpp | 26
-rw-r--r--  llvm/lib/CodeGen/MachineOutliner.cpp | 298
-rw-r--r--  llvm/lib/CodeGen/MachinePassManager.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachinePipeliner.cpp | 54
-rw-r--r--  llvm/lib/CodeGen/MachineRegisterInfo.cpp | 22
-rw-r--r--  llvm/lib/CodeGen/MachineSSAContext.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/MachineScheduler.cpp | 412
-rw-r--r--  llvm/lib/CodeGen/MachineSink.cpp | 159
-rw-r--r--  llvm/lib/CodeGen/MachineSizeOpts.cpp | 166
-rw-r--r--  llvm/lib/CodeGen/MachineTraceMetrics.cpp | 88
-rw-r--r--  llvm/lib/CodeGen/MachineUniformityAnalysis.cpp | 95
-rw-r--r--  llvm/lib/CodeGen/MachineVerifier.cpp | 239
-rw-r--r--  llvm/lib/CodeGen/ModuloSchedule.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/OptimizePHIs.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/PHIElimination.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/PeepholeOptimizer.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/PostRASchedulerList.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 170
-rw-r--r--  llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/PrologEpilogInserter.cpp | 137
-rw-r--r--  llvm/lib/CodeGen/PseudoProbeInserter.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/RDFGraph.cpp | 930
-rw-r--r--  llvm/lib/CodeGen/RDFLiveness.cpp | 328
-rw-r--r--  llvm/lib/CodeGen/RDFRegisters.cpp | 321
-rw-r--r--  llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 25
-rw-r--r--  llvm/lib/CodeGen/RegAllocBasic.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/RegAllocEvictionAdvisor.h | 2
-rw-r--r--  llvm/lib/CodeGen/RegAllocFast.cpp | 354
-rw-r--r--  llvm/lib/CodeGen/RegAllocGreedy.cpp | 87
-rw-r--r--  llvm/lib/CodeGen/RegAllocGreedy.h | 26
-rw-r--r--  llvm/lib/CodeGen/RegAllocPBQP.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/RegUsageInfoCollector.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/RegisterBank.cpp | 18
-rw-r--r--  llvm/lib/CodeGen/RegisterBankInfo.cpp | 52
-rw-r--r--  llvm/lib/CodeGen/RegisterCoalescer.cpp | 100
-rw-r--r--  llvm/lib/CodeGen/RegisterPressure.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/RegisterScavenging.cpp | 159
-rw-r--r--  llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/ResetMachineFunctionPass.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp | 17
-rw-r--r--  llvm/lib/CodeGen/ScheduleDAG.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 24
-rw-r--r--  llvm/lib/CodeGen/SelectOptimize.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2633
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 98
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 49
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 461
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 138
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 389
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 40
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 93
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 377
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 973
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 717
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 62
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 38
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 125
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 619
-rw-r--r--  llvm/lib/CodeGen/ShrinkWrap.cpp | 536
-rw-r--r--  llvm/lib/CodeGen/SjLjEHPrepare.cpp | 26
-rw-r--r--  llvm/lib/CodeGen/SlotIndexes.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/SpillPlacement.h | 10
-rw-r--r--  llvm/lib/CodeGen/SplitKit.cpp | 19
-rw-r--r--  llvm/lib/CodeGen/SplitKit.h | 11
-rw-r--r--  llvm/lib/CodeGen/StackColoring.cpp | 13
-rw-r--r--  llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/StackMaps.cpp | 13
-rw-r--r--  llvm/lib/CodeGen/StackProtector.cpp | 117
-rw-r--r--  llvm/lib/CodeGen/StackSlotColoring.cpp | 73
-rw-r--r--  llvm/lib/CodeGen/TailDuplicator.cpp | 25
-rw-r--r--  llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/TargetInstrInfo.cpp | 186
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp | 69
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 127
-rw-r--r--  llvm/lib/CodeGen/TargetPassConfig.cpp | 57
-rw-r--r--  llvm/lib/CodeGen/TargetRegisterInfo.cpp | 20
-rw-r--r--  llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 61
-rw-r--r--  llvm/lib/CodeGen/TypePromotion.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/UnreachableBlockElim.cpp | 41
-rw-r--r--  llvm/lib/CodeGen/VLIWMachineScheduler.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/ValueTypes.cpp | 45
-rw-r--r--  llvm/lib/CodeGen/VirtRegMap.cpp | 31
-rw-r--r--  llvm/lib/CodeGen/WasmEHPrepare.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/WinEHPrepare.cpp | 150
-rw-r--r--  llvm/lib/CodeGen/XRayInstrumentation.cpp | 3
223 files changed, 15404 insertions(+), 7469 deletions(-)
diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index bb71d72256d8..886c4db069f1 100644
--- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -31,7 +32,6 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <utility>
@@ -200,7 +200,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
LLVM_DEBUG(dbgs() << "\tRegs:");
std::vector<unsigned> &DefIndices = State->GetDefIndices();
- for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ for (unsigned Reg = 1; Reg != TRI->getNumRegs(); ++Reg) {
// If Reg is current live, then mark that it can't be renamed as
// we don't know the extent of its live-range anymore (now that it
// has been scheduled). If it is not live but was defined in the
@@ -246,9 +246,8 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(
if ((MO.isDef() && MI.isRegTiedToUseOperand(i)) ||
IsImplicitDefUse(MI, MO)) {
const Register Reg = MO.getReg();
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- PassthruRegs.insert(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ PassthruRegs.insert(SubReg);
}
}
}
@@ -322,8 +321,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
// was not live because otherwise, regardless whether we have an explicit
// use of the subregister, the subregister's contents are needed for the
// uses of the superregister.
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubregReg = *SubRegs;
+ for (MCPhysReg SubregReg : TRI->subregs(Reg)) {
if (!State->IsLive(SubregReg)) {
KillIndices[SubregReg] = KillIdx;
DefIndices[SubregReg] = ~0u;
@@ -353,8 +351,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
// dead, or because only a subregister is live at the def. If we
// don't do this the dead def will be incorrectly merged into the
// previous def.
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef()) continue;
+ for (const MachineOperand &MO : MI.all_defs()) {
Register Reg = MO.getReg();
if (Reg == 0) continue;
@@ -778,7 +775,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
LLVM_DEBUG(dbgs() << "Available regs:");
- for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ for (unsigned Reg = 1; Reg < TRI->getNumRegs(); ++Reg) {
if (!State->IsLive(Reg))
LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
}
@@ -922,7 +919,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
}
assert(AntiDepReg != 0);
- if (AntiDepReg == 0) continue;
// Determine AntiDepReg's register group.
const unsigned GroupIndex = State->GetGroup(AntiDepReg);
diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
index 419cb7626945..cece217e645c 100644
--- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -132,6 +132,9 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
AggressiveAntiDepBreaker(MachineFunction &MFi,
const RegisterClassInfo &RCI,
TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
+ AggressiveAntiDepBreaker &
+ operator=(const AggressiveAntiDepBreaker &other) = delete;
+ AggressiveAntiDepBreaker(const AggressiveAntiDepBreaker &other) = delete;
~AggressiveAntiDepBreaker() override;
/// Initialize anti-dep breaking for a new basic block.
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index b9579441a0ba..2065bfbd1c44 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -79,8 +79,8 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<EVT> *MemVTs,
- SmallVectorImpl<uint64_t> *Offsets,
- uint64_t StartingOffset) {
+ SmallVectorImpl<TypeSize> *Offsets,
+ TypeSize StartingOffset) {
// Given a struct type, recursively traverse the elements.
if (StructType *STy = dyn_cast<StructType>(Ty)) {
// If the Offsets aren't needed, don't query the struct layout. This allows
@@ -92,7 +92,8 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
EE = STy->element_end();
EI != EE; ++EI) {
// Don't compute the element offset if we didn't get a StructLayout above.
- uint64_t EltOffset = SL ? SL->getElementOffset(EI - EB) : 0;
+ TypeSize EltOffset = SL ? SL->getElementOffset(EI - EB)
+ : TypeSize::get(0, StartingOffset.isScalable());
ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
StartingOffset + EltOffset);
}
@@ -101,7 +102,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
// Given an array type, recursively traverse the elements.
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Type *EltTy = ATy->getElementType();
- uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
+ TypeSize EltSize = DL.getTypeAllocSize(EltTy);
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
StartingOffset + i * EltSize);
@@ -120,12 +121,62 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<uint64_t> *Offsets,
- uint64_t StartingOffset) {
+ SmallVectorImpl<TypeSize> *Offsets,
+ TypeSize StartingOffset) {
return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets,
StartingOffset);
}
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<TypeSize> *Offsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ return ComputeValueVTs(TLI, DL, Ty, ValueVTs, Offsets, Offset);
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *FixedOffsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ SmallVector<TypeSize, 4> Offsets;
+ if (FixedOffsets)
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, &Offsets, Offset);
+ else
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offset);
+
+ if (FixedOffsets)
+ for (TypeSize Offset : Offsets)
+ FixedOffsets->push_back(Offset.getKnownMinValue());
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<TypeSize> *Offsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ return ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, Offsets, Offset);
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<uint64_t> *FixedOffsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ SmallVector<TypeSize, 4> Offsets;
+ if (FixedOffsets)
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, &Offsets, Offset);
+ else
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, nullptr, Offset);
+
+ if (FixedOffsets)
+ for (TypeSize Offset : Offsets)
+ FixedOffsets->push_back(Offset.getKnownMinValue());
+}
+
void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
SmallVectorImpl<LLT> &ValueTys,
SmallVectorImpl<uint64_t> *Offsets,
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 22ecc5199742..aab3c2681339 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -194,8 +194,8 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
uint32_t CompUnitCount;
uint32_t LocalTypeUnitCount = 0;
uint32_t ForeignTypeUnitCount = 0;
- uint32_t BucketCount;
- uint32_t NameCount;
+ uint32_t BucketCount = 0;
+ uint32_t NameCount = 0;
uint32_t AbbrevTableSize = 0;
uint32_t AugmentationStringSize = sizeof(AugmentationString);
char AugmentationString[8] = {'L', 'L', 'V', 'M', '0', '7', '0', '0'};
@@ -549,9 +549,13 @@ void llvm::emitDWARF5AccelTable(
SmallVector<unsigned, 1> CUIndex(CUs.size());
int Count = 0;
for (const auto &CU : enumerate(CUs)) {
- if (CU.value()->getCUNode()->getNameTableKind() !=
- DICompileUnit::DebugNameTableKind::Default)
+ switch (CU.value()->getCUNode()->getNameTableKind()) {
+ case DICompileUnit::DebugNameTableKind::Default:
+ case DICompileUnit::DebugNameTableKind::Apple:
+ break;
+ default:
continue;
+ }
CUIndex[CU.index()] = Count++;
assert(CU.index() == CU.value()->getUniqueID());
const DwarfCompileUnit *MainCU =
@@ -660,9 +664,9 @@ void AccelTableBase::HashData::print(raw_ostream &OS) const {
void AccelTableBase::print(raw_ostream &OS) const {
// Print Content.
OS << "Entries: \n";
- for (const auto &Entry : Entries) {
- OS << "Name: " << Entry.first() << "\n";
- for (auto *V : Entry.second.Values)
+ for (const auto &[Name, Data] : Entries) {
+ OS << "Name: " << Name << "\n";
+ for (auto *V : Data.Values)
V->print(OS);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 32d8dc793510..00ee4e1b47a8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include <utility>
@@ -24,7 +25,7 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
}
MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
- static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize();
+ static const uint8_t AddrSize = Asm.MAI->getCodePointerSize();
MCSymbol *EndLabel =
Asm.emitDwarfUnitLength("debug_addr", "Length of contribution");
@@ -65,7 +66,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
: MCSymbolRefExpr::create(I.first, Asm.OutContext);
for (const MCExpr *Entry : Entries)
- Asm.OutStreamer->emitValue(Entry, Asm.getDataLayout().getPointerSize());
+ Asm.OutStreamer->emitValue(Entry, Asm.MAI->getCodePointerSize());
if (EndLabel)
Asm.OutStreamer->emitLabel(EndLabel);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 8c126d20fc9a..5381dfdd184c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -26,12 +26,11 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -39,6 +38,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -67,6 +67,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/GlobalAlias.h"
@@ -99,6 +100,7 @@
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/Object/ELFTypes.h"
#include "llvm/Pass.h"
#include "llvm/Remarks/RemarkStreamer.h"
#include "llvm/Support/Casting.h"
@@ -113,6 +115,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cinttypes>
@@ -128,6 +131,13 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
+static cl::opt<std::string> BasicBlockProfileDump(
+ "mbb-profile-dump", cl::Hidden,
+ cl::desc("Basic block profile dump for external cost modelling. If "
+ "matching up BBs with afterwards, the compilation must be "
+ "performed with -basic-block-sections=labels. Enabling this "
+ "flag during in-process ThinLTO is not supported."));
+
const char DWARFGroupName[] = "dwarf";
const char DWARFGroupDescription[] = "DWARF Emission";
const char DbgTimerName[] = "emit";
@@ -414,6 +424,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -475,6 +486,11 @@ bool AsmPrinter::doInitialization(Module &M) {
}
}
+ // On AIX, emit bytes for llvm.commandline metadata after .file so that the
+ // C_INFO symbol is preserved if any csect is kept by the linker.
+ if (TM.getTargetTriple().isOSBinFormatXCOFF())
+ emitModuleCommandLines(M);
+
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (const auto &I : *MI)
@@ -531,7 +547,7 @@ bool AsmPrinter::doInitialization(Module &M) {
break;
}
assert(MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI ||
- ModuleCFISection != CFISection::EH);
+ usesCFIWithoutEH() || ModuleCFISection != CFISection::EH);
break;
default:
break;
@@ -540,7 +556,7 @@ bool AsmPrinter::doInitialization(Module &M) {
EHStreamer *ES = nullptr;
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
- if (!needsCFIForDebug())
+ if (!usesCFIWithoutEH())
break;
[[fallthrough]];
case ExceptionHandling::SjLj:
@@ -585,6 +601,16 @@ bool AsmPrinter::doInitialization(Module &M) {
HI.Handler->beginModule(&M);
}
+ if (!BasicBlockProfileDump.empty()) {
+ std::error_code PossibleFileError;
+ MBBProfileDumpFileOutput = std::make_unique<raw_fd_ostream>(
+ BasicBlockProfileDump, PossibleFileError);
+ if (PossibleFileError) {
+ M.getContext().emitError("Failed to open file for MBB Profile Dump: " +
+ PossibleFileError.message() + "\n");
+ }
+ }
+
return false;
}
@@ -704,8 +730,8 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (T.getArch() != Triple::aarch64 || !T.isAndroid())
OutContext.reportError(SMLoc(),
- "Tagged symbols (-fsanitize=memtag-globals) are "
- "only supported on aarch64 + Android.");
+ "tagged symbols (-fsanitize=memtag-globals) are "
+ "only supported on AArch64 Android");
OutStreamer->emitSymbolAttribute(EmittedSym, MAI->getMemtagAttr());
}
@@ -908,13 +934,6 @@ void AsmPrinter::emitFunctionHeader() {
if (F.hasFnAttribute(Attribute::Cold))
OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold);
- if (isVerbose()) {
- F.printAsOperand(OutStreamer->getCommentOS(),
- /*PrintType=*/false, F.getParent());
- emitFunctionHeaderComment();
- OutStreamer->getCommentOS() << '\n';
- }
-
// Emit the prefix data.
if (F.hasPrefixData()) {
if (MAI->hasSubsectionsViaSymbols()) {
@@ -958,6 +977,23 @@ void AsmPrinter::emitFunctionHeader() {
CurrentPatchableFunctionEntrySym = CurrentFnBegin;
}
+ // Emit the function prologue data for the indirect call sanitizer.
+ if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+ assert(MD->getNumOperands() == 2);
+
+ auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
+ auto *TypeHash = mdconst::extract<Constant>(MD->getOperand(1));
+ emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+ emitGlobalConstant(F.getParent()->getDataLayout(), TypeHash);
+ }
+
+ if (isVerbose()) {
+ F.printAsOperand(OutStreamer->getCommentOS(),
+ /*PrintType=*/false, F.getParent());
+ emitFunctionHeaderComment();
+ OutStreamer->getCommentOS() << '\n';
+ }
+
// Emit the function descriptor. This is a virtual function to allow targets
// to emit their specific function descriptor. Right now it is only used by
// the AIX target. The PowerPC 64-bit V1 ELF target also uses function
@@ -1005,24 +1041,6 @@ void AsmPrinter::emitFunctionHeader() {
// Emit the prologue data.
if (F.hasPrologueData())
emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
-
- // Emit the function prologue data for the indirect call sanitizer.
- if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
- assert(TM.getTargetTriple().getArch() == Triple::x86 ||
- TM.getTargetTriple().getArch() == Triple::x86_64);
- assert(MD->getNumOperands() == 2);
-
- auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
- auto *FTRTTIProxy = mdconst::extract<Constant>(MD->getOperand(1));
- assert(PrologueSig && FTRTTIProxy);
- emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
-
- const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
- const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
- const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
- // Use 32 bit since only small code model is supported.
- OutStreamer->emitValue(PCRel, 4u);
- }
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -1254,6 +1272,10 @@ AsmPrinter::getFunctionCFISectionType(const Function &F) const {
F.needsUnwindTableEntry())
return CFISection::EH;
+ if (MAI->usesCFIWithoutEH() && F.hasUWTable())
+ return CFISection::EH;
+
+ assert(MMI != nullptr && "Invalid machine module info");
if (MMI->hasDebugInfo() || TM.Options.ForceDwarfFrameSection)
return CFISection::Debug;
@@ -1269,14 +1291,13 @@ bool AsmPrinter::needsSEHMoves() {
return MAI->usesWindowsCFI() && MF->getFunction().needsUnwindTableEntry();
}
-bool AsmPrinter::needsCFIForDebug() const {
- return MAI->getExceptionHandlingType() == ExceptionHandling::None &&
- MAI->doesUseCFIForDebug() && ModuleCFISection == CFISection::Debug;
+bool AsmPrinter::usesCFIWithoutEH() const {
+ return MAI->usesCFIWithoutEH() && ModuleCFISection != CFISection::None;
}
void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
- if (!needsCFIForDebug() &&
+ if (!usesCFIWithoutEH() &&
ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
ExceptionHandlingType != ExceptionHandling::ARM)
return;
@@ -1310,21 +1331,16 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
MCConstantExpr::create(FrameOffset, OutContext));
}
-/// Returns the BB metadata to be emitted in the .llvm_bb_addr_map section for a
-/// given basic block. This can be used to capture more precise profile
-/// information. We use the last 4 bits (LSBs) to encode the following
-/// information:
-/// * (1): set if return block (ret or tail call).
-/// * (2): set if ends with a tail call.
-/// * (3): set if exception handling (EH) landing pad.
-/// * (4): set if the block can fall through to its next.
-/// The remaining bits are zero.
-static unsigned getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
+/// Returns the BB metadata to be emitted in the SHT_LLVM_BB_ADDR_MAP section
+/// for a given basic block. This can be used to capture more precise profile
+/// information.
+static uint32_t getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
- return ((unsigned)MBB.isReturnBlock()) |
- ((!MBB.empty() && TII->isTailCall(MBB.back())) << 1) |
- (MBB.isEHPad() << 2) |
- (const_cast<MachineBasicBlock &>(MBB).canFallThrough() << 3);
+ return object::BBAddrMap::BBEntry::Metadata{
+ MBB.isReturnBlock(), !MBB.empty() && TII->isTailCall(MBB.back()),
+ MBB.isEHPad(), const_cast<MachineBasicBlock &>(MBB).canFallThrough(),
+ !MBB.empty() && MBB.rbegin()->isIndirectBranch()}
+ .encode();
}
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
@@ -1346,7 +1362,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
OutStreamer->AddComment("number of basic blocks");
OutStreamer->emitULEB128IntValue(MF.size());
const MCSymbol *PrevMBBEndSymbol = FunctionSymbol;
- // Emit BB Information for each basic block in the funciton.
+ // Emit BB Information for each basic block in the function.
for (const MachineBasicBlock &MBB : MF) {
const MCSymbol *MBBSymbol =
MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol();
@@ -1496,9 +1512,22 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
// constants may appear, which will simply be emitted into the current
// section (the user of MD_pcsections decides the format of encoded data).
assert(isa<MDString>(MD.getOperand(0)) && "first operand not a string");
+ bool ConstULEB128 = false;
for (const MDOperand &MDO : MD.operands()) {
if (auto *S = dyn_cast<MDString>(MDO)) {
- SwitchSection(S->getString());
+ // Found string, start of new section!
+ // Find options for this section "<section>!<opts>" - supported options:
+ // C = Compress constant integers of size 2-8 bytes as ULEB128.
+ const StringRef SecWithOpt = S->getString();
+ const size_t OptStart = SecWithOpt.find('!'); // likely npos
+ const StringRef Sec = SecWithOpt.substr(0, OptStart);
+ const StringRef Opts = SecWithOpt.substr(OptStart); // likely empty
+ ConstULEB128 = Opts.find('C') != StringRef::npos;
+#ifndef NDEBUG
+ for (char O : Opts)
+ assert((O == '!' || O == 'C') && "Invalid !pcsections options");
+#endif
+ SwitchSection(Sec);
const MCSymbol *Prev = Syms.front();
for (const MCSymbol *Sym : Syms) {
if (Sym == Prev || !Deltas) {
@@ -1510,17 +1539,30 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
// `base + addr`.
emitLabelDifference(Sym, Base, RelativeRelocSize);
} else {
- emitLabelDifference(Sym, Prev, 4);
+ // Emit delta between symbol and previous symbol.
+ if (ConstULEB128)
+ emitLabelDifferenceAsULEB128(Sym, Prev);
+ else
+ emitLabelDifference(Sym, Prev, 4);
}
Prev = Sym;
}
} else {
+ // Emit auxiliary data after PC.
assert(isa<MDNode>(MDO) && "expecting either string or tuple");
const auto *AuxMDs = cast<MDNode>(MDO);
for (const MDOperand &AuxMDO : AuxMDs->operands()) {
assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant");
- const auto *C = cast<ConstantAsMetadata>(AuxMDO);
- emitGlobalConstant(F.getParent()->getDataLayout(), C->getValue());
+ const Constant *C = cast<ConstantAsMetadata>(AuxMDO)->getValue();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const uint64_t Size = DL.getTypeStoreSize(C->getType());
+
+ if (auto *CI = dyn_cast<ConstantInt>(C);
+ CI && ConstULEB128 && Size > 1 && Size <= 8) {
+ emitULEB128(CI->getZExtValue());
+ } else {
+ emitGlobalConstant(DL, C);
+ }
}
}
}
@@ -1582,6 +1624,7 @@ void AsmPrinter::emitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
int NumInstsInFunction = 0;
+ bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
for (auto &MBB : *MF) {
@@ -1620,10 +1663,25 @@ void AsmPrinter::emitFunctionBody() {
emitFrameAlloc(MI);
break;
case TargetOpcode::ANNOTATION_LABEL:
- case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
OutStreamer->emitLabel(MI.getOperand(0).getMCSymbol());
break;
+ case TargetOpcode::EH_LABEL:
+ OutStreamer->emitLabel(MI.getOperand(0).getMCSymbol());
+ // For AsynchEH, insert a Nop if followed by a trap inst
+ // Or the exception won't be caught.
+ // (see MCConstantExpr::create(1,..) in WinException.cpp)
+ // Ignore SDiv/UDiv because a DIV with Const-0 divisor
+ // must have being turned into an UndefValue.
+ // Div with variable opnds won't be the first instruction in
+ // an EH region as it must be led by at least a Load
+ {
+ auto MI2 = std::next(MI.getIterator());
+ if (IsEHa && MI2 != MBB.end() &&
+ (MI2->mayLoadOrStore() || MI2->mayRaiseFPException()))
+ emitNops(1);
+ }
+ break;
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR:
emitInlineAsm(&MI);
@@ -1862,6 +1920,23 @@ void AsmPrinter::emitFunctionBody() {
OutStreamer->getCommentOS() << "-- End function\n";
OutStreamer->addBlankLine();
+
+ // Output MBB ids, function names, and frequencies if the flag to dump
+ // MBB profile information has been set
+ if (MBBProfileDumpFileOutput) {
+ if (!MF->hasBBLabels())
+ MF->getContext().reportError(
+ SMLoc(),
+ "Unable to find BB labels for MBB profile dump. -mbb-profile-dump "
+ "must be called with -basic-block-sections=labels");
+ MachineBlockFrequencyInfo &MBFI =
+ getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
+ for (const auto &MBB : *MF) {
+ *MBBProfileDumpFileOutput.get()
+ << MF->getName() << "," << MBB.getBBID() << ","
+ << MBFI.getBlockFreqRelativeToEntryBlock(&MBB) << "\n";
+ }
+ }
}
/// Compute the number of Global Variables that uses a Constant.
@@ -2235,6 +2310,8 @@ bool AsmPrinter::doFinalization(Module &M) {
SmallVector<const GlobalAlias *, 16> AliasStack;
SmallPtrSet<const GlobalAlias *, 16> AliasVisited;
for (const auto &Alias : M.aliases()) {
+ if (Alias.hasAvailableExternallyLinkage())
+ continue;
for (const GlobalAlias *Cur = &Alias; Cur;
Cur = dyn_cast<GlobalAlias>(Cur->getAliasee())) {
if (!AliasVisited.insert(Cur).second)
@@ -2258,7 +2335,9 @@ bool AsmPrinter::doFinalization(Module &M) {
emitModuleIdents(M);
// Emit bytes for llvm.commandline metadata.
- emitModuleCommandLines(M);
+ // The command line metadata is emitted earlier on XCOFF.
+ if (!TM.getTargetTriple().isOSBinFormatXCOFF())
+ emitModuleCommandLines(M);
// Emit .note.GNU-split-stack and .note.GNU-no-split-stack sections if
// split-stack is used.
@@ -2786,6 +2865,22 @@ void AsmPrinter::emitInt16(int Value) const { OutStreamer->emitInt16(Value); }
/// Emit a long directive and value.
void AsmPrinter::emitInt32(int Value) const { OutStreamer->emitInt32(Value); }
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
+ if (isVerbose() && Desc)
+ OutStreamer->AddComment(Desc);
+
+ OutStreamer->emitSLEB128IntValue(Value);
+}
+
+void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
+ unsigned PadTo) const {
+ if (isVerbose() && Desc)
+ OutStreamer->AddComment(Desc);
+
+ OutStreamer->emitULEB128IntValue(Value, PadTo);
+}
+
/// Emit a long long directive and value.
void AsmPrinter::emitInt64(uint64_t Value) const {
OutStreamer->emitInt64(Value);
@@ -2799,6 +2894,12 @@ void AsmPrinter::emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size);
}
+/// Emit something like ".uleb128 Hi-Lo".
+void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
+ const MCSymbol *Lo) const {
+ OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
+}
+
/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
/// where the size in bytes of the directive is specified by Size and Label
/// specifies the label. This implicitly uses .set if it is available.
@@ -3288,7 +3389,8 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
ExtraBitsSize = alignTo(ExtraBitsSize, 8);
ExtraBits = Realigned.getRawData()[0] &
(((uint64_t)-1) >> (64 - ExtraBitsSize));
- Realigned.lshrInPlace(ExtraBitsSize);
+ if (BitWidth >= 64)
+ Realigned.lshrInPlace(ExtraBitsSize);
} else
ExtraBits = Realigned.getRawData()[BitWidth / 64];
}
@@ -3917,16 +4019,18 @@ void AsmPrinter::emitXRayTable() {
Flags, 0, GroupName, F.hasComdat(),
MCSection::NonUniqueID, LinkedToSym);
- if (!TM.Options.XRayOmitFunctionIndex)
+ if (TM.Options.XRayFunctionIndex)
FnSledIndex = OutContext.getELFSection(
- "xray_fn_idx", ELF::SHT_PROGBITS, Flags | ELF::SHF_WRITE, 0,
- GroupName, F.hasComdat(), MCSection::NonUniqueID, LinkedToSym);
+ "xray_fn_idx", ELF::SHT_PROGBITS, Flags, 0, GroupName, F.hasComdat(),
+ MCSection::NonUniqueID, LinkedToSym);
} else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
- InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
+ InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map",
+ MachO::S_ATTR_LIVE_SUPPORT,
SectionKind::getReadOnlyWithRel());
- if (!TM.Options.XRayOmitFunctionIndex)
- FnSledIndex = OutContext.getMachOSection(
- "__DATA", "xray_fn_idx", 0, SectionKind::getReadOnlyWithRel());
+ if (TM.Options.XRayFunctionIndex)
+ FnSledIndex = OutContext.getMachOSection("__DATA", "xray_fn_idx",
+ MachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
} else {
llvm_unreachable("Unsupported target");
}
@@ -3937,7 +4041,8 @@ void AsmPrinter::emitXRayTable() {
// per-function, we are able to create an index entry that will represent the
// range of sleds associated with a function.
auto &Ctx = OutContext;
- MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true);
+ MCSymbol *SledsStart =
+ OutContext.createLinkerPrivateSymbol("xray_sleds_start");
OutStreamer->switchSection(InstMap);
OutStreamer->emitLabel(SledsStart);
for (const auto &Sled : Sleds) {
@@ -3968,8 +4073,17 @@ void AsmPrinter::emitXRayTable() {
OutStreamer->switchSection(FnSledIndex);
OutStreamer->emitCodeAlignment(Align(2 * WordSizeBytes),
&getSubtargetInfo());
- OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false);
- OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false);
+ // For Mach-O, use an "l" symbol as the atom of this subsection. The label
+ // difference uses a SUBTRACTOR external relocation which references the
+ // symbol.
+ MCSymbol *Dot = Ctx.createLinkerPrivateSymbol("xray_fn_idx");
+ OutStreamer->emitLabel(Dot);
+ OutStreamer->emitValueImpl(
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(SledsStart, Ctx),
+ MCSymbolRefExpr::create(Dot, Ctx), Ctx),
+ WordSizeBytes);
+ OutStreamer->emitValueImpl(MCConstantExpr::create(Sleds.size(), Ctx),
+ WordSizeBytes);
OutStreamer->switchSection(PrevSection);
}
Sleds.clear();
@@ -4041,7 +4155,7 @@ unsigned int AsmPrinter::getDwarfOffsetByteSize() const {
}
dwarf::FormParams AsmPrinter::getDwarfFormParams() const {
- return {getDwarfVersion(), uint8_t(getPointerSize()),
+ return {getDwarfVersion(), uint8_t(MAI->getCodePointerSize()),
OutStreamer->getContext().getDwarfFormat(),
doesDwarfUseRelocationsAcrossSections()};
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index ecaa64afab4d..21d0d070c247 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -32,28 +32,6 @@ using namespace llvm;
// Dwarf Emission Helper Routines
//===----------------------------------------------------------------------===//
-/// EmitSLEB128 - emit the specified signed leb128 value.
-void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
- if (isVerbose() && Desc)
- OutStreamer->AddComment(Desc);
-
- OutStreamer->emitSLEB128IntValue(Value);
-}
-
-void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
- unsigned PadTo) const {
- if (isVerbose() && Desc)
- OutStreamer->AddComment(Desc);
-
- OutStreamer->emitULEB128IntValue(Value, PadTo);
-}
-
-/// Emit something like ".uleb128 Hi-Lo".
-void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
- const MCSymbol *Lo) const {
- OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
-}
-
static const char *DecodeDWARFEncoding(unsigned Encoding) {
switch (Encoding) {
case dwarf::DW_EH_PE_absptr:
@@ -130,7 +108,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
default:
llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr:
- return MF->getDataLayout().getPointerSize();
+ return MAI->getCodePointerSize();
case dwarf::DW_EH_PE_udata2:
return 2;
case dwarf::DW_EH_PE_udata4:
@@ -226,58 +204,59 @@ void AsmPrinter::emitCallSiteValue(uint64_t Value, unsigned Encoding) const {
//===----------------------------------------------------------------------===//
void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
+ SMLoc Loc = Inst.getLoc();
switch (Inst.getOperation()) {
default:
llvm_unreachable("Unexpected instruction");
case MCCFIInstruction::OpDefCfaOffset:
- OutStreamer->emitCFIDefCfaOffset(Inst.getOffset());
+ OutStreamer->emitCFIDefCfaOffset(Inst.getOffset(), Loc);
break;
case MCCFIInstruction::OpAdjustCfaOffset:
- OutStreamer->emitCFIAdjustCfaOffset(Inst.getOffset());
+ OutStreamer->emitCFIAdjustCfaOffset(Inst.getOffset(), Loc);
break;
case MCCFIInstruction::OpDefCfa:
- OutStreamer->emitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
+ OutStreamer->emitCFIDefCfa(Inst.getRegister(), Inst.getOffset(), Loc);
break;
case MCCFIInstruction::OpDefCfaRegister:
- OutStreamer->emitCFIDefCfaRegister(Inst.getRegister());
+ OutStreamer->emitCFIDefCfaRegister(Inst.getRegister(), Loc);
break;
case MCCFIInstruction::OpLLVMDefAspaceCfa:
OutStreamer->emitCFILLVMDefAspaceCfa(Inst.getRegister(), Inst.getOffset(),
- Inst.getAddressSpace());
+ Inst.getAddressSpace(), Loc);
break;
case MCCFIInstruction::OpOffset:
- OutStreamer->emitCFIOffset(Inst.getRegister(), Inst.getOffset());
+ OutStreamer->emitCFIOffset(Inst.getRegister(), Inst.getOffset(), Loc);
break;
case MCCFIInstruction::OpRegister:
- OutStreamer->emitCFIRegister(Inst.getRegister(), Inst.getRegister2());
+ OutStreamer->emitCFIRegister(Inst.getRegister(), Inst.getRegister2(), Loc);
break;
case MCCFIInstruction::OpWindowSave:
- OutStreamer->emitCFIWindowSave();
+ OutStreamer->emitCFIWindowSave(Loc);
break;
case MCCFIInstruction::OpNegateRAState:
- OutStreamer->emitCFINegateRAState();
+ OutStreamer->emitCFINegateRAState(Loc);
break;
case MCCFIInstruction::OpSameValue:
- OutStreamer->emitCFISameValue(Inst.getRegister());
+ OutStreamer->emitCFISameValue(Inst.getRegister(), Loc);
break;
case MCCFIInstruction::OpGnuArgsSize:
- OutStreamer->emitCFIGnuArgsSize(Inst.getOffset());
+ OutStreamer->emitCFIGnuArgsSize(Inst.getOffset(), Loc);
break;
case MCCFIInstruction::OpEscape:
OutStreamer->AddComment(Inst.getComment());
- OutStreamer->emitCFIEscape(Inst.getValues());
+ OutStreamer->emitCFIEscape(Inst.getValues(), Loc);
break;
case MCCFIInstruction::OpRestore:
- OutStreamer->emitCFIRestore(Inst.getRegister());
+ OutStreamer->emitCFIRestore(Inst.getRegister(), Loc);
break;
case MCCFIInstruction::OpUndefined:
- OutStreamer->emitCFIUndefined(Inst.getRegister());
+ OutStreamer->emitCFIUndefined(Inst.getRegister(), Loc);
break;
case MCCFIInstruction::OpRememberState:
- OutStreamer->emitCFIRememberState();
+ OutStreamer->emitCFIRememberState(Loc);
break;
case MCCFIInstruction::OpRestoreState:
- OutStreamer->emitCFIRestoreState();
+ OutStreamer->emitCFIRestoreState(Loc);
break;
}
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index c1588aaea05e..32674bbeb061 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 0a67c4b6beb6..8161de57b58e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -65,6 +64,7 @@
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cctype>
@@ -488,10 +488,10 @@ void CodeViewDebug::recordLocalVariable(LocalVariable &&Var,
// This variable was inlined. Associate it with the InlineSite.
const DISubprogram *Inlinee = Var.DIVar->getScope()->getSubprogram();
InlineSite &Site = getInlineSite(InlinedAt, Inlinee);
- Site.InlinedLocals.emplace_back(Var);
+ Site.InlinedLocals.emplace_back(std::move(Var));
} else {
// This variable goes into the corresponding lexical scope.
- ScopeVariables[LS].emplace_back(Var);
+ ScopeVariables[LS].emplace_back(std::move(Var));
}
}
@@ -569,7 +569,6 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
case dwarf::DW_LANG_C89:
case dwarf::DW_LANG_C99:
case dwarf::DW_LANG_C11:
- case dwarf::DW_LANG_ObjC:
return SourceLanguage::C;
case dwarf::DW_LANG_C_plus_plus:
case dwarf::DW_LANG_C_plus_plus_03:
@@ -595,6 +594,10 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
return SourceLanguage::Swift;
case dwarf::DW_LANG_Rust:
return SourceLanguage::Rust;
+ case dwarf::DW_LANG_ObjC:
+ return SourceLanguage::ObjC;
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ return SourceLanguage::ObjCpp;
default:
// There's no CodeView representation for this language, and CV doesn't
// have an "unknown" option for the language field, so we'll use MASM,
@@ -788,7 +791,6 @@ void CodeViewDebug::emitObjName() {
// Don't emit the filename if we're writing to stdout or to /dev/null.
PathRef = {};
} else {
- llvm::sys::path::remove_dots(PathStore, /*remove_dot_dot=*/true);
PathRef = PathStore;
}
@@ -1158,7 +1160,14 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Function section index");
OS.emitCOFFSectionIndex(Fn);
OS.AddComment("Flags");
- OS.emitInt8(0);
+ ProcSymFlags ProcFlags = ProcSymFlags::HasOptimizedDebugInfo;
+ if (FI.HasFramePointer)
+ ProcFlags |= ProcSymFlags::HasFP;
+ if (GV->hasFnAttribute(Attribute::NoReturn))
+ ProcFlags |= ProcSymFlags::IsNoReturn;
+ if (GV->hasFnAttribute(Attribute::NoInline))
+ ProcFlags |= ProcSymFlags::IsNoInline;
+ OS.emitInt8(static_cast<uint8_t>(ProcFlags));
// Emit the function display name as a null-terminated string.
OS.AddComment("Function name");
// Truncate the name so we won't overflow the record length field.
@@ -1262,7 +1271,8 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
const TargetFrameLowering *TFI = TSI.getFrameLowering();
const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
- for (const MachineFunction::VariableDbgInfo &VI : MF.getVariableDbgInfo()) {
+ for (const MachineFunction::VariableDbgInfo &VI :
+ MF.getInStackSlotVariableDbgInfo()) {
if (!VI.Var)
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
@@ -1290,7 +1300,8 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
// Get the frame register used and the offset.
Register FrameReg;
- StackOffset FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg);
+ StackOffset FrameOffset =
+ TFI->getFrameIndexReference(*Asm->MF, VI.getStackSlot(), FrameReg);
uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg);
assert(!FrameOffset.getScalable() &&
@@ -1476,6 +1487,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr;
CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::StackPtr;
} else {
+ CurFn->HasFramePointer = true;
// If there is an FP, parameters are always relative to it.
CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::FramePtr;
if (CurFn->HasStackRealignment) {
@@ -1717,12 +1729,13 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
// Otherwise, if it has an upperboud, use (upperbound - lowerbound + 1),
// where lowerbound is from the LowerBound field of the Subrange,
// or the language default lowerbound if that field is unspecified.
- if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt *>())
+ if (auto *CI = dyn_cast_if_present<ConstantInt *>(Subrange->getCount()))
Count = CI->getSExtValue();
- else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt *>()) {
+ else if (auto *UI = dyn_cast_if_present<ConstantInt *>(
+ Subrange->getUpperBound())) {
// Fortran uses 1 as the default lowerbound; other languages use 0.
int64_t Lowerbound = (moduleIsInFortran()) ? 1 : 0;
- auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>();
+ auto *LI = dyn_cast_if_present<ConstantInt *>(Subrange->getLowerBound());
Lowerbound = (LI) ? LI->getSExtValue() : Lowerbound;
Count = UI->getSExtValue() - Lowerbound + 1;
}
@@ -1793,12 +1806,14 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
}
break;
case dwarf::DW_ATE_complex_float:
+ // The CodeView size for a complex represents the size of
+ // an individual component.
switch (ByteSize) {
- case 2: STK = SimpleTypeKind::Complex16; break;
- case 4: STK = SimpleTypeKind::Complex32; break;
- case 8: STK = SimpleTypeKind::Complex64; break;
- case 10: STK = SimpleTypeKind::Complex80; break;
- case 16: STK = SimpleTypeKind::Complex128; break;
+ case 4: STK = SimpleTypeKind::Complex16; break;
+ case 8: STK = SimpleTypeKind::Complex32; break;
+ case 16: STK = SimpleTypeKind::Complex64; break;
+ case 20: STK = SimpleTypeKind::Complex80; break;
+ case 32: STK = SimpleTypeKind::Complex128; break;
}
break;
case dwarf::DW_ATE_float:
@@ -3279,7 +3294,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
// Second, emit each global that is in a comdat into its own .debug$S
// section along with its own symbol substream.
for (const CVGlobalVariable &CVGV : ComdatVariables) {
- const GlobalVariable *GV = CVGV.GVInfo.get<const GlobalVariable *>();
+ const GlobalVariable *GV = cast<const GlobalVariable *>(CVGV.GVInfo);
MCSymbol *GVSym = Asm->getSymbol(GV);
OS.AddComment("Symbol subsection for " +
Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
@@ -3388,7 +3403,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
: getFullyQualifiedName(Scope, DIGV->getName());
if (const GlobalVariable *GV =
- CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
+ dyn_cast_if_present<const GlobalVariable *>(CVGV.GVInfo)) {
// DataSym record, see SymbolRecord.h for more info. Thread local data
// happens to have the same format as global data.
MCSymbol *GVSym = Asm->getSymbol(GV);
@@ -3403,7 +3418,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
OS.AddComment("DataOffset");
uint64_t Offset = 0;
- if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end())
+ if (CVGlobalVariableOffsets.contains(DIGV))
// Use the offset seen while collecting info on globals.
Offset = CVGlobalVariableOffsets[DIGV];
OS.emitCOFFSecRel32(GVSym, Offset);
@@ -3415,7 +3430,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
emitNullTerminatedSymbolName(OS, QualifiedName, LengthOfDataRecord);
endSymbolRecord(DataEnd);
} else {
- const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>();
+ const DIExpression *DIE = cast<const DIExpression *>(CVGV.GVInfo);
assert(DIE->isConstant() &&
"Global constant variables must contain a constant expression.");
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 495822a6e653..1455ac417824 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -191,6 +192,8 @@ private:
bool HasStackRealignment = false;
bool HaveLineInfo = false;
+
+ bool HasFramePointer = false;
};
FunctionInfo *CurFn = nullptr;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 308d4b1b5d61..619155cafe92 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -173,9 +173,7 @@ void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const {
// DIE Implementation
//===----------------------------------------------------------------------===//
-DIE *DIE::getParent() const {
- return Owner.dyn_cast<DIE*>();
-}
+DIE *DIE::getParent() const { return dyn_cast_if_present<DIE *>(Owner); }
DIEAbbrev DIE::generateAbbrev() const {
DIEAbbrev Abbrev(Tag, hasChildren());
@@ -209,7 +207,7 @@ const DIE *DIE::getUnitDie() const {
DIEUnit *DIE::getUnit() const {
const DIE *UnitDie = getUnitDie();
if (UnitDie)
- return UnitDie->Owner.dyn_cast<DIEUnit*>();
+ return dyn_cast_if_present<DIEUnit *>(UnitDie->Owner);
return nullptr;
}
@@ -385,6 +383,7 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_strx2:
case dwarf::DW_FORM_addrx2:
case dwarf::DW_FORM_strx3:
+ case dwarf::DW_FORM_addrx3:
case dwarf::DW_FORM_strp:
case dwarf::DW_FORM_ref4:
case dwarf::DW_FORM_data4:
diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 0b40cdb0c3cc..55a0afcf7a33 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -138,6 +138,9 @@ void DbgValueHistoryMap::trimLocationRanges(
// references if any entries are removed.
SmallVector<size_t, 4> Offsets;
+ LLVM_DEBUG(dbgs() << "Trimming location ranges for function '" << MF.getName()
+ << "'\n");
+
for (auto &Record : VarEntries) {
auto &HistoryMapEntries = Record.second;
if (HistoryMapEntries.empty())
@@ -213,6 +216,8 @@ void DbgValueHistoryMap::trimLocationRanges(
// count of the closing entry, if one exists.
if (EndIndex != NoEntry)
ReferenceCount[EndIndex] -= 1;
+ LLVM_DEBUG(dbgs() << "Dropping value outside scope range of variable: ";
+ StartMI->print(llvm::dbgs()););
}
}
@@ -253,6 +258,8 @@ void DbgValueHistoryMap::trimLocationRanges(
// ToRemove indices are valid after each erase.
for (EntryIndex Idx : llvm::reverse(ToRemove))
HistoryMapEntries.erase(HistoryMapEntries.begin() + Idx);
+ LLVM_DEBUG(llvm::dbgs() << "New HistoryMap('" << LocalVar->getName()
+ << "') size: " << HistoryMapEntries.size() << "\n");
}
}
@@ -555,8 +562,8 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
- dbgs() << "DbgValueHistoryMap:\n";
+LLVM_DUMP_METHOD void DbgValueHistoryMap::dump(StringRef FuncName) const {
+ dbgs() << "DbgValueHistoryMap('" << FuncName << "'):\n";
for (const auto &VarRangePair : *this) {
const InlinedEntity &Var = VarRangePair.first;
const Entries &Entries = VarRangePair.second;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 858a3e75e515..eb2d992c7e75 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -223,6 +223,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
Encoding == dwarf::DW_ATE_signed_char ||
Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF ||
Encoding == dwarf::DW_ATE_boolean ||
+ Encoding == dwarf::DW_ATE_complex_float ||
(Ty->getTag() == dwarf::DW_TAG_unspecified_type &&
Ty->getName() == "decltype(nullptr)")) &&
"Unsupported encoding");
@@ -273,7 +274,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
InstOrdering.initialize(*MF);
if (TrimVarLocs)
DbgValues.trimLocationRanges(*MF, LScopes, InstOrdering);
- LLVM_DEBUG(DbgValues.dump());
+ LLVM_DEBUG(DbgValues.dump(MF->getName()));
// Request labels for the full history.
for (const auto &I : DbgValues) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 2008aa39ff87..726aba18bb80 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -126,7 +126,7 @@ public:
: Expression(Expr), ValueLocEntries(Locs.begin(), Locs.end()),
IsVariadic(IsVariadic) {
#ifndef NDEBUG
- assert(cast<DIExpression>(Expr)->isValid() ||
+ assert(Expr->isValid() ||
!any_of(Locs, [](auto LE) { return LE.isLocation(); }));
if (!IsVariadic) {
assert(ValueLocEntries.size() == 1);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 0515173b4a24..a96bdd034918 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -49,7 +49,7 @@ private:
SmallVector<Entry, 32> Entries;
SmallString<256> DWARFBytes;
std::vector<std::string> Comments;
- MCSymbol *Sym;
+ MCSymbol *Sym = nullptr;
/// Only verbose textual output needs comments. This will be set to
/// true for that case, and false otherwise.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index df4fe8d49806..10c844ddb14a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -94,7 +94,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitCFI =
MAI.usesCFIForEH() && (shouldEmitPersonality || shouldEmitMoves);
else
- shouldEmitCFI = Asm->needsCFIForDebug() && shouldEmitMoves;
+ shouldEmitCFI = Asm->usesCFIWithoutEH() && shouldEmitMoves;
}
void DwarfCFIException::beginBasicBlockSection(const MachineBasicBlock &MBB) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 6dde50375a60..58ed21379d29 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -266,7 +267,7 @@ void DwarfCompileUnit::addLocationAttribute(
// 16-bit platforms like MSP430 and AVR take this path, so sink this
// assert to platforms that use it.
auto GetPointerSizedFormAndOp = [this]() {
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ unsigned PointerSize = Asm->MAI->getCodePointerSize();
assert((PointerSize == 4 || PointerSize == 8) &&
"Add support for other sizes if necessary");
struct FormAndOp {
@@ -278,7 +279,16 @@ void DwarfCompileUnit::addLocationAttribute(
: FormAndOp{dwarf::DW_FORM_data8, dwarf::DW_OP_const8u};
};
if (Global->isThreadLocal()) {
- if (Asm->TM.useEmulatedTLS()) {
+ if (Asm->TM.getTargetTriple().isWasm()) {
+ // FIXME This is not guaranteed, but in practice, in static linking,
+ // if present, __tls_base's index is 1. This doesn't hold for dynamic
+ // linking, so TLS variables used in dynamic linking won't have
+ // correct debug info for now. See
+ // https://github.com/llvm/llvm-project/blob/19afbfe33156d211fa959dadeea46cd17b9c723c/lld/wasm/Driver.cpp#L786-L823
+ addWasmRelocBaseGlobal(Loc, "__tls_base", 1);
+ addOpAddress(*Loc, Sym);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ } else if (Asm->TM.useEmulatedTLS()) {
// TODO: add debug info for emulated thread local mode.
} else {
// FIXME: Make this work with -gsplit-dwarf.
@@ -301,6 +311,14 @@ void DwarfCompileUnit::addLocationAttribute(
DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
: dwarf::DW_OP_form_tls_address);
}
+ } else if (Asm->TM.getTargetTriple().isWasm() &&
+ Asm->TM.getRelocationModel() == Reloc::PIC_) {
+ // FIXME This is not guaranteed, but in practice, if present,
+ // __memory_base's index is 1. See
+ // https://github.com/llvm/llvm-project/blob/19afbfe33156d211fa959dadeea46cd17b9c723c/lld/wasm/Driver.cpp#L786-L823
+ addWasmRelocBaseGlobal(Loc, "__memory_base", 1);
+ addOpAddress(*Loc, Sym);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
} else if ((Asm->TM.getRelocationModel() == Reloc::RWPI ||
Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) &&
!Asm->getObjFileLowering()
@@ -449,6 +467,39 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
return ContextCU->updateSubprogramScopeDIEImpl(SP, SPDie);
}
+// Add info for Wasm-global-based relocation.
+// 'GlobalIndex' is used for split dwarf, which currently relies on a few
+// assumptions that are not guaranteed in a formal way but work in practice.
+void DwarfCompileUnit::addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
+ uint64_t GlobalIndex) {
+ // FIXME: duplicated from Target/WebAssembly/WebAssembly.h
+ // don't want to depend on target specific headers in this code?
+ const unsigned TI_GLOBAL_RELOC = 3;
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ auto *Sym = cast<MCSymbolWasm>(Asm->GetExternalSymbolSymbol(GlobalName));
+ // FIXME: this repeats what WebAssemblyMCInstLower::
+ // GetExternalSymbolSymbol does, since if there's no code that
+ // refers to this symbol, we have to set it here.
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ Sym->setGlobalType(wasm::WasmGlobalType{
+ static_cast<uint8_t>(PointerSize == 4 ? wasm::WASM_TYPE_I32
+ : wasm::WASM_TYPE_I64),
+ true});
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
+ addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
+ if (!isDwoUnit()) {
+ addLabel(*Loc, dwarf::DW_FORM_data4, Sym);
+ } else {
+ // FIXME: when writing dwo, we need to avoid relocations. Probably
+ // the "right" solution is to treat globals the way func and data
+ // symbols are (with entries in .debug_addr).
+ // For now we hardcode the indices in the callsites. Global indices are not
+ // fixed, but in practice a few are fixed; for example, __stack_pointer is
+ // always index 0.
+ addUInt(*Loc, dwarf::DW_FORM_data4, GlobalIndex);
+ }
+}
+
DIE &DwarfCompileUnit::updateSubprogramScopeDIEImpl(const DISubprogram *SP,
DIE *SPDie) {
SmallVector<RangeSpan, 2> BB_List;
@@ -480,40 +531,24 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIEImpl(const DISubprogram *SP,
case TargetFrameLowering::DwarfFrameBase::CFA: {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_call_frame_cfa);
+ if (FrameBase.Location.Offset != 0) {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_consts);
+ addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.Offset);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ }
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
break;
}
case TargetFrameLowering::DwarfFrameBase::WasmFrameBase: {
// FIXME: duplicated from Target/WebAssembly/WebAssembly.h
- // don't want to depend on target specific headers in this code?
const unsigned TI_GLOBAL_RELOC = 3;
if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) {
// These need to be relocatable.
- assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far.
- auto SPSym = cast<MCSymbolWasm>(
- Asm->GetExternalSymbolSymbol("__stack_pointer"));
- // FIXME: this repeats what WebAssemblyMCInstLower::
- // GetExternalSymbolSymbol does, since if there's no code that
- // refers to this symbol, we have to set it here.
- SPSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
- SPSym->setGlobalType(wasm::WasmGlobalType{
- uint8_t(Asm->getSubtargetInfo().getTargetTriple().getArch() ==
- Triple::wasm64
- ? wasm::WASM_TYPE_I64
- : wasm::WASM_TYPE_I32),
- true});
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
- addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
- if (!isDwoUnit()) {
- addLabel(*Loc, dwarf::DW_FORM_data4, SPSym);
- } else {
- // FIXME: when writing dwo, we need to avoid relocations. Probably
- // the "right" solution is to treat globals the way func and data
- // symbols are (with entries in .debug_addr).
- // For now, since we only ever use index 0, this should work as-is.
- addUInt(*Loc, dwarf::DW_FORM_data4, FrameBase.Location.WasmLoc.Index);
- }
+ assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far.
+ // For now, since we only ever use index 0, this should work as-is.
+ addWasmRelocBaseGlobal(Loc, "__stack_pointer",
+ FrameBase.Location.WasmLoc.Index);
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
} else {
@@ -608,7 +643,7 @@ void DwarfCompileUnit::attachRangesOrLowHighPC(
assert(!Ranges.empty());
if (!DD->useRangesSection() ||
(Ranges.size() == 1 &&
- (!DD->alwaysUseRanges() ||
+ (!DD->alwaysUseRanges(*this) ||
DD->getSectionLabel(&Ranges.front().Begin->getSection()) ==
Ranges.front().Begin))) {
const RangeSpan &Front = Ranges.front();
@@ -659,7 +694,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope,
auto *InlinedSP = getDISubprogram(DS);
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
- DIE *OriginDIE = getAbstractSPDies()[InlinedSP];
+ DIE *OriginDIE = getAbstractScopeDIEs()[InlinedSP];
assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine);
@@ -691,10 +726,20 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope,
DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
if (DD->isLexicalScopeDIENull(Scope))
return nullptr;
+ const auto *DS = Scope->getScopeNode();
auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_lexical_block);
- if (Scope->isAbstractScope())
+ if (Scope->isAbstractScope()) {
+ assert(!getAbstractScopeDIEs().count(DS) &&
+ "Abstract DIE for this scope exists!");
+ getAbstractScopeDIEs()[DS] = ScopeDIE;
return ScopeDIE;
+ }
+ if (!Scope->getInlinedAt()) {
+ assert(!LexicalBlockDIEs.count(DS) &&
+ "Concrete out-of-line DIE for this scope exists!");
+ LexicalBlockDIEs[DS] = ScopeDIE;
+ }
attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
@@ -929,29 +974,29 @@ static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
for (auto *El : Array->getElements()) {
if (auto *Subrange = dyn_cast<DISubrange>(El)) {
if (auto Count = Subrange->getCount())
- if (auto *Dependency = Count.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(Count))
Result.push_back(Dependency);
if (auto LB = Subrange->getLowerBound())
- if (auto *Dependency = LB.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(LB))
Result.push_back(Dependency);
if (auto UB = Subrange->getUpperBound())
- if (auto *Dependency = UB.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(UB))
Result.push_back(Dependency);
if (auto ST = Subrange->getStride())
- if (auto *Dependency = ST.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(ST))
Result.push_back(Dependency);
} else if (auto *GenericSubrange = dyn_cast<DIGenericSubrange>(El)) {
if (auto Count = GenericSubrange->getCount())
- if (auto *Dependency = Count.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(Count))
Result.push_back(Dependency);
if (auto LB = GenericSubrange->getLowerBound())
- if (auto *Dependency = LB.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(LB))
Result.push_back(Dependency);
if (auto UB = GenericSubrange->getUpperBound())
- if (auto *Dependency = UB.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(UB))
Result.push_back(Dependency);
if (auto ST = GenericSubrange->getStride())
- if (auto *Dependency = ST.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(ST))
Result.push_back(Dependency);
}
}
@@ -1062,35 +1107,35 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
for (DbgVariable *DV : Locals)
ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer));
- // Emit imported entities (skipped in gmlt-like data).
- if (!includeMinimalInlineScopes()) {
- for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
- ScopeDIE.addChild(constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
- }
-
// Emit labels.
for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
ScopeDIE.addChild(constructLabelDIE(*DL, *Scope));
+ // Track other local entities (skipped in gmlt-like data).
+ // This creates a mapping between the CU and a set of local declarations that
+ // should be emitted for subprograms in this CU.
+ if (!includeMinimalInlineScopes() && !Scope->getInlinedAt()) {
+ auto &LocalDecls = DD->getLocalDeclsForScope(Scope->getScopeNode());
+ DeferredLocalDecls.insert(LocalDecls.begin(), LocalDecls.end());
+ }
+
// Emit inner lexical scopes.
- auto needToEmitLexicalScope = [this](LexicalScope *LS) {
- if (isa<DISubprogram>(LS->getScopeNode()))
- return true;
- auto Vars = DU->getScopeVariables().lookup(LS);
+ auto skipLexicalScope = [this](LexicalScope *S) -> bool {
+ if (isa<DISubprogram>(S->getScopeNode()))
+ return false;
+ auto Vars = DU->getScopeVariables().lookup(S);
if (!Vars.Args.empty() || !Vars.Locals.empty())
- return true;
- if (!includeMinimalInlineScopes() &&
- !ImportedEntities[LS->getScopeNode()].empty())
- return true;
- return false;
+ return false;
+ return includeMinimalInlineScopes() ||
+ DD->getLocalDeclsForScope(S->getScopeNode()).empty();
};
for (LexicalScope *LS : Scope->getChildren()) {
// If the lexical block doesn't have non-scope children, skip
// its emission and put its children directly to the parent scope.
- if (needToEmitLexicalScope(LS))
- constructScopeDIE(LS, ScopeDIE);
- else
+ if (skipLexicalScope(LS))
createAndAddScopeChildren(LS, ScopeDIE);
+ else
+ constructScopeDIE(LS, ScopeDIE);
}
return ObjectPointer;
@@ -1098,11 +1143,9 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
LexicalScope *Scope) {
- DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()];
- if (AbsDef)
- return;
-
auto *SP = cast<DISubprogram>(Scope->getScopeNode());
+ if (getAbstractScopeDIEs().count(SP))
+ return;
DIE *ContextDIE;
DwarfCompileUnit *ContextCU = this;
@@ -1126,14 +1169,19 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
// Passing null as the associated node because the abstract definition
// shouldn't be found by lookup.
- AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);
- ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef);
- ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline,
+ DIE &AbsDef = ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram,
+ *ContextDIE, nullptr);
+
+ // Store the DIE before creating children.
+ ContextCU->getAbstractScopeDIEs()[SP] = &AbsDef;
+
+ ContextCU->applySubprogramAttributesToDefinition(SP, AbsDef);
+ ContextCU->addSInt(AbsDef, dwarf::DW_AT_inline,
DD->getDwarfVersion() <= 4 ? std::optional<dwarf::Form>()
: dwarf::DW_FORM_implicit_const,
dwarf::DW_INL_inlined);
- if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef))
- ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
+ if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, AbsDef))
+ ContextCU->addDIEEntry(AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const {
@@ -1277,21 +1325,37 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
EntityDie = getOrCreateNameSpace(NS);
else if (auto *M = dyn_cast<DIModule>(Entity))
EntityDie = getOrCreateModule(M);
- else if (auto *SP = dyn_cast<DISubprogram>(Entity))
- EntityDie = getOrCreateSubprogramDIE(SP);
- else if (auto *T = dyn_cast<DIType>(Entity))
+ else if (auto *SP = dyn_cast<DISubprogram>(Entity)) {
+ // If there is an abstract subprogram, refer to it. Note that this assumes
+ // that all the abstract subprograms have been already created (which is
+ // correct until imported entities get emitted in DwarfDebug::endModule()).
+ if (auto *AbsSPDie = getAbstractScopeDIEs().lookup(SP))
+ EntityDie = AbsSPDie;
+ else
+ EntityDie = getOrCreateSubprogramDIE(SP);
+ } else if (auto *T = dyn_cast<DIType>(Entity))
EntityDie = getOrCreateTypeDIE(T);
else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity))
EntityDie = getOrCreateGlobalVariableDIE(GV, {});
+ else if (auto *IE = dyn_cast<DIImportedEntity>(Entity))
+ EntityDie = getOrCreateImportedEntityDIE(IE);
else
EntityDie = getDIE(Entity);
assert(EntityDie);
addSourceLine(*IMDie, Module->getLine(), Module->getFile());
addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
StringRef Name = Module->getName();
- if (!Name.empty())
+ if (!Name.empty()) {
addString(*IMDie, dwarf::DW_AT_name, Name);
+ // FIXME: if consumers ever start caring about handling
+ // unnamed import declarations such as `using ::nullptr_t`
+ // or `using namespace std::ranges`, we could add the
+ // import declaration into the accelerator table with the
+ // name being the one of the entity being imported.
+ DD->addAccelNamespace(*CUNode, Name, *IMDie);
+ }
+
// This is for imported module with renamed entities (such as variables and
// subprograms).
DINodeArray Elements = Module->getElements();
@@ -1305,9 +1369,24 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
return IMDie;
}
+DIE *DwarfCompileUnit::getOrCreateImportedEntityDIE(
+ const DIImportedEntity *IE) {
+
+ // Check for pre-existence.
+ if (DIE *Die = getDIE(IE))
+ return Die;
+
+ DIE *ContextDIE = getOrCreateContextDIE(IE->getScope());
+ assert(ContextDIE && "Empty scope for the imported entity!");
+
+ DIE *IMDie = constructImportedEntityDIE(IE);
+ ContextDIE->addChild(IMDie);
+ return IMDie;
+}
+
void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
DIE *D = getDIE(SP);
- if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) {
+ if (DIE *AbsSPDIE = getAbstractScopeDIEs().lookup(SP)) {
if (D)
// If this subprogram has an abstract definition, reference that
addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
@@ -1356,8 +1435,8 @@ void DwarfCompileUnit::createAbstractEntity(const DINode *Node,
assert(Scope && Scope->isAbstractScope());
auto &Entity = getAbstractEntities()[Node];
if (isa<const DILocalVariable>(Node)) {
- Entity = std::make_unique<DbgVariable>(
- cast<const DILocalVariable>(Node), nullptr /* IA */);;
+ Entity = std::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
+ nullptr /* IA */);
DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get()));
} else if (isa<const DILabel>(Node)) {
Entity = std::make_unique<DbgLabel>(
@@ -1389,6 +1468,8 @@ bool DwarfCompileUnit::hasDwarfPubSections() const {
// generated for things like Gold's gdb_index generation.
case DICompileUnit::DebugNameTableKind::GNU:
return true;
+ case DICompileUnit::DebugNameTableKind::Apple:
+ return false;
case DICompileUnit::DebugNameTableKind::Default:
return DD->tuneForGDB() && !includeMinimalInlineScopes() &&
!CUNode->isDebugDirectivesOnly() &&
@@ -1599,3 +1680,29 @@ void DwarfCompileUnit::createBaseTypeDIEs() {
Btr.Die = &Die;
}
}
+
+DIE *DwarfCompileUnit::getLexicalBlockDIE(const DILexicalBlock *LB) {
+ // Assume that if an abstract tree exists, all of its DIEs have already been emitted.
+ bool isAbstract = getAbstractScopeDIEs().count(LB->getSubprogram());
+ if (isAbstract && getAbstractScopeDIEs().count(LB))
+ return getAbstractScopeDIEs()[LB];
+ assert(!isAbstract && "Missed lexical block DIE in abstract tree!");
+
+ // Return a concrete DIE if it exists or nullptr otherwise.
+ return LexicalBlockDIEs.lookup(LB);
+}
+
+DIE *DwarfCompileUnit::getOrCreateContextDIE(const DIScope *Context) {
+ if (isa_and_nonnull<DILocalScope>(Context)) {
+ if (auto *LFScope = dyn_cast<DILexicalBlockFile>(Context))
+ Context = LFScope->getNonLexicalBlockFileScope();
+ if (auto *LScope = dyn_cast<DILexicalBlock>(Context))
+ return getLexicalBlockDIE(LScope);
+
+ // Otherwise the context must be a DISubprogram.
+ auto *SPScope = cast<DISubprogram>(Context);
+ if (getAbstractScopeDIEs().count(SPScope))
+ return getAbstractScopeDIEs()[SPScope];
+ }
+ return DwarfUnit::getOrCreateContextDIE(Context);
+}
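The CFA case of updateSubprogramScopeDIEImpl above now folds a non-zero frame-base offset into the location expression as DW_OP_call_frame_cfa, DW_OP_consts <offset>, DW_OP_plus. The following sketch is not part of the vendored patch; it only illustrates, with made-up helper names, how such an expression serializes to raw bytes (opcode values per the DWARF 5 specification):

#include <cstdint>
#include <vector>

static void appendSLEB128(std::vector<uint8_t> &Out, int64_t V) {
  bool More = true;
  while (More) {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    // Stop once only sign bits remain and the emitted byte's sign bit agrees.
    if ((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)))
      More = false;
    else
      Byte |= 0x80;
    Out.push_back(Byte);
  }
}

std::vector<uint8_t> encodeCFAFrameBase(int64_t Offset) {
  std::vector<uint8_t> Expr;
  Expr.push_back(0x9c);          // DW_OP_call_frame_cfa
  if (Offset != 0) {
    Expr.push_back(0x11);        // DW_OP_consts
    appendSLEB128(Expr, Offset); // offset as SLEB128
    Expr.push_back(0x22);        // DW_OP_plus
  }
  return Expr;                   // e.g. Offset == 16 yields 9c 11 10 22
}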
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 7d87f35021bb..6ef73ebd4f7f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -61,11 +61,6 @@ class DwarfCompileUnit final : public DwarfUnit {
/// The start of the unit macro info within macro section.
MCSymbol *MacroLabelBegin;
- using ImportedEntityList = SmallVector<const MDNode *, 8>;
- using ImportedEntityMap = DenseMap<const MDNode *, ImportedEntityList>;
-
- ImportedEntityMap ImportedEntities;
-
/// GlobalNames - A map of globally visible named entities for this unit.
StringMap<const DIE *> GlobalNames;
@@ -79,7 +74,20 @@ class DwarfCompileUnit final : public DwarfUnit {
// ranges/locs.
const MCSymbol *BaseAddress = nullptr;
- DenseMap<const MDNode *, DIE *> AbstractSPDies;
+ using MDNodeSetVector =
+ SetVector<const MDNode *, SmallVector<const MDNode *, 4>,
+ SmallPtrSet<const MDNode *, 4>>;
+
+ // List of entities (either static locals, types or imports) that
+ // belong to subprograms within this CU.
+ MDNodeSetVector DeferredLocalDecls;
+
+ // List of concrete lexical block scopes that belong to subprograms within this CU.
+ DenseMap<const DILocalScope *, DIE *> LexicalBlockDIEs;
+
+ // List of abstract local scopes (either DISubprogram or DILexicalBlock).
+ DenseMap<const DILocalScope *, DIE *> AbstractLocalScopeDIEs;
+
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// DWO ID for correlating skeleton and split units.
@@ -94,10 +102,10 @@ class DwarfCompileUnit final : public DwarfUnit {
bool isDwoUnit() const override;
- DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
+ DenseMap<const DILocalScope *, DIE *> &getAbstractScopeDIEs() {
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
- return AbstractSPDies;
- return DU->getAbstractSPDies();
+ return AbstractLocalScopeDIEs;
+ return DU->getAbstractScopeDIEs();
}
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
@@ -108,6 +116,10 @@ class DwarfCompileUnit final : public DwarfUnit {
void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
+ /// Add info for Wasm-global-based relocation.
+ void addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
+ uint64_t GlobalIndex);
+
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
@@ -171,17 +183,6 @@ public:
unsigned getOrCreateSourceID(const DIFile *File) override;
- void addImportedEntity(const DIImportedEntity* IE) {
- DIScope *Scope = IE->getScope();
- assert(Scope && "Invalid Scope encoding!");
- if (!isa<DILocalScope>(Scope))
- // No need to add imported enities that are not local declaration.
- return;
-
- auto *LocalScope = cast<DILocalScope>(Scope)->getNonLexicalBlockFileScope();
- ImportedEntities[LocalScope].push_back(IE);
- }
-
/// addRange - Add an address range to the list of ranges for this unit.
void addRange(RangeSpan Range);
@@ -213,6 +214,11 @@ public:
/// attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *constructLexicalScopeDIE(LexicalScope *Scope);
+ /// Get a DIE for the given DILexicalBlock.
+ /// Note that this function assumes the DIE has already been created;
+ /// it is an error if it has not.
+ DIE *getLexicalBlockDIE(const DILexicalBlock *LB);
+
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *constructVariableDIE(DbgVariable &DV, bool Abstract = false);
@@ -224,6 +230,10 @@ public:
void createBaseTypeDIEs();
+ /// Construct a DIE for a given scope.
+ /// This instance of 'getOrCreateContextDIE()' can handle DILocalScope.
+ DIE *getOrCreateContextDIE(const DIScope *Ty) override;
+
/// Construct a DIE for this subprogram scope.
DIE &constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope);
@@ -262,8 +272,9 @@ public:
void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE,
SmallVector<DbgCallSiteParam, 4> &Params);
- /// Construct import_module DIE.
- DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
+ /// Get or create a DIE for an imported entity.
+ DIE *getOrCreateImportedEntityDIE(const DIImportedEntity *IE);
+ DIE *constructImportedEntityDIE(const DIImportedEntity *IE);
void finishSubprogramDefinition(const DISubprogram *SP);
void finishEntityDefinition(const DbgEntity *Entity);
@@ -360,6 +371,8 @@ public:
bool hasDwarfPubSections() const;
void addBaseTypeRef(DIEValueList &Die, int64_t Idx);
+
+ MDNodeSetVector &getDeferredLocalDecls() { return DeferredLocalDecls; }
};
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index cde790cc77fb..1ae17ec9b874 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -18,7 +18,7 @@
#include "DwarfUnit.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
@@ -53,6 +53,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cstddef>
#include <iterator>
@@ -452,14 +453,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A)
// Split DWARF would benefit object size significantly by trading reductions
// in address pool usage for slightly increased range list encodings.
- if (DwarfVersion >= 5) {
+ if (DwarfVersion >= 5)
MinimizeAddr = MinimizeAddrInV5Option;
- // FIXME: In the future, enable this by default for Split DWARF where the
- // tradeoff is more pronounced due to being able to offload the range
- // lists to the dwo file and shrink object files/reduce relocations there.
- if (MinimizeAddr == MinimizeAddrInV5::Default)
- MinimizeAddr = MinimizeAddrInV5::Disabled;
- }
Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
Asm->OutStreamer->getContext().setDwarfFormat(Dwarf64 ? dwarf::DWARF64
@@ -500,6 +495,7 @@ static StringRef getObjCMethodName(StringRef In) {
void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
const DISubprogram *SP, DIE &Die) {
if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Apple &&
CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
return;
@@ -513,7 +509,7 @@ void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
// well into the name table. Only do that if we are going to actually emit
// that name.
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() &&
- (useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP)))
+ (useAllLinkageNames() || InfoHolder.getAbstractScopeDIEs().lookup(SP)))
addAccelName(CU, SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
@@ -710,13 +706,13 @@ static void interpretValues(const MachineInstr *CurMI,
if (MI.isDebugInstr())
return;
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg().isPhysical()) {
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (MO.getReg().isPhysical()) {
for (auto &FwdReg : ForwardedRegWorklist)
if (TRI.regsOverlap(FwdReg.first, MO.getReg()))
Defs.insert(FwdReg.first);
- for (MCRegUnitIterator Units(MO.getReg(), &TRI); Units.isValid(); ++Units)
- NewClobberedRegUnits.insert(*Units);
+ for (MCRegUnit Unit : TRI.regunits(MO.getReg()))
+ NewClobberedRegUnits.insert(Unit);
}
}
};
@@ -1050,11 +1046,11 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
if (!SDK.empty())
NewCU.addString(Die, dwarf::DW_AT_APPLE_sdk, SDK);
- // Add DW_str_offsets_base to the unit DIE, except for split units.
- if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
- NewCU.addStringOffsetsStart();
-
if (!useSplitDwarf()) {
+ // Add DW_str_offsets_base to the unit DIE, except for split units.
+ if (useSegmentedStringOffsetsTable())
+ NewCU.addStringOffsetsStart();
+
NewCU.initStmtList();
// If we're using split dwarf the compilation dir is going to be in the
@@ -1097,6 +1093,13 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
if (auto *CU = CUMap.lookup(DIUnit))
return *CU;
+ if (useSplitDwarf() &&
+ !shareAcrossDWOCUs() &&
+ (!DIUnit->getSplitDebugInlining() ||
+ DIUnit->getEmissionKind() == DICompileUnit::FullDebug) &&
+ !CUMap.empty()) {
+ return *CUMap.begin()->second;
+ }
CompilationDir = DIUnit->getDirectory();
auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
@@ -1104,9 +1107,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
DwarfCompileUnit &NewCU = *OwnedUnit;
InfoHolder.addUnit(std::move(OwnedUnit));
- for (auto *IE : DIUnit->getImportedEntities())
- NewCU.addImportedEntity(IE);
-
// LTO with assembly output shares a single line table amongst multiple CUs.
// To avoid the compilation directory being ambiguous, let the line table
// explicitly describe the directory of all files, never relying on the
@@ -1129,14 +1129,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
return NewCU;
}
-void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
- const DIImportedEntity *N) {
- if (isa<DILocalScope>(N->getScope()))
- return;
- if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope()))
- D->addChild(TheCU.constructImportedEntityDIE(N));
-}
-
/// Sort and unique GVEs by comparing their fragment offset.
static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
@@ -1214,16 +1206,8 @@ void DwarfDebug::beginModule(Module *M) {
DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
for (DICompileUnit *CUNode : M->debug_compile_units()) {
- // FIXME: Move local imported entities into a list attached to the
- // subprogram, then this search won't be needed and a
- // getImportedEntities().empty() test should go below with the rest.
- bool HasNonLocalImportedEntities = llvm::any_of(
- CUNode->getImportedEntities(), [](const DIImportedEntity *IE) {
- return !isa<DILocalScope>(IE->getScope());
- });
-
- if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() &&
- CUNode->getRetainedTypes().empty() &&
+ if (CUNode->getImportedEntities().empty() &&
+ CUNode->getEnumTypes().empty() && CUNode->getRetainedTypes().empty() &&
CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty())
continue;
@@ -1257,10 +1241,6 @@ void DwarfDebug::beginModule(Module *M) {
// There is no point in force-emitting a forward declaration.
CU.getOrCreateTypeDIE(RT);
}
- // Emit imported_modules last so that the relevant context is already
- // available.
- for (auto *IE : CUNode->getImportedEntities())
- constructAndAddImportedEntityDIE(CU, IE);
}
}
@@ -1300,6 +1280,8 @@ void DwarfDebug::finalizeModuleInfo() {
if (CUMap.size() > 1)
DWOName = Asm->TM.Options.MCOptions.SplitDwarfFile;
+ bool HasEmittedSplitCU = false;
+
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
for (const auto &P : CUMap) {
@@ -1318,6 +1300,10 @@ void DwarfDebug::finalizeModuleInfo() {
bool HasSplitUnit = SkCU && !TheCU.getUnitDie().children().empty();
if (HasSplitUnit) {
+ (void)HasEmittedSplitCU;
+ assert((shareAcrossDWOCUs() || !HasEmittedSplitCU) &&
+ "Multiple CUs emitted into a single dwo file");
+ HasEmittedSplitCU = true;
dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
? dwarf::DW_AT_dwo_name
: dwarf::DW_AT_GNU_dwo_name;
@@ -1377,11 +1363,10 @@ void DwarfDebug::finalizeModuleInfo() {
if (U.hasRangeLists())
U.addRnglistsBase();
- if (!DebugLocs.getLists().empty()) {
- if (!useSplitDwarf())
- U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
- DebugLocs.getSym(),
- TLOF.getDwarfLoclistsSection()->getBeginSymbol());
+ if (!DebugLocs.getLists().empty() && !useSplitDwarf()) {
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
+ DebugLocs.getSym(),
+ TLOF.getDwarfLoclistsSection()->getBeginSymbol());
}
}
@@ -1436,8 +1421,24 @@ void DwarfDebug::endModule() {
assert(CurMI == nullptr);
for (const auto &P : CUMap) {
- auto &CU = *P.second;
- CU.createBaseTypeDIEs();
+ const auto *CUNode = cast<DICompileUnit>(P.first);
+ DwarfCompileUnit *CU = &*P.second;
+
+ // Emit imported entities.
+ for (auto *IE : CUNode->getImportedEntities()) {
+ assert(!isa_and_nonnull<DILocalScope>(IE->getScope()) &&
+ "Unexpected function-local entity in 'imports' CU field.");
+ CU->getOrCreateImportedEntityDIE(IE);
+ }
+ for (const auto *D : CU->getDeferredLocalDecls()) {
+ if (auto *IE = dyn_cast<DIImportedEntity>(D))
+ CU->getOrCreateImportedEntityDIE(IE);
+ else
+ llvm_unreachable("Unexpected local retained node!");
+ }
+
+ // Emit base types.
+ CU->createBaseTypeDIEs();
}
// If we aren't actually generating debug info (check beginModule -
@@ -1511,16 +1512,6 @@ void DwarfDebug::endModule() {
// FIXME: AbstractVariables.clear();
}
-void DwarfDebug::ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
- const DINode *Node,
- const MDNode *ScopeNode) {
- if (CU.getExistingAbstractEntity(Node))
- return;
-
- CU.createAbstractEntity(Node, LScopes.getOrCreateAbstractScope(
- cast<DILocalScope>(ScopeNode)));
-}
-
void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
const DINode *Node, const MDNode *ScopeNode) {
if (CU.getExistingAbstractEntity(Node))
@@ -1531,6 +1522,21 @@ void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
CU.createAbstractEntity(Node, Scope);
}
+static const DILocalScope *getRetainedNodeScope(const MDNode *N) {
+ const DIScope *S;
+ if (const auto *LV = dyn_cast<DILocalVariable>(N))
+ S = LV->getScope();
+ else if (const auto *L = dyn_cast<DILabel>(N))
+ S = L->getScope();
+ else if (const auto *IE = dyn_cast<DIImportedEntity>(N))
+ S = IE->getScope();
+ else
+ llvm_unreachable("Unexpected retained node!");
+
+ // Ensure the scope is not a DILexicalBlockFile.
+ return cast<DILocalScope>(S)->getNonLexicalBlockFileScope();
+}
+
// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) {
@@ -1556,13 +1562,24 @@ void DwarfDebug::collectVariableInfoFromMFTable(
ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
auto RegVar = std::make_unique<DbgVariable>(
cast<DILocalVariable>(Var.first), Var.second);
- RegVar->initializeMMI(VI.Expr, VI.Slot);
+ if (VI.inStackSlot())
+ RegVar->initializeMMI(VI.Expr, VI.getStackSlot());
+ else {
+ MachineLocation MLoc(VI.getEntryValueRegister(), /*IsIndirect*/ true);
+ auto LocEntry = DbgValueLocEntry(MLoc);
+ RegVar->initializeDbgValue(DbgValueLoc(VI.Expr, LocEntry));
+ }
LLVM_DEBUG(dbgs() << "Created DbgVariable for " << VI.Var->getName()
<< "\n");
- if (DbgVariable *DbgVar = MFVars.lookup(Var))
- DbgVar->addMMIEntry(*RegVar);
- else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
+ if (DbgVariable *DbgVar = MFVars.lookup(Var)) {
+ if (DbgVar->getValueLoc())
+ LLVM_DEBUG(dbgs() << "Dropping repeated entry value debug info for "
+ "variable "
+ << VI.Var->getName() << "\n");
+ else
+ DbgVar->addMMIEntry(*RegVar);
+ } else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
MFVars.insert({Var, RegVar.get()});
ConcreteEntities.push_back(std::move(RegVar));
}
@@ -1964,19 +1981,18 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
createConcreteEntity(TheCU, *Scope, Label, IL.second, Sym);
}
- // Collect info for variables/labels that were optimized out.
+ // Collect info for retained nodes.
for (const DINode *DN : SP->getRetainedNodes()) {
- if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
- continue;
- LexicalScope *Scope = nullptr;
- if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
- Scope = LScopes.findLexicalScope(DV->getScope());
- } else if (auto *DL = dyn_cast<DILabel>(DN)) {
- Scope = LScopes.findLexicalScope(DL->getScope());
+ const auto *LS = getRetainedNodeScope(DN);
+ if (isa<DILocalVariable>(DN) || isa<DILabel>(DN)) {
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
+ continue;
+ LexicalScope *LexS = LScopes.findLexicalScope(LS);
+ if (LexS)
+ createConcreteEntity(TheCU, *LexS, DN, nullptr);
+ } else {
+ LocalDeclsPerLS[LS].insert(DN);
}
-
- if (Scope)
- createConcreteEntity(TheCU, *Scope, DN, nullptr);
}
}
@@ -2046,7 +2062,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
unsigned LastAsmLine =
Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
- if (DL == PrevInstLoc) {
+ bool PrevInstInSameSection =
+ (!PrevInstBB ||
+ PrevInstBB->getSectionIDNum() == MI->getParent()->getSectionIDNum());
+ if (DL == PrevInstLoc && PrevInstInSameSection) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
return;
@@ -2114,25 +2133,35 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
PrevInstLoc = DL;
}
-static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
+static std::pair<DebugLoc, bool> findPrologueEndLoc(const MachineFunction *MF) {
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
DebugLoc LineZeroLoc;
+ const Function &F = MF->getFunction();
+
+ // Some instructions may be inserted into the prologue after this function;
+ // the prologue must be kept in these cases.
+ bool IsEmptyPrologue =
+ !(F.hasPrologueData() || F.getMetadata(LLVMContext::MD_func_sanitize));
for (const auto &MBB : *MF) {
for (const auto &MI : MBB) {
- if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
- MI.getDebugLoc()) {
- // Scan forward to try to find a non-zero line number. The prologue_end
- // marks the first breakpoint in the function after the frame setup, and
- // a compiler-generated line 0 location is not a meaningful breakpoint.
- // If none is found, return the first location after the frame setup.
- if (MI.getDebugLoc().getLine())
- return MI.getDebugLoc();
- LineZeroLoc = MI.getDebugLoc();
+ if (!MI.isMetaInstruction()) {
+ if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
+ // Scan forward to try to find a non-zero line number. The
+ // prologue_end marks the first breakpoint in the function after the
+ // frame setup, and a compiler-generated line 0 location is not a
+ // meaningful breakpoint. If none is found, return the first
+ // location after the frame setup.
+ if (MI.getDebugLoc().getLine())
+ return std::make_pair(MI.getDebugLoc(), IsEmptyPrologue);
+
+ LineZeroLoc = MI.getDebugLoc();
+ }
+ IsEmptyPrologue = false;
}
}
}
- return LineZeroLoc;
+ return std::make_pair(LineZeroLoc, IsEmptyPrologue);
}
/// Register a source line with debug info. Returns the unique label that was
@@ -2159,8 +2188,16 @@ static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col,
DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF,
unsigned CUID) {
+ std::pair<DebugLoc, bool> PrologEnd = findPrologueEndLoc(&MF);
+ DebugLoc PrologEndLoc = PrologEnd.first;
+ bool IsEmptyPrologue = PrologEnd.second;
+
// Get beginning of function.
- if (DebugLoc PrologEndLoc = findPrologueEndLoc(&MF)) {
+ if (PrologEndLoc) {
+ // If the prologue is empty, there is no need to generate a scope line for the procedure.
+ if (IsEmptyPrologue)
+ return PrologEndLoc;
+
// Ensure the compile unit is created if the function is called before
// beginFunction().
(void)getOrCreateDwarfCompileUnit(
@@ -2239,7 +2276,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
assert(!FnScope || SP == FnScope->getScopeNode());
- DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
+ DwarfCompileUnit &TheCU = getOrCreateDwarfCompileUnit(SP->getUnit());
if (TheCU.getCUNode()->isDebugDirectivesOnly()) {
PrevLabel = nullptr;
CurFn = nullptr;
@@ -2260,6 +2297,9 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
if (!TheCU.getCUNode()->getDebugInfoForProfiling() &&
TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
+ for (const auto &R : Asm->MBBSectionRanges)
+ addArangeLabel(SymbolCU(&TheCU, R.second.BeginLabel));
+
assert(InfoHolder.getScopeVariables().empty());
PrevLabel = nullptr;
CurFn = nullptr;
@@ -2267,27 +2307,28 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
}
#ifndef NDEBUG
- size_t NumAbstractScopes = LScopes.getAbstractScopesList().size();
+ size_t NumAbstractSubprograms = LScopes.getAbstractScopesList().size();
#endif
- // Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
const auto *SP = cast<DISubprogram>(AScope->getScopeNode());
for (const DINode *DN : SP->getRetainedNodes()) {
- if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
- continue;
-
- const MDNode *Scope = nullptr;
- if (auto *DV = dyn_cast<DILocalVariable>(DN))
- Scope = DV->getScope();
- else if (auto *DL = dyn_cast<DILabel>(DN))
- Scope = DL->getScope();
- else
- llvm_unreachable("Unexpected DI type!");
-
- // Collect info for variables/labels that were optimized out.
- ensureAbstractEntityIsCreated(TheCU, DN, Scope);
- assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
- && "ensureAbstractEntityIsCreated inserted abstract scopes");
+ const auto *LS = getRetainedNodeScope(DN);
+ // Ensure LexicalScope is created for the scope of this node.
+ auto *LexS = LScopes.getOrCreateAbstractScope(LS);
+ assert(LexS && "Expected the LexicalScope to be created.");
+ if (isa<DILocalVariable>(DN) || isa<DILabel>(DN)) {
+ // Collect info for variables/labels that were optimized out.
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second ||
+ TheCU.getExistingAbstractEntity(DN))
+ continue;
+ TheCU.createAbstractEntity(DN, LexS);
+ } else {
+ // Remember the node if this is a local declaration.
+ LocalDeclsPerLS[LS].insert(DN);
+ }
+ assert(
+ LScopes.getAbstractScopesList().size() == NumAbstractSubprograms &&
+ "getOrCreateAbstractScope() inserted an abstract subprogram scope");
}
constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
@@ -2308,6 +2349,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
// can be used cross-function)
InfoHolder.getScopeVariables().clear();
InfoHolder.getScopeLabels().clear();
+ LocalDeclsPerLS.clear();
PrevLabel = nullptr;
CurFn = nullptr;
}
@@ -2507,10 +2549,13 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
Asm->emitDwarfLengthOrOffset(TheU->getLength());
// Emit the pubnames for this compilation unit.
- for (const auto &GI : Globals) {
- const char *Name = GI.getKeyData();
- const DIE *Entity = GI.second;
-
+ SmallVector<std::pair<StringRef, const DIE *>, 0> Vec;
+ for (const auto &GI : Globals)
+ Vec.emplace_back(GI.first(), GI.second);
+ llvm::sort(Vec, [](auto &A, auto &B) {
+ return A.second->getOffset() < B.second->getOffset();
+ });
+ for (const auto &[Name, Entity] : Vec) {
Asm->OutStreamer->AddComment("DIE offset");
Asm->emitDwarfLengthOrOffset(Entity->getOffset());
@@ -2523,7 +2568,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
}
Asm->OutStreamer->AddComment("External Name");
- Asm->OutStreamer->emitBytes(StringRef(Name, GI.getKeyLength() + 1));
+ Asm->OutStreamer->emitBytes(StringRef(Name.data(), Name.size() + 1));
}
Asm->OutStreamer->AddComment("End Mark");
@@ -2566,11 +2611,10 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
for (const auto &Op : Expr) {
assert(Op.getCode() != dwarf::DW_OP_const_type &&
"3 operand ops not yet supported");
+ assert(!Op.getSubCode() && "SubOps not yet supported");
Streamer.emitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
Offset++;
- for (unsigned I = 0; I < 2; ++I) {
- if (Op.getDescription().Op[I] == Encoding::SizeNA)
- continue;
+ for (unsigned I = 0; I < Op.getDescription().Op.size(); ++I) {
if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
unsigned Length =
Streamer.emitDIERef(*CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die);
@@ -3495,10 +3539,11 @@ template <typename DataT>
void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
AccelTable<DataT> &AppleAccel, StringRef Name,
const DIE &Die) {
- if (getAccelTableKind() == AccelTableKind::None)
+ if (getAccelTableKind() == AccelTableKind::None || Name.empty())
return;
if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Apple &&
CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Default)
return;
@@ -3555,11 +3600,9 @@ dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const {
}
const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
- auto I = SectionLabels.find(S);
- if (I == SectionLabels.end())
- return nullptr;
- return I->second;
+ return SectionLabels.lookup(S);
}
+
void DwarfDebug::insertSectionLabel(const MCSymbol *S) {
if (SectionLabels.insert(std::make_pair(&S->getSection(), S)).second)
if (useSplitDwarf() || getDwarfVersion() >= 5)
@@ -3583,3 +3626,13 @@ DwarfDebug::getMD5AsBytes(const DIFile *File) const {
std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.data());
return CKMem;
}
+
+bool DwarfDebug::alwaysUseRanges(const DwarfCompileUnit &CU) const {
+ if (MinimizeAddr == MinimizeAddrInV5::Ranges)
+ return true;
+ if (MinimizeAddr != MinimizeAddrInV5::Default)
+ return false;
+ if (useSplitDwarf())
+ return true;
+ return false;
+}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 5d2ef8ee79a7..1af4b643eb17 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -318,9 +318,14 @@ class DwarfDebug : public DebugHandlerBase {
/// This is a collection of subprogram MDNodes that are processed to
/// create DIEs.
- SetVector<const DISubprogram *, SmallVector<const DISubprogram *, 16>,
- SmallPtrSet<const DISubprogram *, 16>>
- ProcessedSPNodes;
+ SmallSetVector<const DISubprogram *, 16> ProcessedSPNodes;
+
+ /// Map function-local imported entities to their parent local scope
+ /// (either DILexicalBlock or DISubprogram) for a processed function
+ /// (including inlined subprograms).
+ using MDNodeSet = SetVector<const MDNode *, SmallVector<const MDNode *, 2>,
+ SmallPtrSet<const MDNode *, 2>>;
+ DenseMap<const DILocalScope *, MDNodeSet> LocalDeclsPerLS;
/// If nonnull, stores the current machine function we're processing.
const MachineFunction *CurFn = nullptr;
@@ -456,9 +461,6 @@ private:
using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
- void ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
- const DINode *Node,
- const MDNode *Scope);
void ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
const DINode *Node,
const MDNode *Scope);
@@ -598,10 +600,6 @@ private:
void finishUnitAttributes(const DICompileUnit *DIUnit,
DwarfCompileUnit &NewCU);
- /// Construct imported_module or imported_declaration DIE.
- void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
- const DIImportedEntity *N);
-
/// Register a source line with debug info. Returns the unique
/// label that was emitted and which provides correspondence to the
/// source line list.
@@ -696,9 +694,7 @@ public:
/// Returns whether range encodings should be used for single entry range
/// lists.
- bool alwaysUseRanges() const {
- return MinimizeAddr == MinimizeAddrInV5::Ranges;
- }
+ bool alwaysUseRanges(const DwarfCompileUnit &) const;
// Returns whether novel exprloc addrx+offset encodings should be used to
// reduce debug_addr size.
@@ -842,6 +838,10 @@ public:
/// If the \p File has an MD5 checksum, return it as an MD5Result
/// allocated in the MCContext.
std::optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
+
+ MDNodeSet &getLocalDeclsForScope(const DILocalScope *S) {
+ return LocalDeclsPerLS[S];
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index ab6967f50e30..7623b7fb7c5d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -117,10 +117,10 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
// Walk up the super-register chain until we find a valid number.
// For example, EAX on x86_64 is a 32-bit fragment of RAX with offset 0.
- for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
- Reg = TRI.getDwarfRegNum(*SR, false);
+ for (MCPhysReg SR : TRI.superregs(MachineReg)) {
+ Reg = TRI.getDwarfRegNum(SR, false);
if (Reg >= 0) {
- unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg);
+ unsigned Idx = TRI.getSubRegIndex(SR, MachineReg);
unsigned Size = TRI.getSubRegIdxSize(Idx);
unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
DwarfRegs.push_back(Register::createRegister(Reg, "super-register"));
@@ -142,11 +142,11 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
// this doesn't find a combination of subregisters that fully cover
// the register (even though one may exist).
SmallBitVector Coverage(RegSize, false);
- for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
- unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR);
+ for (MCPhysReg SR : TRI.subregs(MachineReg)) {
+ unsigned Idx = TRI.getSubRegIndex(MachineReg, SR);
unsigned Size = TRI.getSubRegIdxSize(Idx);
unsigned Offset = TRI.getSubRegIdxOffset(Idx);
- Reg = TRI.getDwarfRegNum(*SR, false);
+ Reg = TRI.getDwarfRegNum(SR, false);
if (Reg < 0)
continue;
@@ -566,6 +566,12 @@ bool DwarfExpression::addExpression(
case dwarf::DW_OP_dup:
case dwarf::DW_OP_push_object_address:
case dwarf::DW_OP_over:
+ case dwarf::DW_OP_eq:
+ case dwarf::DW_OP_ne:
+ case dwarf::DW_OP_gt:
+ case dwarf::DW_OP_ge:
+ case dwarf::DW_OP_lt:
+ case dwarf::DW_OP_le:
emitOp(OpNum);
break;
case dwarf::DW_OP_deref:
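The DwarfExpression.cpp hunk above lets addExpression() pass the DWARF comparison operators (DW_OP_eq through DW_OP_le) straight through to the output. As a small illustration, not part of the vendored patch and with a made-up function name, a DIExpression using one of these operators can be built like this; DW_OP_lt pops the top two stack entries and pushes 1 if the second is less than the first, so the result is "value < 42":

#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"

llvm::DIExpression *makeLessThan42(llvm::LLVMContext &Ctx) {
  // Push the constant 42, then compare it against the value already on the
  // DWARF expression stack.
  return llvm::DIExpression::get(
      Ctx, {llvm::dwarf::DW_OP_constu, 42, llvm::dwarf::DW_OP_lt});
}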
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 79a6ce7801b7..464f4f048016 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -26,6 +26,7 @@ class DbgEntity;
class DbgVariable;
class DbgLabel;
class DINode;
+class DILocalScope;
class DwarfCompileUnit;
class DwarfUnit;
class LexicalScope;
@@ -87,7 +88,7 @@ class DwarfFile {
DenseMap<LexicalScope *, LabelList> ScopeLabels;
// Collection of abstract subprogram DIEs.
- DenseMap<const MDNode *, DIE *> AbstractSPDies;
+ DenseMap<const DILocalScope *, DIE *> AbstractLocalScopeDIEs;
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
@@ -162,8 +163,8 @@ public:
return ScopeLabels;
}
- DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
- return AbstractSPDies;
+ DenseMap<const DILocalScope *, DIE *> &getAbstractScopeDIEs() {
+ return AbstractLocalScopeDIEs;
}
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index c2ff899c04ab..d30f0ef7af34 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -543,7 +543,7 @@ void DwarfUnit::addAccess(DIE &Die, DINode::DIFlags Flags) {
}
DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
- if (!Context || isa<DIFile>(Context))
+ if (!Context || isa<DIFile>(Context) || isa<DICompileUnit>(Context))
return &getUnitDie();
if (auto *T = dyn_cast<DIType>(Context))
return getOrCreateTypeDIE(T);
@@ -1223,7 +1223,7 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
"decl has a linkage name and it is different");
if (DeclLinkageName.empty() &&
// Always emit it for abstract subprograms.
- (DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP)))
+ (DD->useAllLinkageNames() || DU->getAbstractScopeDIEs().lookup(SP)))
addLinkageName(SPDie, LinkageName);
if (!DeclDie)
@@ -1362,16 +1362,16 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
DISubrange::BoundType Bound) -> void {
- if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
+ if (auto *BV = dyn_cast_if_present<DIVariable *>(Bound)) {
if (auto *VarDIE = getDIE(BV))
addDIEEntry(DW_Subrange, Attr, *VarDIE);
- } else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
+ } else if (auto *BE = dyn_cast_if_present<DIExpression *>(Bound)) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(BE);
addBlock(DW_Subrange, Attr, DwarfExpr.finalize());
- } else if (auto *BI = Bound.dyn_cast<ConstantInt *>()) {
+ } else if (auto *BI = dyn_cast_if_present<ConstantInt *>(Bound)) {
if (Attr == dwarf::DW_AT_count) {
if (BI->getSExtValue() != -1)
addUInt(DW_Subrange, Attr, std::nullopt, BI->getSExtValue());
@@ -1401,10 +1401,10 @@ void DwarfUnit::constructGenericSubrangeDIE(DIE &Buffer,
auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
DIGenericSubrange::BoundType Bound) -> void {
- if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
+ if (auto *BV = dyn_cast_if_present<DIVariable *>(Bound)) {
if (auto *VarDIE = getDIE(BV))
addDIEEntry(DwGenericSubrange, Attr, *VarDIE);
- } else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
+ } else if (auto *BE = dyn_cast_if_present<DIExpression *>(Bound)) {
if (BE->isConstant() &&
DIExpression::SignedOrUnsignedConstant::SignedConstant ==
*BE->isConstant()) {
@@ -1463,7 +1463,7 @@ static bool hasVectorBeenPadded(const DICompositeType *CTy) {
const auto Subrange = cast<DISubrange>(Elements[0]);
const auto NumVecElements =
Subrange->getCount()
- ? Subrange->getCount().get<ConstantInt *>()->getSExtValue()
+ ? cast<ConstantInt *>(Subrange->getCount())->getSExtValue()
: 0;
// Ensure we found the element count and that the actual size is wide
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 0caa6adbfa62..8f17e94c2d1c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -245,10 +245,10 @@ public:
DIE *createTypeDIE(const DIScope *Context, DIE &ContextDIE, const DIType *Ty);
/// Find existing DIE or create new DIE for the given type.
- DIE *getOrCreateTypeDIE(const MDNode *TyNode);
+ virtual DIE *getOrCreateTypeDIE(const MDNode *TyNode);
/// Get context owner's DIE.
- DIE *getOrCreateContextDIE(const DIScope *Context);
+ virtual DIE *getOrCreateContextDIE(const DIScope *Context);
/// Construct DIEs for types that contain vtables.
void constructContainingTypeDIEs();
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 67e2c0e07095..eef6b1d93f36 100644
--- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -410,7 +410,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
computeActionsTable(LandingPads, Actions, FirstActions);
// Compute the call-site table and call-site ranges. Normally, there is only
- // one call-site-range which covers the whole funciton. With
+ // one call-site-range which covers the whole function. With
// -basic-block-sections, there is one call-site-range per basic block
// section.
SmallVector<CallSiteEntry, 64> CallSites;
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index 3e75b4371033..59c3fa15885e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -32,11 +32,7 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
SmallVector<InlineSite, 8> ReversedInlineStack;
auto *InlinedAt = DebugLoc ? DebugLoc->getInlinedAt() : nullptr;
while (InlinedAt) {
- const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
- // Use linkage name for C++ if possible.
- auto Name = SP->getLinkageName();
- if (Name.empty())
- Name = SP->getName();
+ auto Name = InlinedAt->getSubprogramLinkageName();
// Use caching to avoid redundant md5 computation for build speed.
uint64_t &CallerGuid = NameGuidMap[Name];
if (!CallerGuid)
@@ -46,8 +42,15 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId);
InlinedAt = InlinedAt->getInlinedAt();
}
-
+ uint64_t Discriminator = 0;
+ // For now only block probes have FS discriminators. See
+ // MIRFSDiscriminator.cpp for more details.
+ if (EnableFSDiscriminator && DebugLoc &&
+ (Type == (uint64_t)PseudoProbeType::Block))
+ Discriminator = DebugLoc->getDiscriminator();
+ assert((EnableFSDiscriminator || Discriminator == 0) &&
+ "Discriminator should not be set in non-FSAFDO mode");
SmallVector<InlineSite, 8> InlineStack(llvm::reverse(ReversedInlineStack));
- Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack,
- Asm->CurrentFnSym);
+ Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, Discriminator,
+ InlineStack, Asm->CurrentFnSym);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index 7a800438592c..6d6432b61f2d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -638,7 +638,7 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
const SEHUnwindMapEntry &UME = FuncInfo.SEHUnwindMap[State];
const MCExpr *FilterOrFinally;
const MCExpr *ExceptOrNull;
- auto *Handler = UME.Handler.get<MachineBasicBlock *>();
+ auto *Handler = cast<MachineBasicBlock *>(UME.Handler);
if (UME.IsFinally) {
FilterOrFinally = create32bitRef(getMCSymbolForMBB(Asm, Handler));
ExceptOrNull = MCConstantExpr::create(0, Ctx);
@@ -762,7 +762,11 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
OS.emitInt32(0);
AddComment("EHFlags");
- OS.emitInt32(1);
+ if (MMI->getModule()->getModuleFlag("eh-asynch")) {
+ OS.emitInt32(0);
+ } else {
+ OS.emitInt32(1);
+ }
// UnwindMapEntry {
// int32_t ToState;
@@ -771,8 +775,8 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
if (UnwindMapXData) {
OS.emitLabel(UnwindMapXData);
for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) {
- MCSymbol *CleanupSym =
- getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast<MachineBasicBlock *>());
+ MCSymbol *CleanupSym = getMCSymbolForMBB(
+ Asm, dyn_cast_if_present<MachineBasicBlock *>(UME.Cleanup));
AddComment("ToState");
OS.emitInt32(UME.ToState);
@@ -859,8 +863,8 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext);
}
- MCSymbol *HandlerSym =
- getMCSymbolForMBB(Asm, HT.Handler.dyn_cast<MachineBasicBlock *>());
+ MCSymbol *HandlerSym = getMCSymbolForMBB(
+ Asm, dyn_cast_if_present<MachineBasicBlock *>(HT.Handler));
AddComment("Adjectives");
OS.emitInt32(HT.Adjectives);
@@ -1065,7 +1069,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
assert(!FuncInfo.SEHUnwindMap.empty());
for (const SEHUnwindMapEntry &UME : FuncInfo.SEHUnwindMap) {
- auto *Handler = UME.Handler.get<MachineBasicBlock *>();
+ auto *Handler = cast<MachineBasicBlock *>(UME.Handler);
const MCSymbol *ExceptOrFinally =
UME.IsFinally ? getMCSymbolForMBB(Asm, Handler) : Handler->getSymbol();
// -1 is usually the base state for "unwind to caller", but for
@@ -1136,7 +1140,7 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
DenseMap<const MachineBasicBlock *, int> HandlerStates;
for (int State = 0; State < NumStates; ++State) {
MachineBasicBlock *HandlerBlock =
- FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>();
+ cast<MachineBasicBlock *>(FuncInfo.ClrEHUnwindMap[State].Handler);
HandlerStates[HandlerBlock] = State;
// Use this loop through all handlers to verify our assumption (used in
// the MinEnclosingState computation) that enclosing funclets have lower
@@ -1297,7 +1301,7 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
const MCExpr *ClauseEnd = getOffsetPlusOne(Clause.EndLabel, FuncBeginSym);
const ClrEHUnwindMapEntry &Entry = FuncInfo.ClrEHUnwindMap[Clause.State];
- MachineBasicBlock *HandlerBlock = Entry.Handler.get<MachineBasicBlock *>();
+ MachineBasicBlock *HandlerBlock = cast<MachineBasicBlock *>(Entry.Handler);
MCSymbol *BeginSym = getMCSymbolForMBB(Asm, HandlerBlock);
const MCExpr *HandlerBegin = getOffset(BeginSym, FuncBeginSym);
MCSymbol *EndSym = EndSymbolMap[Clause.State];
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index 7098824dbe4b..5ef850d09d92 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -1,4 +1,6 @@
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
+#include "LiveDebugValues/LiveDebugValues.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -47,6 +49,12 @@ static cl::opt<bool> EnableMemLocFragFill("mem-loc-frag-fill", cl::init(true),
static cl::opt<bool> PrintResults("print-debug-ata", cl::init(false),
cl::Hidden);
+/// Coalesce adjacent dbg locs describing memory locations that have contiguous
+/// fragments. This reduces the cost of LiveDebugValues which does SSA
+/// construction for each explicitly stated variable fragment.
+static cl::opt<cl::boolOrDefault>
+ CoalesceAdjacentFragmentsOpt("debug-ata-coalesce-frags", cl::Hidden);
+
// Implicit conversions are disabled for enum class types, so unfortunately we
// need to create a DenseMapInfo wrapper around the specified underlying type.
template <> struct llvm::DenseMapInfo<VariableID> {
@@ -79,6 +87,8 @@ class FunctionVarLocsBuilder {
SmallVector<VarLocInfo> SingleLocVars;
public:
+ unsigned getNumVariables() const { return Variables.size(); }
+
/// Find or insert \p V and return the ID.
VariableID insertVariable(DebugVariable V) {
return static_cast<VariableID>(Variables.insert(V));
@@ -105,23 +115,23 @@ public:
/// Add a def for a variable that is valid for its lifetime.
void addSingleLocVar(DebugVariable Var, DIExpression *Expr, DebugLoc DL,
- Value *V) {
+ RawLocationWrapper R) {
VarLocInfo VarLoc;
VarLoc.VariableID = insertVariable(Var);
VarLoc.Expr = Expr;
VarLoc.DL = DL;
- VarLoc.V = V;
+ VarLoc.Values = R;
SingleLocVars.emplace_back(VarLoc);
}
/// Add a def to the wedge of defs just before /p Before.
void addVarLoc(Instruction *Before, DebugVariable Var, DIExpression *Expr,
- DebugLoc DL, Value *V) {
+ DebugLoc DL, RawLocationWrapper R) {
VarLocInfo VarLoc;
VarLoc.VariableID = insertVariable(Var);
VarLoc.Expr = Expr;
VarLoc.DL = DL;
- VarLoc.V = V;
+ VarLoc.Values = R;
VarLocsBeforeInst[Before].emplace_back(VarLoc);
}
};
@@ -148,7 +158,11 @@ void FunctionVarLocs::print(raw_ostream &OS, const Function &Fn) const {
auto PrintLoc = [&OS](const VarLocInfo &Loc) {
OS << "DEF Var=[" << (unsigned)Loc.VariableID << "]"
- << " Expr=" << *Loc.Expr << " V=" << *Loc.V << "\n";
+ << " Expr=" << *Loc.Expr << " Values=(";
+ for (auto *Op : Loc.Values.location_ops()) {
+      OS << Op->getName() << " ";
+    }
+    OS << ")\n";
};
// Print the single location variables.
@@ -234,13 +248,13 @@ getDerefOffsetInBytes(const DIExpression *DIExpr) {
int64_t Offset = 0;
const unsigned NumElements = DIExpr->getNumElements();
const auto Elements = DIExpr->getElements();
- unsigned NextElement = 0;
+ unsigned ExpectedDerefIdx = 0;
// Extract the offset.
if (NumElements > 2 && Elements[0] == dwarf::DW_OP_plus_uconst) {
Offset = Elements[1];
- NextElement = 2;
+ ExpectedDerefIdx = 2;
} else if (NumElements > 3 && Elements[0] == dwarf::DW_OP_constu) {
- NextElement = 3;
+ ExpectedDerefIdx = 3;
if (Elements[2] == dwarf::DW_OP_plus)
Offset = Elements[1];
else if (Elements[2] == dwarf::DW_OP_minus)
@@ -250,19 +264,21 @@ getDerefOffsetInBytes(const DIExpression *DIExpr) {
}
// If that's all there is it means there's no deref.
- if (NextElement >= NumElements)
+ if (ExpectedDerefIdx >= NumElements)
return std::nullopt;
// Check the next element is DW_OP_deref - otherwise this is too complex or
// isn't a deref expression.
- if (Elements[NextElement] != dwarf::DW_OP_deref)
+ if (Elements[ExpectedDerefIdx] != dwarf::DW_OP_deref)
return std::nullopt;
// Check the final operation is either the DW_OP_deref or is a fragment.
- if (NumElements == NextElement + 1)
+ if (NumElements == ExpectedDerefIdx + 1)
return Offset; // Ends with deref.
- else if (NumElements == NextElement + 3 &&
- Elements[NextElement] == dwarf::DW_OP_LLVM_fragment)
+ unsigned ExpectedFragFirstIdx = ExpectedDerefIdx + 1;
+ unsigned ExpectedFragFinalIdx = ExpectedFragFirstIdx + 2;
+ if (NumElements == ExpectedFragFinalIdx + 1 &&
+ Elements[ExpectedFragFirstIdx] == dwarf::DW_OP_LLVM_fragment)
return Offset; // Ends with deref + fragment.
// Don't bother trying to interpret anything more complex.
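A minimal standalone restatement of the shapes accepted above may help; the opcode constants and names below are illustrative placeholders, not the real DWARF enumerators or the LLVM API. It returns the byte offset for "[plus_uconst N | constu N plus/minus] deref [fragment off size]" and nothing for anything else.

#include <cstdint>
#include <optional>
#include <vector>

namespace sketch {
constexpr uint64_t OpPlusUconst = 1, OpConstu = 2, OpPlus = 3, OpMinus = 4,
                   OpDeref = 5, OpFragment = 6;

std::optional<int64_t> derefOffsetInBytes(const std::vector<uint64_t> &E) {
  int64_t Offset = 0;
  unsigned DerefIdx = 0;
  if (E.size() > 2 && E[0] == OpPlusUconst) {
    Offset = static_cast<int64_t>(E[1]);
    DerefIdx = 2;
  } else if (E.size() > 3 && E[0] == OpConstu) {
    DerefIdx = 3;
    if (E[2] == OpPlus)
      Offset = static_cast<int64_t>(E[1]);
    else if (E[2] == OpMinus)
      Offset = -static_cast<int64_t>(E[1]);
    else
      return std::nullopt;
  }
  if (DerefIdx >= E.size() || E[DerefIdx] != OpDeref)
    return std::nullopt;                // no deref, or too complex
  if (E.size() == DerefIdx + 1)
    return Offset;                      // ends with the deref
  if (E.size() == DerefIdx + 4 && E[DerefIdx + 1] == OpFragment)
    return Offset;                      // deref + (fragment, offset, size)
  return std::nullopt;
}
} // namespace sketch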
@@ -278,6 +294,24 @@ static DebugAggregate getAggregate(const DebugVariable &Var) {
return DebugAggregate(Var.getVariable(), Var.getInlinedAt());
}
+static bool shouldCoalesceFragments(Function &F) {
+ // Enabling fragment coalescing reduces compiler run time when instruction
+ // referencing is enabled. However, it may cause LiveDebugVariables to create
+ // incorrect locations. Since instruction-referencing mode effectively
+ // bypasses LiveDebugVariables we only enable coalescing if the cl::opt flag
+ // has not been explicitly set and instruction-referencing is turned on.
+ switch (CoalesceAdjacentFragmentsOpt) {
+ case cl::boolOrDefault::BOU_UNSET:
+ return debuginfoShouldUseDebugInstrRef(
+ Triple(F.getParent()->getTargetTriple()));
+ case cl::boolOrDefault::BOU_TRUE:
+ return true;
+ case cl::boolOrDefault::BOU_FALSE:
+ return false;
+ }
+ llvm_unreachable("Unknown boolOrDefault value");
+}
+
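A tiny standalone sketch of that tri-state default (names are illustrative, not the cl::opt machinery): an explicitly set flag always wins, while an unset flag falls back to whether the target would use instruction referencing anyway.

#include <optional>

namespace sketch {
// Stand-in for the per-target instruction-referencing heuristic.
bool targetUsesInstrRef() { return true; }

bool shouldCoalesceFragments(std::optional<bool> Flag) {
  return Flag.has_value() ? *Flag : targetUsesInstrRef();
}
} // namespace sketch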
namespace {
/// In dwarf emission, the following sequence
/// 1. dbg.value ... Fragment(0, 64)
@@ -301,6 +335,7 @@ class MemLocFragmentFill {
Function &Fn;
FunctionVarLocsBuilder *FnVarLocs;
const DenseSet<DebugAggregate> *VarsWithStackSlot;
+ bool CoalesceAdjacentFragments;
// 0 = no memory location.
using BaseAddress = unsigned;
@@ -315,7 +350,7 @@ class MemLocFragmentFill {
/// IDs for memory location base addresses in maps. Use 0 to indicate that
/// there's no memory location.
- UniqueVector<Value *> Bases;
+ UniqueVector<RawLocationWrapper> Bases;
UniqueVector<DebugAggregate> Aggregates;
DenseMap<const BasicBlock *, VarFragMap> LiveIn;
DenseMap<const BasicBlock *, VarFragMap> LiveOut;
@@ -368,7 +403,7 @@ class MemLocFragmentFill {
/// Return a string for the value that \p BaseID represents.
std::string toString(unsigned BaseID) {
if (BaseID)
- return Bases[BaseID]->getName().str();
+ return Bases[BaseID].getVariableLocationOp(0)->getName().str();
else
return "None";
}
@@ -565,6 +600,31 @@ class MemLocFragmentFill {
<< " bits [" << StartBit << ", " << EndBit << ")\n");
}
+ /// Inserts a new dbg def if the interval found when looking up \p StartBit
+ /// in \p FragMap starts before \p StartBit or ends after \p EndBit (which
+ /// indicates - assuming StartBit->EndBit has just been inserted - that the
+ /// slice has been coalesced in the map).
+ void coalesceFragments(BasicBlock &BB, Instruction &Before, unsigned Var,
+ unsigned StartBit, unsigned EndBit, unsigned Base,
+ DebugLoc DL, const FragsInMemMap &FragMap) {
+ if (!CoalesceAdjacentFragments)
+ return;
+ // We've inserted the location into the map. The map will have coalesced
+ // adjacent intervals (variable fragments) that describe the same memory
+ // location. Use this knowledge to insert a debug location that describes
+ // that coalesced fragment. This may eclipse other locs we've just
+ // inserted. This is okay as redundant locs will be cleaned up later.
+ auto CoalescedFrag = FragMap.find(StartBit);
+ // Bail if no coalescing has taken place.
+ if (CoalescedFrag.start() == StartBit && CoalescedFrag.stop() == EndBit)
+ return;
+
+ LLVM_DEBUG(dbgs() << "- Insert loc for bits " << CoalescedFrag.start()
+ << " to " << CoalescedFrag.stop() << "\n");
+ insertMemLoc(BB, Before, Var, CoalescedFrag.start(), CoalescedFrag.stop(),
+ Base, DL);
+ }
+
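A standalone sketch of the coalescing check this relies on (illustrative, not llvm::IntervalMap): model the fragment map as one base value per bit, insert the new slice, then grow outwards while neighbouring bits carry the same non-zero base; if the resulting run is wider than what was just inserted, the map has coalesced adjacent fragments and a single location covering the whole run would be emitted.

#include <vector>

namespace sketch {
struct Run { unsigned Start, End; }; // half-open bit range

// BitToBase holds 0 for "no memory location"; Base is assumed non-zero.
Run insertAndFindCoalescedRun(std::vector<unsigned> &BitToBase,
                              unsigned StartBit, unsigned EndBit,
                              unsigned Base) {
  for (unsigned B = StartBit; B < EndBit; ++B)
    BitToBase[B] = Base;
  unsigned S = StartBit, E = EndBit;
  while (S > 0 && BitToBase[S - 1] == Base)
    --S;                       // extend left over equal-valued bits
  while (E < BitToBase.size() && BitToBase[E] == Base)
    ++E;                       // extend right over equal-valued bits
  return {S, E};
}
} // namespace sketch

With bits [0, 16) already mapped to the same base, inserting [16, 32) yields the run [0, 32), mirroring the case where coalesceFragments emits a wider def.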
void addDef(const VarLocInfo &VarLoc, Instruction &Before, BasicBlock &BB,
VarFragMap &LiveSet) {
DebugVariable DbgVar = FnVarLocs->getVariable(VarLoc.VariableID);
@@ -601,7 +661,7 @@ class MemLocFragmentFill {
const auto DerefOffsetInBytes = getDerefOffsetInBytes(DIExpr);
const unsigned Base =
DerefOffsetInBytes && *DerefOffsetInBytes * 8 == StartBit
- ? Bases.insert(VarLoc.V)
+ ? Bases.insert(VarLoc.Values)
: 0;
LLVM_DEBUG(dbgs() << "DEF " << DbgVar.getVariable()->getName() << " ["
<< StartBit << ", " << EndBit << "): " << toString(Base)
@@ -630,6 +690,8 @@ class MemLocFragmentFill {
if (!FragMap.overlaps(StartBit, EndBit)) {
LLVM_DEBUG(dbgs() << "- No overlaps\n");
FragMap.insert(StartBit, EndBit, Base);
+ coalesceFragments(BB, Before, Var, StartBit, EndBit, Base, VarLoc.DL,
+ FragMap);
return;
}
// There is at least one overlap.
@@ -720,6 +782,9 @@ class MemLocFragmentFill {
LLVM_DEBUG(dbgs() << "- Insert DEF into now-empty space\n");
FragMap.insert(StartBit, EndBit, Base);
}
+
+ coalesceFragments(BB, Before, Var, StartBit, EndBit, Base, VarLoc.DL,
+ FragMap);
}
bool skipVariable(const DILocalVariable *V) { return !V->getSizeInBits(); }
@@ -737,8 +802,10 @@ class MemLocFragmentFill {
public:
MemLocFragmentFill(Function &Fn,
- const DenseSet<DebugAggregate> *VarsWithStackSlot)
- : Fn(Fn), VarsWithStackSlot(VarsWithStackSlot) {}
+ const DenseSet<DebugAggregate> *VarsWithStackSlot,
+ bool CoalesceAdjacentFragments)
+ : Fn(Fn), VarsWithStackSlot(VarsWithStackSlot),
+ CoalesceAdjacentFragments(CoalesceAdjacentFragments) {}
/// Add variable locations to \p FnVarLocs so that any bits of a variable
/// with a memory location have that location explicitly reinstated at each
@@ -845,18 +912,20 @@ public:
}
// Insert new location defs.
- for (auto Pair : BBInsertBeforeMap) {
+ for (auto &Pair : BBInsertBeforeMap) {
InsertMap &Map = Pair.second;
- for (auto Pair : Map) {
+ for (auto &Pair : Map) {
Instruction *InsertBefore = Pair.first;
assert(InsertBefore && "should never be null");
auto FragMemLocs = Pair.second;
auto &Ctx = Fn.getContext();
- for (auto FragMemLoc : FragMemLocs) {
+ for (auto &FragMemLoc : FragMemLocs) {
DIExpression *Expr = DIExpression::get(Ctx, std::nullopt);
- Expr = *DIExpression::createFragmentExpression(
- Expr, FragMemLoc.OffsetInBits, FragMemLoc.SizeInBits);
+ if (FragMemLoc.SizeInBits !=
+ *Aggregates[FragMemLoc.Var].first->getSizeInBits())
+ Expr = *DIExpression::createFragmentExpression(
+ Expr, FragMemLoc.OffsetInBits, FragMemLoc.SizeInBits);
Expr = DIExpression::prepend(Expr, DIExpression::DerefAfter,
FragMemLoc.OffsetInBits / 8);
DebugVariable Var(Aggregates[FragMemLoc.Var].first, Expr,
@@ -961,14 +1030,17 @@ public:
}
};
- using AssignmentMap = DenseMap<VariableID, Assignment>;
- using LocMap = DenseMap<VariableID, LocKind>;
- using OverlapMap = DenseMap<VariableID, SmallVector<VariableID, 4>>;
+ using AssignmentMap = SmallVector<Assignment>;
+ using LocMap = SmallVector<LocKind>;
+ using OverlapMap = DenseMap<VariableID, SmallVector<VariableID>>;
using UntaggedStoreAssignmentMap =
DenseMap<const Instruction *,
SmallVector<std::pair<VariableID, at::AssignmentInfo>>>;
private:
+  /// One more than the highest VariableID used for partially promoted
+  /// variables; VariableIDs start at 1.
+ unsigned TrackedVariablesVectorSize = 0;
/// Map a variable to the set of variables that it fully contains.
OverlapMap VarContains;
/// Map untagged stores to the variable fragments they assign to. Used by
@@ -984,30 +1056,23 @@ private:
void emitDbgValue(LocKind Kind, const DbgVariableIntrinsic *Source,
Instruction *After);
- static bool mapsAreEqual(const AssignmentMap &A, const AssignmentMap &B) {
- if (A.size() != B.size())
- return false;
- for (const auto &Pair : A) {
- VariableID Var = Pair.first;
- const Assignment &AV = Pair.second;
- auto R = B.find(Var);
- // Check if this entry exists in B, otherwise ret false.
- if (R == B.end())
- return false;
- // Check that the assignment value is the same.
- if (!AV.isSameSourceAssignment(R->second))
- return false;
- }
- return true;
+ static bool mapsAreEqual(const BitVector &Mask, const AssignmentMap &A,
+ const AssignmentMap &B) {
+ return llvm::all_of(Mask.set_bits(), [&](unsigned VarID) {
+ return A[VarID].isSameSourceAssignment(B[VarID]);
+ });
}
/// Represents the stack and debug assignments in a block. Used to describe
/// the live-in and live-out values for blocks, as well as the "current"
/// value as we process each instruction in a block.
struct BlockInfo {
- /// Dominating assignment to memory for each variable.
+ /// The set of variables (VariableID) being tracked in this block.
+ BitVector VariableIDsInBlock;
+ /// Dominating assignment to memory for each variable, indexed by
+ /// VariableID.
AssignmentMap StackHomeValue;
- /// Dominating assignemnt to each variable.
+    /// Dominating assignment to each variable, indexed by VariableID.
AssignmentMap DebugValue;
/// Location kind for each variable. LiveLoc indicates whether the
/// dominating assignment in StackHomeValue (LocKind::Mem), DebugValue
@@ -1018,20 +1083,138 @@ private:
/// merge of multiple assignments (both are Status::NoneOrPhi). In other
/// words, the memory location may well be valid while both DebugValue and
/// StackHomeValue contain Assignments that have a Status of NoneOrPhi.
+ /// Indexed by VariableID.
LocMap LiveLoc;
+ public:
+ enum AssignmentKind { Stack, Debug };
+ const AssignmentMap &getAssignmentMap(AssignmentKind Kind) const {
+ switch (Kind) {
+ case Stack:
+ return StackHomeValue;
+ case Debug:
+ return DebugValue;
+ }
+ llvm_unreachable("Unknown AssignmentKind");
+ }
+ AssignmentMap &getAssignmentMap(AssignmentKind Kind) {
+ return const_cast<AssignmentMap &>(
+ const_cast<const BlockInfo *>(this)->getAssignmentMap(Kind));
+ }
+
+ bool isVariableTracked(VariableID Var) const {
+ return VariableIDsInBlock[static_cast<unsigned>(Var)];
+ }
+
+ const Assignment &getAssignment(AssignmentKind Kind, VariableID Var) const {
+ assert(isVariableTracked(Var) && "Var not tracked in block");
+ return getAssignmentMap(Kind)[static_cast<unsigned>(Var)];
+ }
+
+ LocKind getLocKind(VariableID Var) const {
+ assert(isVariableTracked(Var) && "Var not tracked in block");
+ return LiveLoc[static_cast<unsigned>(Var)];
+ }
+
+ /// Set LocKind for \p Var only: does not set LocKind for VariableIDs of
+    /// fragments contained within \p Var.
+ void setLocKind(VariableID Var, LocKind K) {
+ VariableIDsInBlock.set(static_cast<unsigned>(Var));
+ LiveLoc[static_cast<unsigned>(Var)] = K;
+ }
+
+ /// Set the assignment in the \p Kind assignment map for \p Var only: does
+ /// not set the assignment for VariableIDs of fragments contained win \p
+ /// Var.
+ void setAssignment(AssignmentKind Kind, VariableID Var,
+ const Assignment &AV) {
+ VariableIDsInBlock.set(static_cast<unsigned>(Var));
+ getAssignmentMap(Kind)[static_cast<unsigned>(Var)] = AV;
+ }
+
+ /// Return true if there is an assignment matching \p AV in the \p Kind
+ /// assignment map. Does consider assignments for VariableIDs of fragments
+    /// contained within \p Var.
+ bool hasAssignment(AssignmentKind Kind, VariableID Var,
+ const Assignment &AV) const {
+ if (!isVariableTracked(Var))
+ return false;
+ return AV.isSameSourceAssignment(getAssignment(Kind, Var));
+ }
+
/// Compare every element in each map to determine structural equality
/// (slow).
bool operator==(const BlockInfo &Other) const {
- return LiveLoc == Other.LiveLoc &&
- mapsAreEqual(StackHomeValue, Other.StackHomeValue) &&
- mapsAreEqual(DebugValue, Other.DebugValue);
+ return VariableIDsInBlock == Other.VariableIDsInBlock &&
+ LiveLoc == Other.LiveLoc &&
+ mapsAreEqual(VariableIDsInBlock, StackHomeValue,
+ Other.StackHomeValue) &&
+ mapsAreEqual(VariableIDsInBlock, DebugValue, Other.DebugValue);
}
bool operator!=(const BlockInfo &Other) const { return !(*this == Other); }
bool isValid() {
return LiveLoc.size() == DebugValue.size() &&
LiveLoc.size() == StackHomeValue.size();
}
+
+ /// Clear everything and initialise with ⊤-values for all variables.
+ void init(int NumVars) {
+ StackHomeValue.clear();
+ DebugValue.clear();
+ LiveLoc.clear();
+ VariableIDsInBlock = BitVector(NumVars);
+ StackHomeValue.insert(StackHomeValue.begin(), NumVars,
+ Assignment::makeNoneOrPhi());
+ DebugValue.insert(DebugValue.begin(), NumVars,
+ Assignment::makeNoneOrPhi());
+ LiveLoc.insert(LiveLoc.begin(), NumVars, LocKind::None);
+ }
+
+ /// Helper for join.
+ template <typename ElmtType, typename FnInputType>
+ static void joinElmt(int Index, SmallVector<ElmtType> &Target,
+ const SmallVector<ElmtType> &A,
+ const SmallVector<ElmtType> &B,
+ ElmtType (*Fn)(FnInputType, FnInputType)) {
+ Target[Index] = Fn(A[Index], B[Index]);
+ }
+
+ /// See comment for AssignmentTrackingLowering::joinBlockInfo.
+ static BlockInfo join(const BlockInfo &A, const BlockInfo &B, int NumVars) {
+ // Join A and B.
+ //
+ // Intersect = join(a, b) for a in A, b in B where Var(a) == Var(b)
+ // Difference = join(x, ⊤) for x where Var(x) is in A xor B
+ // Join = Intersect ∪ Difference
+ //
+ // This is achieved by performing a join on elements from A and B with
+ // variables common to both A and B (join elements indexed by var
+ // intersect), then adding ⊤-value elements for vars in A xor B. The
+ // latter part is equivalent to performing join on elements with variables
+ // in A xor B with the ⊤-value for the map element since join(x, ⊤) = ⊤.
+ // BlockInfo::init initializes all variable entries to the ⊤ value so we
+ // don't need to explicitly perform that step as Join.VariableIDsInBlock
+ // is set to the union of the variables in A and B at the end of this
+ // function.
+ BlockInfo Join;
+ Join.init(NumVars);
+
+ BitVector Intersect = A.VariableIDsInBlock;
+ Intersect &= B.VariableIDsInBlock;
+
+ for (auto VarID : Intersect.set_bits()) {
+ joinElmt(VarID, Join.LiveLoc, A.LiveLoc, B.LiveLoc, joinKind);
+ joinElmt(VarID, Join.DebugValue, A.DebugValue, B.DebugValue,
+ joinAssignment);
+ joinElmt(VarID, Join.StackHomeValue, A.StackHomeValue, B.StackHomeValue,
+ joinAssignment);
+ }
+
+ Join.VariableIDsInBlock = A.VariableIDsInBlock;
+ Join.VariableIDsInBlock |= B.VariableIDsInBlock;
+ assert(Join.isValid());
+ return Join;
+ }
};
Function &Fn;
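A standalone sketch of that per-variable join (illustrative container types, not the pass's): every entry starts at the top value, only IDs tracked in both inputs are joined element-wise, and the tracked set becomes the union, so an ID seen on one side only stays at top, matching join(x, ⊤) = ⊤.

#include <vector>

namespace sketch {
enum class Kind { Top, Mem, Val }; // Top plays the role of LocKind::None

Kind joinKind(Kind A, Kind B) { return A == B ? A : Kind::Top; }

struct Block {
  std::vector<bool> Tracked; // bitvector of VariableIDs live in this block
  std::vector<Kind> Loc;     // indexed by VariableID
  void init(unsigned N) {
    Tracked.assign(N, false);
    Loc.assign(N, Kind::Top);
  }
};

Block join(const Block &A, const Block &B, unsigned NumVars) {
  Block J;
  J.init(NumVars);
  for (unsigned Id = 0; Id != NumVars; ++Id) {
    J.Tracked[Id] = A.Tracked[Id] || B.Tracked[Id];
    if (A.Tracked[Id] && B.Tracked[Id])
      J.Loc[Id] = joinKind(A.Loc[Id], B.Loc[Id]); // intersection: real join
    // IDs present in only one input keep the Top value from init().
  }
  return J;
}
} // namespace sketch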
@@ -1076,11 +1259,8 @@ private:
/// (⊤) in this case (unknown location / assignment).
///@{
static LocKind joinKind(LocKind A, LocKind B);
- static LocMap joinLocMap(const LocMap &A, const LocMap &B);
static Assignment joinAssignment(const Assignment &A, const Assignment &B);
- static AssignmentMap joinAssignmentMap(const AssignmentMap &A,
- const AssignmentMap &B);
- static BlockInfo joinBlockInfo(const BlockInfo &A, const BlockInfo &B);
+ BlockInfo joinBlockInfo(const BlockInfo &A, const BlockInfo &B);
///@}
/// Process the instructions in \p BB updating \p LiveSet along the way. \p
@@ -1092,7 +1272,7 @@ private:
/// location information).
///@{
void processNonDbgInstruction(Instruction &I, BlockInfo *LiveSet);
- void processDbgInstruction(Instruction &I, BlockInfo *LiveSet);
+ void processDbgInstruction(DbgInfoIntrinsic &I, BlockInfo *LiveSet);
/// Update \p LiveSet after encountering an instruction with a DIAssignID
/// attachment, \p I.
void processTaggedInstruction(Instruction &I, BlockInfo *LiveSet);
@@ -1113,8 +1293,15 @@ private:
/// have been called for \p Var first.
LocKind getLocKind(BlockInfo *LiveSet, VariableID Var);
/// Return true if \p Var has an assignment in \p M matching \p AV.
- bool hasVarWithAssignment(VariableID Var, const Assignment &AV,
- const AssignmentMap &M);
+ bool hasVarWithAssignment(BlockInfo *LiveSet, BlockInfo::AssignmentKind Kind,
+ VariableID Var, const Assignment &AV);
+  /// Return the set of VariableIDs corresponding to the fragments contained fully
+ /// within the variable/fragment \p Var.
+ ArrayRef<VariableID> getContainedFragments(VariableID Var) const;
+
+ /// Mark \p Var as having been touched this frame. Note, this applies only
+ /// to the exact fragment \p Var and not to any fragments contained within.
+ void touchFragment(VariableID Var);
/// Emit info for variables that are fully promoted.
bool emitPromotedVarLocs(FunctionVarLocsBuilder *FnVarLocs);
@@ -1129,66 +1316,60 @@ public:
};
} // namespace
+ArrayRef<VariableID>
+AssignmentTrackingLowering::getContainedFragments(VariableID Var) const {
+ auto R = VarContains.find(Var);
+ if (R == VarContains.end())
+ return std::nullopt;
+ return R->second;
+}
+
+void AssignmentTrackingLowering::touchFragment(VariableID Var) {
+ VarsTouchedThisFrame.insert(Var);
+}
+
void AssignmentTrackingLowering::setLocKind(BlockInfo *LiveSet, VariableID Var,
LocKind K) {
auto SetKind = [this](BlockInfo *LiveSet, VariableID Var, LocKind K) {
- VarsTouchedThisFrame.insert(Var);
- LiveSet->LiveLoc[Var] = K;
+ LiveSet->setLocKind(Var, K);
+ touchFragment(Var);
};
SetKind(LiveSet, Var, K);
// Update the LocKind for all fragments contained within Var.
- for (VariableID Frag : VarContains[Var])
+ for (VariableID Frag : getContainedFragments(Var))
SetKind(LiveSet, Frag, K);
}
AssignmentTrackingLowering::LocKind
AssignmentTrackingLowering::getLocKind(BlockInfo *LiveSet, VariableID Var) {
- auto Pair = LiveSet->LiveLoc.find(Var);
- assert(Pair != LiveSet->LiveLoc.end());
- return Pair->second;
+ return LiveSet->getLocKind(Var);
}
void AssignmentTrackingLowering::addMemDef(BlockInfo *LiveSet, VariableID Var,
const Assignment &AV) {
- auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) {
- LiveSet->StackHomeValue[Var] = AV;
- // Add default (Var -> ⊤) to DebugValue if Var isn't in DebugValue yet.
- LiveSet->DebugValue.insert({Var, Assignment::makeNoneOrPhi()});
- // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers
- // of addMemDef will call setLocKind to override.
- LiveSet->LiveLoc.insert({Var, LocKind::None});
- };
- AddDef(LiveSet, Var, AV);
+ LiveSet->setAssignment(BlockInfo::Stack, Var, AV);
// Use this assigment for all fragments contained within Var, but do not
// provide a Source because we cannot convert Var's value to a value for the
// fragment.
Assignment FragAV = AV;
FragAV.Source = nullptr;
- for (VariableID Frag : VarContains[Var])
- AddDef(LiveSet, Frag, FragAV);
+ for (VariableID Frag : getContainedFragments(Var))
+ LiveSet->setAssignment(BlockInfo::Stack, Frag, FragAV);
}
void AssignmentTrackingLowering::addDbgDef(BlockInfo *LiveSet, VariableID Var,
const Assignment &AV) {
- auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) {
- LiveSet->DebugValue[Var] = AV;
- // Add default (Var -> ⊤) to StackHome if Var isn't in StackHome yet.
- LiveSet->StackHomeValue.insert({Var, Assignment::makeNoneOrPhi()});
- // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers
- // of addDbgDef will call setLocKind to override.
- LiveSet->LiveLoc.insert({Var, LocKind::None});
- };
- AddDef(LiveSet, Var, AV);
+ LiveSet->setAssignment(BlockInfo::Debug, Var, AV);
// Use this assigment for all fragments contained within Var, but do not
// provide a Source because we cannot convert Var's value to a value for the
// fragment.
Assignment FragAV = AV;
FragAV.Source = nullptr;
- for (VariableID Frag : VarContains[Var])
- AddDef(LiveSet, Frag, FragAV);
+ for (VariableID Frag : getContainedFragments(Var))
+ LiveSet->setAssignment(BlockInfo::Debug, Frag, FragAV);
}
static DIAssignID *getIDFromInst(const Instruction &I) {
@@ -1200,24 +1381,16 @@ static DIAssignID *getIDFromMarker(const DbgAssignIntrinsic &DAI) {
}
/// Return true if \p Var has an assignment in \p M matching \p AV.
-bool AssignmentTrackingLowering::hasVarWithAssignment(VariableID Var,
- const Assignment &AV,
- const AssignmentMap &M) {
- auto AssignmentIsMapped = [](VariableID Var, const Assignment &AV,
- const AssignmentMap &M) {
- auto R = M.find(Var);
- if (R == M.end())
- return false;
- return AV.isSameSourceAssignment(R->second);
- };
-
- if (!AssignmentIsMapped(Var, AV, M))
+bool AssignmentTrackingLowering::hasVarWithAssignment(
+ BlockInfo *LiveSet, BlockInfo::AssignmentKind Kind, VariableID Var,
+ const Assignment &AV) {
+ if (!LiveSet->hasAssignment(Kind, Var, AV))
return false;
// Check all the frags contained within Var as these will have all been
// mapped to AV at the last store to Var.
- for (VariableID Frag : VarContains[Var])
- if (!AssignmentIsMapped(Frag, AV, M))
+ for (VariableID Frag : getContainedFragments(Var))
+ if (!LiveSet->hasAssignment(Kind, Frag, AV))
return false;
return true;
}
@@ -1242,10 +1415,11 @@ void AssignmentTrackingLowering::emitDbgValue(
const DbgVariableIntrinsic *Source, Instruction *After) {
DILocation *DL = Source->getDebugLoc();
- auto Emit = [this, Source, After, DL](Value *Val, DIExpression *Expr) {
+ auto Emit = [this, Source, After, DL](Metadata *Val, DIExpression *Expr) {
assert(Expr);
if (!Val)
- Val = PoisonValue::get(Type::getInt1Ty(Source->getContext()));
+ Val = ValueAsMetadata::get(
+ PoisonValue::get(Type::getInt1Ty(Source->getContext())));
// Find a suitable insert point.
Instruction *InsertBefore = After->getNextNode();
@@ -1255,7 +1429,7 @@ void AssignmentTrackingLowering::emitDbgValue(
VarLocInfo VarLoc;
VarLoc.VariableID = static_cast<VariableID>(Var);
VarLoc.Expr = Expr;
- VarLoc.V = Val;
+ VarLoc.Values = RawLocationWrapper(Val);
VarLoc.DL = DL;
// Insert it into the map for later.
InsertBeforeMap[InsertBefore].push_back(VarLoc);
@@ -1284,16 +1458,13 @@ void AssignmentTrackingLowering::emitDbgValue(
// The address-expression has an implicit deref, add it now.
std::tie(Val, Expr) =
walkToAllocaAndPrependOffsetDeref(Layout, Val, Expr);
- Emit(Val, Expr);
+ Emit(ValueAsMetadata::get(Val), Expr);
return;
}
}
if (Kind == LocKind::Val) {
- /// Get the value component, converting to Undef if it is variadic.
- Value *Val =
- Source->hasArgList() ? nullptr : Source->getVariableLocationOp(0);
- Emit(Val, Source->getExpression());
+ Emit(Source->getRawLocation(), Source->getExpression());
return;
}
@@ -1371,7 +1542,8 @@ void AssignmentTrackingLowering::processUntaggedInstruction(
VarLocInfo VarLoc;
VarLoc.VariableID = static_cast<VariableID>(Var);
VarLoc.Expr = DIE;
- VarLoc.V = const_cast<AllocaInst *>(Info.Base);
+ VarLoc.Values = RawLocationWrapper(
+ ValueAsMetadata::get(const_cast<AllocaInst *>(Info.Base)));
VarLoc.DL = DILoc;
// 3. Insert it into the map for later.
InsertBeforeMap[InsertBefore].push_back(VarLoc);
@@ -1405,13 +1577,14 @@ void AssignmentTrackingLowering::processTaggedInstruction(
// The last assignment to the stack is now AV. Check if the last debug
// assignment has a matching Assignment.
- if (hasVarWithAssignment(Var, AV, LiveSet->DebugValue)) {
+ if (hasVarWithAssignment(LiveSet, BlockInfo::Debug, Var, AV)) {
// The StackHomeValue and DebugValue for this variable match so we can
// emit a stack home location here.
LLVM_DEBUG(dbgs() << "Mem, Stack matches Debug program\n";);
LLVM_DEBUG(dbgs() << " Stack val: "; AV.dump(dbgs()); dbgs() << "\n");
LLVM_DEBUG(dbgs() << " Debug val: ";
- LiveSet->DebugValue[Var].dump(dbgs()); dbgs() << "\n");
+ LiveSet->DebugValue[static_cast<unsigned>(Var)].dump(dbgs());
+ dbgs() << "\n");
setLocKind(LiveSet, Var, LocKind::Mem);
emitDbgValue(LocKind::Mem, DAI, &I);
continue;
@@ -1434,7 +1607,8 @@ void AssignmentTrackingLowering::processTaggedInstruction(
// There's been an assignment to memory that we were using as a
// location for this variable, and the Assignment doesn't match what
// we'd expect to see in memory.
- if (LiveSet->DebugValue[Var].Status == Assignment::NoneOrPhi) {
+ Assignment DbgAV = LiveSet->getAssignment(BlockInfo::Debug, Var);
+ if (DbgAV.Status == Assignment::NoneOrPhi) {
// We need to terminate any previously open location now.
LLVM_DEBUG(dbgs() << "None, No Debug value available\n";);
setLocKind(LiveSet, Var, LocKind::None);
@@ -1443,9 +1617,8 @@ void AssignmentTrackingLowering::processTaggedInstruction(
// The previous DebugValue Value can be used here.
LLVM_DEBUG(dbgs() << "Val, Debug value is Known\n";);
setLocKind(LiveSet, Var, LocKind::Val);
- Assignment PrevAV = LiveSet->DebugValue.lookup(Var);
- if (PrevAV.Source) {
- emitDbgValue(LocKind::Val, PrevAV.Source, &I);
+ if (DbgAV.Source) {
+ emitDbgValue(LocKind::Val, DbgAV.Source, &I);
} else {
// PrevAV.Source is nullptr so we must emit undef here.
emitDbgValue(LocKind::None, DAI, &I);
@@ -1479,7 +1652,7 @@ void AssignmentTrackingLowering::processDbgAssign(DbgAssignIntrinsic &DAI,
// Check if the DebugValue and StackHomeValue both hold the same
// Assignment.
- if (hasVarWithAssignment(Var, AV, LiveSet->StackHomeValue)) {
+ if (hasVarWithAssignment(LiveSet, BlockInfo::Stack, Var, AV)) {
// They match. We can use the stack home because the debug intrinsics state
// that an assignment happened here, and we know that specific assignment
// was the last one to take place in memory for this variable.
@@ -1529,9 +1702,22 @@ void AssignmentTrackingLowering::processDbgValue(DbgValueInst &DVI,
emitDbgValue(LocKind::Val, &DVI, &DVI);
}
+static bool hasZeroSizedFragment(DbgVariableIntrinsic &DVI) {
+ if (auto F = DVI.getExpression()->getFragmentInfo())
+ return F->SizeInBits == 0;
+ return false;
+}
+
void AssignmentTrackingLowering::processDbgInstruction(
- Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
- assert(!isa<DbgAddrIntrinsic>(&I) && "unexpected dbg.addr");
+ DbgInfoIntrinsic &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+ auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
+ if (!DVI)
+ return;
+
+ // Ignore assignments to zero bits of the variable.
+ if (hasZeroSizedFragment(*DVI))
+ return;
+
if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I))
processDbgAssign(*DAI, LiveSet);
else if (auto *DVI = dyn_cast<DbgValueInst>(&I))
@@ -1561,10 +1747,11 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
++II;
}
while (II != EI) {
- if (!isa<DbgInfoIntrinsic>(&*II))
+ auto *Dbg = dyn_cast<DbgInfoIntrinsic>(&*II);
+ if (!Dbg)
break;
resetInsertionPoint(*II);
- processDbgInstruction(*II, LiveSet);
+ processDbgInstruction(*Dbg, LiveSet);
assert(LiveSet->isValid());
++II;
}
@@ -1597,54 +1784,6 @@ AssignmentTrackingLowering::joinKind(LocKind A, LocKind B) {
return A == B ? A : LocKind::None;
}
-AssignmentTrackingLowering::LocMap
-AssignmentTrackingLowering::joinLocMap(const LocMap &A, const LocMap &B) {
- // Join A and B.
- //
- // U = join(a, b) for a in A, b in B where Var(a) == Var(b)
- // D = join(x, ⊤) for x where Var(x) is in A xor B
- // Join = U ∪ D
- //
- // This is achieved by performing a join on elements from A and B with
- // variables common to both A and B (join elements indexed by var intersect),
- // then adding LocKind::None elements for vars in A xor B. The latter part is
- // equivalent to performing join on elements with variables in A xor B with
- // LocKind::None (⊤) since join(x, ⊤) = ⊤.
- LocMap Join;
- SmallVector<VariableID, 16> SymmetricDifference;
- // Insert the join of the elements with common vars into Join. Add the
- // remaining elements to into SymmetricDifference.
- for (const auto &[Var, Loc] : A) {
- // If this Var doesn't exist in B then add it to the symmetric difference
- // set.
- auto R = B.find(Var);
- if (R == B.end()) {
- SymmetricDifference.push_back(Var);
- continue;
- }
- // There is an entry for Var in both, join it.
- Join[Var] = joinKind(Loc, R->second);
- }
- unsigned IntersectSize = Join.size();
- (void)IntersectSize;
-
- // Add the elements in B with variables that are not in A into
- // SymmetricDifference.
- for (const auto &Pair : B) {
- VariableID Var = Pair.first;
- if (A.count(Var) == 0)
- SymmetricDifference.push_back(Var);
- }
-
- // Add SymmetricDifference elements to Join and return the result.
- for (const auto &Var : SymmetricDifference)
- Join.insert({Var, LocKind::None});
-
- assert(Join.size() == (IntersectSize + SymmetricDifference.size()));
- assert(Join.size() >= A.size() && Join.size() >= B.size());
- return Join;
-}
-
AssignmentTrackingLowering::Assignment
AssignmentTrackingLowering::joinAssignment(const Assignment &A,
const Assignment &B) {
@@ -1687,107 +1826,80 @@ AssignmentTrackingLowering::joinAssignment(const Assignment &A,
return Assignment::make(A.ID, Source);
}
-AssignmentTrackingLowering::AssignmentMap
-AssignmentTrackingLowering::joinAssignmentMap(const AssignmentMap &A,
- const AssignmentMap &B) {
- // Join A and B.
- //
- // U = join(a, b) for a in A, b in B where Var(a) == Var(b)
- // D = join(x, ⊤) for x where Var(x) is in A xor B
- // Join = U ∪ D
- //
- // This is achieved by performing a join on elements from A and B with
- // variables common to both A and B (join elements indexed by var intersect),
- // then adding LocKind::None elements for vars in A xor B. The latter part is
- // equivalent to performing join on elements with variables in A xor B with
- // Status::NoneOrPhi (⊤) since join(x, ⊤) = ⊤.
- AssignmentMap Join;
- SmallVector<VariableID, 16> SymmetricDifference;
- // Insert the join of the elements with common vars into Join. Add the
- // remaining elements to into SymmetricDifference.
- for (const auto &[Var, AV] : A) {
- // If this Var doesn't exist in B then add it to the symmetric difference
- // set.
- auto R = B.find(Var);
- if (R == B.end()) {
- SymmetricDifference.push_back(Var);
- continue;
- }
- // There is an entry for Var in both, join it.
- Join[Var] = joinAssignment(AV, R->second);
- }
- unsigned IntersectSize = Join.size();
- (void)IntersectSize;
-
- // Add the elements in B with variables that are not in A into
- // SymmetricDifference.
- for (const auto &Pair : B) {
- VariableID Var = Pair.first;
- if (A.count(Var) == 0)
- SymmetricDifference.push_back(Var);
- }
-
- // Add SymmetricDifference elements to Join and return the result.
- for (auto Var : SymmetricDifference)
- Join.insert({Var, Assignment::makeNoneOrPhi()});
-
- assert(Join.size() == (IntersectSize + SymmetricDifference.size()));
- assert(Join.size() >= A.size() && Join.size() >= B.size());
- return Join;
-}
-
AssignmentTrackingLowering::BlockInfo
AssignmentTrackingLowering::joinBlockInfo(const BlockInfo &A,
const BlockInfo &B) {
- BlockInfo Join;
- Join.LiveLoc = joinLocMap(A.LiveLoc, B.LiveLoc);
- Join.StackHomeValue = joinAssignmentMap(A.StackHomeValue, B.StackHomeValue);
- Join.DebugValue = joinAssignmentMap(A.DebugValue, B.DebugValue);
- assert(Join.isValid());
- return Join;
+ return BlockInfo::join(A, B, TrackedVariablesVectorSize);
}
bool AssignmentTrackingLowering::join(
const BasicBlock &BB, const SmallPtrSet<BasicBlock *, 16> &Visited) {
- BlockInfo BBLiveIn;
- bool FirstJoin = true;
- // LiveIn locs for BB is the join of the already-processed preds' LiveOut
- // locs.
+
+ SmallVector<const BasicBlock *> VisitedPreds;
+ // Ignore backedges if we have not visited the predecessor yet. As the
+ // predecessor hasn't yet had locations propagated into it, most locations
+ // will not yet be valid, so treat them as all being uninitialized and
+ // potentially valid. If a location guessed to be correct here is
+ // invalidated later, we will remove it when we revisit this block. This
+ // is essentially the same as initialising all LocKinds and Assignments to
+ // an implicit ⊥ value which is the identity value for the join operation.
for (auto I = pred_begin(&BB), E = pred_end(&BB); I != E; I++) {
- // Ignore backedges if we have not visited the predecessor yet. As the
- // predecessor hasn't yet had locations propagated into it, most locations
- // will not yet be valid, so treat them as all being uninitialized and
- // potentially valid. If a location guessed to be correct here is
- // invalidated later, we will remove it when we revisit this block. This
- // is essentially the same as initialising all LocKinds and Assignments to
- // an implicit ⊥ value which is the identity value for the join operation.
const BasicBlock *Pred = *I;
- if (!Visited.count(Pred))
- continue;
+ if (Visited.count(Pred))
+ VisitedPreds.push_back(Pred);
+ }
+
+ // No preds visited yet.
+ if (VisitedPreds.empty()) {
+ auto It = LiveIn.try_emplace(&BB, BlockInfo());
+ bool DidInsert = It.second;
+ if (DidInsert)
+ It.first->second.init(TrackedVariablesVectorSize);
+ return /*Changed*/ DidInsert;
+ }
- auto PredLiveOut = LiveOut.find(Pred);
- // Pred must have been processed already. See comment at start of this loop.
- assert(PredLiveOut != LiveOut.end());
+ // Exactly one visited pred. Copy the LiveOut from that pred into BB LiveIn.
+ if (VisitedPreds.size() == 1) {
+ const BlockInfo &PredLiveOut = LiveOut.find(VisitedPreds[0])->second;
+ auto CurrentLiveInEntry = LiveIn.find(&BB);
- // Perform the join of BBLiveIn (current live-in info) and PrevLiveOut.
- if (FirstJoin)
- BBLiveIn = PredLiveOut->second;
+ // Check if there isn't an entry, or there is but the LiveIn set has
+ // changed (expensive check).
+ if (CurrentLiveInEntry == LiveIn.end())
+ LiveIn.insert(std::make_pair(&BB, PredLiveOut));
+ else if (PredLiveOut != CurrentLiveInEntry->second)
+ CurrentLiveInEntry->second = PredLiveOut;
else
- BBLiveIn = joinBlockInfo(std::move(BBLiveIn), PredLiveOut->second);
- FirstJoin = false;
+ return /*Changed*/ false;
+ return /*Changed*/ true;
+ }
+
+ // More than one pred. Join LiveOuts of blocks 1 and 2.
+ assert(VisitedPreds.size() > 1);
+ const BlockInfo &PredLiveOut0 = LiveOut.find(VisitedPreds[0])->second;
+ const BlockInfo &PredLiveOut1 = LiveOut.find(VisitedPreds[1])->second;
+ BlockInfo BBLiveIn = joinBlockInfo(PredLiveOut0, PredLiveOut1);
+
+ // Join the LiveOuts of subsequent blocks.
+ ArrayRef Tail = ArrayRef(VisitedPreds).drop_front(2);
+ for (const BasicBlock *Pred : Tail) {
+ const auto &PredLiveOut = LiveOut.find(Pred);
+ assert(PredLiveOut != LiveOut.end() &&
+ "block should have been processed already");
+ BBLiveIn = joinBlockInfo(std::move(BBLiveIn), PredLiveOut->second);
}
+ // Save the joined result for BB.
auto CurrentLiveInEntry = LiveIn.find(&BB);
// Check if there isn't an entry, or there is but the LiveIn set has changed
// (expensive check).
- if (CurrentLiveInEntry == LiveIn.end() ||
- BBLiveIn != CurrentLiveInEntry->second) {
- LiveIn[&BB] = std::move(BBLiveIn);
- // A change has occured.
- return true;
- }
- // No change.
- return false;
+ if (CurrentLiveInEntry == LiveIn.end())
+ LiveIn.try_emplace(&BB, std::move(BBLiveIn));
+ else if (BBLiveIn != CurrentLiveInEntry->second)
+ CurrentLiveInEntry->second = std::move(BBLiveIn);
+ else
+ return /*Changed*/ false;
+ return /*Changed*/ true;
}
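A standalone sketch of that predecessor handling (in the same spirit as the Block sketch above; names illustrative): skipping an unvisited predecessor is the same as joining with an implicit bottom value, no visited predecessor leaves the all-top initial state, one visited predecessor is a plain copy, and more than one folds the element-wise join over the rest.

#include <cstddef>
#include <vector>

namespace sketch {
enum class Kind { Top, Mem, Val };
struct Block { std::vector<Kind> Loc; };

Kind joinKind(Kind A, Kind B) { return A == B ? A : Kind::Top; }

Block join(const Block &A, const Block &B) {
  Block J;
  J.Loc.resize(A.Loc.size());
  for (size_t I = 0; I != A.Loc.size(); ++I)
    J.Loc[I] = joinKind(A.Loc[I], B.Loc[I]);
  return J;
}

Block liveInFor(const std::vector<const Block *> &VisitedPredLiveOuts,
                size_t NumVars) {
  if (VisitedPredLiveOuts.empty())              // no pred processed yet
    return Block{std::vector<Kind>(NumVars, Kind::Top)};
  Block In = *VisitedPredLiveOuts.front();      // single pred: straight copy
  for (size_t I = 1; I != VisitedPredLiveOuts.size(); ++I)
    In = join(In, *VisitedPredLiveOuts[I]);     // fold join over the rest
  return In;
}
} // namespace sketch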
/// Return true if A fully contains B.
@@ -1823,7 +1935,13 @@ getUntaggedStoreAssignmentInfo(const Instruction &I, const DataLayout &Layout) {
/// y does not contain all overlaps because partial overlaps are excluded.
///
/// While we're iterating over the function, add single location defs for
-/// dbg.declares to \p FnVarLocs
+/// dbg.declares to \p FnVarLocs.
+///
+/// Variables that are interesting to this pass are added to
+/// FnVarLocs->Variables first. TrackedVariablesVectorSize is set to the ID of
+/// the last interesting variable plus 1, meaning variables with ID 1
+/// (inclusive) to TrackedVariablesVectorSize (exclusive) are interesting. The
+/// subsequent variables are either stack homed or fully promoted.
///
/// Finally, populate UntaggedStoreVars with a mapping of untagged stores to
/// the stored-to variable fragments.
@@ -1832,7 +1950,9 @@ getUntaggedStoreAssignmentInfo(const Instruction &I, const DataLayout &Layout) {
/// to iterate over the function as they can be achieved together in one pass.
static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
Function &Fn, FunctionVarLocsBuilder *FnVarLocs,
- AssignmentTrackingLowering::UntaggedStoreAssignmentMap &UntaggedStoreVars) {
+ const DenseSet<DebugAggregate> &VarsWithStackSlot,
+ AssignmentTrackingLowering::UntaggedStoreAssignmentMap &UntaggedStoreVars,
+ unsigned &TrackedVariablesVectorSize) {
DenseSet<DebugVariable> Seen;
// Map of Variable: [Fragments].
DenseMap<DebugAggregate, SmallVector<DebugVariable, 8>> FragmentMap;
@@ -1843,14 +1963,16 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
// UntaggedStoreVars.
// We need to add fragments for untagged stores too so that we can correctly
// clobber overlapped fragment locations later.
+ SmallVector<DbgDeclareInst *> Declares;
for (auto &BB : Fn) {
for (auto &I : BB) {
if (auto *DDI = dyn_cast<DbgDeclareInst>(&I)) {
- FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(),
- DDI->getDebugLoc(), DDI->getAddress());
+ Declares.push_back(DDI);
} else if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
DebugVariable DV = DebugVariable(DII);
DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()};
+ if (!VarsWithStackSlot.contains(DA))
+ continue;
if (Seen.insert(DV).second)
FragmentMap[DA].push_back(DV);
} else if (auto Info = getUntaggedStoreAssignmentInfo(
@@ -1875,6 +1997,8 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
DebugVariable DV = DebugVariable(DAI->getVariable(), FragInfo,
DAI->getDebugLoc().getInlinedAt());
DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()};
+ if (!VarsWithStackSlot.contains(DA))
+ continue;
// Cache this info for later.
UntaggedStoreVars[&I].push_back(
@@ -1887,21 +2011,22 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
}
}
- // Sort the fragment map for each DebugAggregate in non-descending
- // order of fragment size. Assert no entries are duplicates.
+ // Sort the fragment map for each DebugAggregate in ascending
+ // order of fragment size - there should be no duplicates.
for (auto &Pair : FragmentMap) {
SmallVector<DebugVariable, 8> &Frags = Pair.second;
- std::sort(
- Frags.begin(), Frags.end(), [](DebugVariable Next, DebugVariable Elmt) {
- assert(!(Elmt.getFragmentOrDefault() == Next.getFragmentOrDefault()));
- return Elmt.getFragmentOrDefault().SizeInBits >
- Next.getFragmentOrDefault().SizeInBits;
- });
+ std::sort(Frags.begin(), Frags.end(),
+ [](const DebugVariable &Next, const DebugVariable &Elmt) {
+ return Elmt.getFragmentOrDefault().SizeInBits >
+ Next.getFragmentOrDefault().SizeInBits;
+ });
+ // Check for duplicates.
+ assert(std::adjacent_find(Frags.begin(), Frags.end()) == Frags.end());
}
// Build the map.
AssignmentTrackingLowering::OverlapMap Map;
- for (auto Pair : FragmentMap) {
+ for (auto &Pair : FragmentMap) {
auto &Frags = Pair.second;
for (auto It = Frags.begin(), IEnd = Frags.end(); It != IEnd; ++It) {
DIExpression::FragmentInfo Frag = It->getFragmentOrDefault();
@@ -1922,6 +2047,15 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
}
}
+ // VariableIDs are 1-based so the variable-tracking bitvector needs
+ // NumVariables plus 1 bits.
+ TrackedVariablesVectorSize = FnVarLocs->getNumVariables() + 1;
+
+ // Finally, insert the declares afterwards, so the first IDs are all
+ // partially stack homed vars.
+ for (auto *DDI : Declares)
+ FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(),
+ DDI->getDebugLoc(), DDI->getWrappedLocation());
return Map;
}
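A standalone sketch of that ID layout (illustrative, not FunctionVarLocsBuilder): IDs are handed out 1-based in insertion order, so interning the partially stack-homed variables first and the dbg.declare-only ones last means every ID below the recorded bound needs a slot in the per-block vectors and bitvector, while later IDs can be ignored by the dataflow.

#include <map>
#include <string>
#include <vector>

namespace sketch {
struct Interner {
  std::map<std::string, unsigned> Ids; // 1-based, assigned in insertion order
  unsigned insert(const std::string &Var) {
    return Ids.emplace(Var, static_cast<unsigned>(Ids.size()) + 1)
        .first->second;
  }
};

// Returns the bound below which IDs must be tracked by the dataflow.
unsigned internAll(Interner &I, const std::vector<std::string> &StackHomed,
                   const std::vector<std::string> &DeclareOnly) {
  for (const auto &V : StackHomed)
    I.insert(V);
  unsigned TrackedSize = static_cast<unsigned>(I.Ids.size()) + 1; // IDs 1-based
  for (const auto &V : DeclareOnly)
    I.insert(V); // these get IDs >= TrackedSize and are never tracked
  return TrackedSize;
}
} // namespace sketch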
@@ -1942,8 +2076,9 @@ bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) {
// Note that this pass doesn't handle partial overlaps correctly (FWIW
// neither does LiveDebugVariables) because that is difficult to do and
// appears to be rare occurance.
- VarContains =
- buildOverlapMapAndRecordDeclares(Fn, FnVarLocs, UntaggedStoreVars);
+ VarContains = buildOverlapMapAndRecordDeclares(
+ Fn, FnVarLocs, *VarsWithStackSlot, UntaggedStoreVars,
+ TrackedVariablesVectorSize);
// Prepare for traversal.
ReversePostOrderTraversal<Function *> RPOT(&Fn);
@@ -2059,14 +2194,14 @@ bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) {
//
// Unless we've already done so, create the single location def now.
if (AlwaysStackHomed.insert(Aggr).second) {
- assert(isa<AllocaInst>(VarLoc.V));
+ assert(!VarLoc.Values.hasArgList());
// TODO: When more complex cases are handled VarLoc.Expr should be
// built appropriately rather than always using an empty DIExpression.
// The assert below is a reminder.
assert(Simple);
VarLoc.Expr = DIExpression::get(Fn.getContext(), std::nullopt);
DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID);
- FnVarLocs->addSingleLocVar(Var, VarLoc.Expr, VarLoc.DL, VarLoc.V);
+ FnVarLocs->addSingleLocVar(Var, VarLoc.Expr, VarLoc.DL, VarLoc.Values);
InsertedAnyIntrinsics = true;
}
}
@@ -2109,20 +2244,11 @@ bool AssignmentTrackingLowering::emitPromotedVarLocs(
// already.
if (VarsWithStackSlot->contains(getAggregate(DVI)))
continue;
- // Wrapper to get a single value (or undef) from DVI.
- auto GetValue = [DVI]() -> Value * {
- // We can't handle variadic DIExpressions yet so treat those as
- // kill locations.
- if (DVI->isKillLocation() || DVI->getValue() == nullptr ||
- DVI->hasArgList())
- return PoisonValue::get(Type::getInt32Ty(DVI->getContext()));
- return DVI->getValue();
- };
Instruction *InsertBefore = I.getNextNode();
assert(InsertBefore && "Unexpected: debug intrinsics after a terminator");
FnVarLocs->addVarLoc(InsertBefore, DebugVariable(DVI),
DVI->getExpression(), DVI->getDebugLoc(),
- GetValue());
+ DVI->getWrappedLocation());
InsertedAnyIntrinsics = true;
}
}
@@ -2140,15 +2266,14 @@ static bool
removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
FunctionVarLocsBuilder &FnVarLocs) {
bool Changed = false;
- SmallDenseSet<DebugVariable> VariableSet;
-
+ SmallDenseMap<DebugAggregate, BitVector> VariableDefinedBits;
// Scan over the entire block, not just over the instructions mapped by
// FnVarLocs, because wedges in FnVarLocs may only be seperated by debug
// instructions.
for (const Instruction &I : reverse(*BB)) {
if (!isa<DbgVariableIntrinsic>(I)) {
// Sequence of consecutive defs ended. Clear map for the next one.
- VariableSet.clear();
+ VariableDefinedBits.clear();
}
// Get the location defs that start just before this instruction.
@@ -2164,21 +2289,44 @@ removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
// Iterate over the existing defs in reverse.
for (auto RIt = Locs->rbegin(), REnd = Locs->rend(); RIt != REnd; ++RIt) {
NumDefsScanned++;
- const DebugVariable &Key = FnVarLocs.getVariable(RIt->VariableID);
- bool FirstDefOfFragment = VariableSet.insert(Key).second;
+ DebugAggregate Aggr =
+ getAggregate(FnVarLocs.getVariable(RIt->VariableID));
+ uint64_t SizeInBits = Aggr.first->getSizeInBits().value_or(0);
- // If the same variable fragment is described more than once it is enough
- // to keep the last one (i.e. the first found in this reverse iteration).
- if (FirstDefOfFragment) {
- // New def found: keep it.
+ if (SizeInBits == 0) {
+ // If the size is unknown (0) then keep this location def to be safe.
NewDefsReversed.push_back(*RIt);
- } else {
- // Redundant def found: throw it away. Since the wedge of defs is being
- // rebuilt, doing nothing is the same as deleting an entry.
- ChangedThisWedge = true;
- NumDefsRemoved++;
+ continue;
}
- continue;
+
+ // Only keep this location definition if it is not fully eclipsed by
+      // other definitions in this wedge that come after it.
+
+      // Insert the bits the location definition defines.
+ auto InsertResult =
+ VariableDefinedBits.try_emplace(Aggr, BitVector(SizeInBits));
+ bool FirstDefinition = InsertResult.second;
+ BitVector &DefinedBits = InsertResult.first->second;
+
+ DIExpression::FragmentInfo Fragment =
+ RIt->Expr->getFragmentInfo().value_or(
+ DIExpression::FragmentInfo(SizeInBits, 0));
+ bool InvalidFragment = Fragment.endInBits() > SizeInBits;
+
+ // If this defines any previously undefined bits, keep it.
+ if (FirstDefinition || InvalidFragment ||
+ DefinedBits.find_first_unset_in(Fragment.startInBits(),
+ Fragment.endInBits()) != -1) {
+ if (!InvalidFragment)
+ DefinedBits.set(Fragment.startInBits(), Fragment.endInBits());
+ NewDefsReversed.push_back(*RIt);
+ continue;
+ }
+
+ // Redundant def found: throw it away. Since the wedge of defs is being
+ // rebuilt, doing nothing is the same as deleting an entry.
+ ChangedThisWedge = true;
+ NumDefsRemoved++;
}
// Un-reverse the defs and replace the wedge with the pruned version.
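A standalone sketch of the pruning rule above (illustrative types, not the pass's, and assuming a single variable size for brevity): walking the wedge backwards, a per-variable bitset remembers which bits later defs already cover, and a def survives only if it is the first one seen for its variable, its fragment is out of range, or it still contributes at least one uncovered bit.

#include <map>
#include <string>
#include <vector>

namespace sketch {
struct Def {
  std::string Var;
  unsigned StartBit, EndBit; // fragment covered, half-open in bits
};

std::vector<Def> pruneEclipsedDefs(const std::vector<Def> &Wedge,
                                   unsigned VarSizeInBits) {
  std::map<std::string, std::vector<bool>> CoveredBits;
  std::vector<Def> KeptReversed;
  for (auto It = Wedge.rbegin(); It != Wedge.rend(); ++It) {
    auto Ins = CoveredBits.try_emplace(It->Var,
                                       std::vector<bool>(VarSizeInBits));
    std::vector<bool> &Bits = Ins.first->second;
    bool Invalid = It->EndBit > VarSizeInBits;
    bool DefinesNewBits = Ins.second || Invalid;
    for (unsigned B = It->StartBit; !DefinesNewBits && B < It->EndBit; ++B)
      DefinesNewBits = !Bits[B];
    if (!DefinesNewBits)
      continue; // fully eclipsed by defs that come after it: drop
    if (!Invalid)
      for (unsigned B = It->StartBit; B < It->EndBit; ++B)
        Bits[B] = true;
    KeptReversed.push_back(*It);
  }
  return {KeptReversed.rbegin(), KeptReversed.rend()};
}
} // namespace sketch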
@@ -2204,7 +2352,8 @@ static bool
removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB,
FunctionVarLocsBuilder &FnVarLocs) {
bool Changed = false;
- DenseMap<DebugVariable, std::pair<Value *, DIExpression *>> VariableMap;
+ DenseMap<DebugVariable, std::pair<RawLocationWrapper, DIExpression *>>
+ VariableMap;
// Scan over the entire block, not just over the instructions mapped by
// FnVarLocs, because wedges in FnVarLocs may only be seperated by debug
@@ -2229,9 +2378,9 @@ removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB,
// Update the map if we found a new value/expression describing the
// variable, or if the variable wasn't mapped already.
- if (VMI == VariableMap.end() || VMI->second.first != Loc.V ||
+ if (VMI == VariableMap.end() || VMI->second.first != Loc.Values ||
VMI->second.second != Loc.Expr) {
- VariableMap[Key] = {Loc.V, Loc.Expr};
+ VariableMap[Key] = {Loc.Values, Loc.Expr};
NewDefs.push_back(Loc);
continue;
}
@@ -2311,7 +2460,7 @@ removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB,
// Remove undef entries that are encountered before any non-undef
// intrinsics from the entry block.
- if (isa<UndefValue>(Loc.V) && !HasDefinedBits(Aggr, Var)) {
+ if (Loc.Values.isKillLocation(Loc.Expr) && !HasDefinedBits(Aggr, Var)) {
// Did not insert this Loc, which is the same as removing it.
NumDefsRemoved++;
ChangedThisWedge = true;
@@ -2381,7 +2530,8 @@ static void analyzeFunction(Function &Fn, const DataLayout &Layout,
}
if (Changed) {
- MemLocFragmentFill Pass(Fn, &VarsWithStackSlot);
+ MemLocFragmentFill Pass(Fn, &VarsWithStackSlot,
+ shouldCoalesceFragments(Fn));
Pass.run(FnVarLocs);
// Remove redundant entries. As well as reducing memory consumption and
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 8f71ec2b490c..80a0bb957cfc 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1576,6 +1576,11 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg) {
ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
+ Builder.setIsFPConstrained(
+ AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
+
+ // FIXME: If FP exceptions are observable, we should force them off for the
+ // loop for the FP atomics.
Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
AI->getOrdering(), AI->getSyncScopeID(),
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index e7e73606de07..6967ca5160c0 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -91,7 +91,7 @@ cl::opt<std::string> llvm::BBSectionsColdTextPrefix(
cl::desc("The text prefix to use for cold basic block clusters"),
cl::init(".text.split."), cl::Hidden);
-cl::opt<bool> BBSectionsDetectSourceDrift(
+static cl::opt<bool> BBSectionsDetectSourceDrift(
"bbsections-detect-source-drift",
cl::desc("This checks if there is a fdo instr. profile hash "
"mismatch for this function"),
@@ -123,10 +123,16 @@ public:
} // end anonymous namespace
char BasicBlockSections::ID = 0;
-INITIALIZE_PASS(BasicBlockSections, "bbsections-prepare",
- "Prepares for basic block sections, by splitting functions "
- "into clusters of basic blocks.",
- false, false)
+INITIALIZE_PASS_BEGIN(
+ BasicBlockSections, "bbsections-prepare",
+ "Prepares for basic block sections, by splitting functions "
+ "into clusters of basic blocks.",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader)
+INITIALIZE_PASS_END(BasicBlockSections, "bbsections-prepare",
+ "Prepares for basic block sections, by splitting functions "
+ "into clusters of basic blocks.",
+ false, false)
// This function updates and optimizes the branching instructions of every basic
// block in a given function to account for changes in the layout.
@@ -300,7 +306,7 @@ static bool hasInstrProfHashMismatch(MachineFunction &MF) {
if (Existing) {
MDTuple *Tuple = cast<MDTuple>(Existing);
for (const auto &N : Tuple->operands())
- if (cast<MDString>(N.get())->getString() == MetadataName)
+ if (N.equalsStr(MetadataName))
return true;
}
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 5bc8d82debc3..5dede452ec34 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -14,12 +14,17 @@
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include <llvm/ADT/STLExtras.h>
using namespace llvm;
@@ -35,13 +40,10 @@ bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
std::pair<bool, SmallVector<BBClusterInfo>>
BasicBlockSectionsProfileReader::getBBClusterInfoForFunction(
StringRef FuncName) const {
- std::pair<bool, SmallVector<BBClusterInfo>> cluster_info(false, {});
auto R = ProgramBBClusterInfo.find(getAliasName(FuncName));
- if (R != ProgramBBClusterInfo.end()) {
- cluster_info.second = R->second;
- cluster_info.first = true;
- }
- return cluster_info;
+ return R != ProgramBBClusterInfo.end()
+ ? std::pair(true, R->second)
+ : std::pair(false, SmallVector<BBClusterInfo>{});
}
// Basic Block Sections can be enabled for a subset of machine basic blocks.
@@ -49,17 +51,19 @@ BasicBlockSectionsProfileReader::getBBClusterInfoForFunction(
// block sections are desired. Additionally, machine basic block ids of the
// functions can also be specified for a finer granularity. Moreover, a cluster
// of basic blocks could be assigned to the same section.
+// Optionally, a debug-info filename can be specified for each function to allow
+// distinguishing internal-linkage functions of the same name.
// A file with basic block sections for all of function main and three blocks
// for function foo (of which 1 and 2 are placed in a cluster) looks like this:
+// (Profile for function foo is only loaded when its debug-info filename
+// matches 'path/to/foo_file.cc').
// ----------------------------
// list.txt:
// !main
-// !foo
+// !foo M=path/to/foo_file.cc
// !!1 2
// !!4
-static Error getBBClusterInfo(const MemoryBuffer *MBuf,
- ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
- StringMap<StringRef> &FuncAliasMap) {
+Error BasicBlockSectionsProfileReader::ReadProfile() {
assert(MBuf);
line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
@@ -90,9 +94,10 @@ static Error getBBClusterInfo(const MemoryBuffer *MBuf,
break;
// Check for second "!" which indicates a cluster of basic blocks.
if (S.consume_front("!")) {
+      // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
if (FI == ProgramBBClusterInfo.end())
- return invalidProfileError(
- "Cluster list does not follow a function name specifier.");
+ continue;
SmallVector<StringRef, 4> BBIDs;
S.split(BBIDs, ' ');
// Reset current cluster position.
@@ -112,18 +117,52 @@ static Error getBBClusterInfo(const MemoryBuffer *MBuf,
BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++});
}
CurrentCluster++;
- } else { // This is a function name specifier.
+ } else {
+ // This is a function name specifier. It may include a debug info filename
+ // specifier starting with `M=`.
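+      // A hypothetical specifier (after the leading '!' has already been
+      // consumed) could look like "foo/foo_alias M=path/to/foo_file.cc".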
+ auto [AliasesStr, DIFilenameStr] = S.split(' ');
+ SmallString<128> DIFilename;
+ if (DIFilenameStr.startswith("M=")) {
+ DIFilename =
+ sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
+ if (DIFilename.empty())
+ return invalidProfileError("Empty module name specifier.");
+ } else if (!DIFilenameStr.empty()) {
+ return invalidProfileError("Unknown string found: '" + DIFilenameStr +
+ "'.");
+ }
// Function aliases are separated using '/'. We use the first function
// name for the cluster info mapping and delegate all other aliases to
// this one.
SmallVector<StringRef, 4> Aliases;
- S.split(Aliases, '/');
+ AliasesStr.split(Aliases, '/');
+ bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
+ auto It = FunctionNameToDIFilename.find(Alias);
+ // No match if this function name is not found in this module.
+ if (It == FunctionNameToDIFilename.end())
+ return false;
+ // Return a match if debug-info-filename is not specified. Otherwise,
+ // check for equality.
+ return DIFilename.empty() || It->second.equals(DIFilename);
+ });
+ if (!FunctionFound) {
+ // Skip the following profile by setting the profile iterator (FI) to
+ // the past-the-end element.
+ FI = ProgramBBClusterInfo.end();
+ continue;
+ }
for (size_t i = 1; i < Aliases.size(); ++i)
FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
// Prepare for parsing clusters of this function name.
// Start a new cluster map for this function name.
- FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first;
+ auto R = ProgramBBClusterInfo.try_emplace(Aliases.front());
+ // Report error when multiple profiles have been specified for the same
+ // function.
+ if (!R.second)
+ return invalidProfileError("Duplicate profile for function '" +
+ Aliases.front() + "'.");
+ FI = R.first;
CurrentCluster = 0;
FuncBBIDs.clear();
}
@@ -131,11 +170,28 @@ static Error getBBClusterInfo(const MemoryBuffer *MBuf,
return Error::success();
}
-void BasicBlockSectionsProfileReader::initializePass() {
+bool BasicBlockSectionsProfileReader::doInitialization(Module &M) {
if (!MBuf)
- return;
- if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap))
+ return false;
+ // Get the function name to debug info filename mapping.
+ FunctionNameToDIFilename.clear();
+ for (const Function &F : M) {
+ SmallString<128> DIFilename;
+ if (F.isDeclaration())
+ continue;
+ DISubprogram *Subprogram = F.getSubprogram();
+ if (Subprogram) {
+ llvm::DICompileUnit *CU = Subprogram->getUnit();
+ if (CU)
+ DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
+ }
+ [[maybe_unused]] bool inserted =
+ FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second;
+ assert(inserted);
+ }
+ if (auto Err = ReadProfile())
report_fatal_error(std::move(Err));
+ return false;
}
ImmutablePass *
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index d491691135dc..3830f25debaf 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -860,6 +860,14 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
for (Register Reg : NewLiveIns) {
if (!LiveRegs.available(*MRI, Reg))
continue;
+
+ // Skip the register if we are about to add one of its super registers.
+      // TODO: Common this up with the same logic in addLiveIns().
+ if (any_of(TRI->superregs(Reg), [&](MCPhysReg SReg) {
+ return NewLiveIns.contains(SReg) && !MRI->isReserved(SReg);
+ }))
+ continue;
+
DebugLoc DL;
BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF),
Reg);
@@ -1207,7 +1215,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
MadeChange |= OptimizeBlock(&MBB);
// If it is dead, remove it.
- if (MBB.pred_empty()) {
+ if (MBB.pred_empty() && !MBB.isMachineBlockAddressTaken()) {
RemoveDeadBlock(&MBB);
MadeChange = true;
++NumDeadBlocks;
@@ -1507,42 +1515,43 @@ ReoptimizeBlock:
}
}
- bool OptForSize =
- MF.getFunction().hasOptSize() ||
- llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo);
- if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
- // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
- // direction, thereby defeating careful block placement and regressing
- // performance. Therefore, only consider this for optsize functions.
+ if (!IsEmptyBlock(MBB)) {
MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
if (TII->isUnconditionalTailCall(TailCall)) {
- MachineBasicBlock *Pred = *MBB->pred_begin();
- MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
- SmallVector<MachineOperand, 4> PredCond;
- bool PredAnalyzable =
- !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
-
- if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&
- PredTBB != PredFBB) {
- // The predecessor has a conditional branch to this block which consists
- // of only a tail call. Try to fold the tail call into the conditional
- // branch.
- if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
- // TODO: It would be nice if analyzeBranch() could provide a pointer
- // to the branch instruction so replaceBranchWithTailCall() doesn't
- // have to search for it.
- TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
- ++NumTailCalls;
- Pred->removeSuccessor(MBB);
- MadeChange = true;
- return MadeChange;
+ SmallVector<MachineBasicBlock *> PredsChanged;
+ for (auto &Pred : MBB->predecessors()) {
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ bool PredAnalyzable =
+ !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
+ // Only eliminate if MBB == TBB (Taken Basic Block)
+ if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&
+ PredTBB != PredFBB) {
+ // The predecessor has a conditional branch to this block which
+ // consists of only a tail call. Try to fold the tail call into the
+ // conditional branch.
+ if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
+ // TODO: It would be nice if analyzeBranch() could provide a pointer
+ // to the branch instruction so replaceBranchWithTailCall() doesn't
+ // have to search for it.
+ TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
+ PredsChanged.push_back(Pred);
+ }
}
+ // If the predecessor is falling through to this block, we could reverse
+ // the branch condition and fold the tail call into that. However, after
+ // that we might have to re-arrange the CFG to fall through to the other
+ // block and there is a high risk of regressing code size rather than
+ // improving it.
+ }
+ if (!PredsChanged.empty()) {
+ NumTailCalls += PredsChanged.size();
+ for (auto &Pred : PredsChanged)
+ Pred->removeSuccessor(MBB);
+
+ return true;
}
- // If the predecessor is falling through to this block, we could reverse
- // the branch condition and fold the tail call into that. However, after
- // that we might have to re-arrange the CFG to fall through to the other
- // block and there is a high risk of regressing code size rather than
- // improving it.
}
}
@@ -1876,8 +1885,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
} else {
if (Uses.erase(Reg)) {
if (Reg.isPhysical()) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- Uses.erase(*SubRegs); // Use sub-registers to be conservative
+ for (MCPhysReg SubReg : TRI->subregs(Reg))
+ Uses.erase(SubReg); // Use sub-registers to be conservative
}
}
addRegAndItsAliases(Reg, TRI, Defs);
@@ -1988,8 +1997,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
break;
// Remove kills from ActiveDefsSet, these registers had short live ranges.
- for (const MachineOperand &MO : TIB->operands()) {
- if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ for (const MachineOperand &MO : TIB->all_uses()) {
+ if (!MO.isKill())
continue;
Register Reg = MO.getReg();
if (!Reg)
@@ -2006,8 +2015,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
}
// Track local defs so we can update liveins.
- for (const MachineOperand &MO : TIB->operands()) {
- if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ for (const MachineOperand &MO : TIB->all_defs()) {
+ if (MO.isDead())
continue;
Register Reg = MO.getReg();
if (!Reg || Reg.isVirtual())
diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h
index d0b6ed5ebe05..63b2ef04b21b 100644
--- a/llvm/lib/CodeGen/BranchFolding.h
+++ b/llvm/lib/CodeGen/BranchFolding.h
@@ -113,15 +113,15 @@ class TargetRegisterInfo;
};
std::vector<SameTailElt> SameTails;
- bool AfterBlockPlacement;
- bool EnableTailMerge;
- bool EnableHoistCommonCode;
- bool UpdateLiveIns;
+ bool AfterBlockPlacement = false;
+ bool EnableTailMerge = false;
+ bool EnableHoistCommonCode = false;
+ bool UpdateLiveIns = false;
unsigned MinCommonTailLength;
- const TargetInstrInfo *TII;
- const MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
- MachineLoopInfo *MLI;
+ const TargetInstrInfo *TII = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineLoopInfo *MLI = nullptr;
LivePhysRegs LiveRegs;
private:
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 016c81dc5aa4..05494f1ddc67 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -81,9 +81,9 @@ class BranchRelaxation : public MachineFunctionPass {
std::unique_ptr<RegScavenger> RS;
LivePhysRegs LiveRegs;
- MachineFunction *MF;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
+ MachineFunction *MF = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
bool relaxBranchInstructions();
void scanFunction();
@@ -132,6 +132,19 @@ void BranchRelaxation::verify() {
assert(BlockInfo[Num].Size == computeBlockSize(MBB));
PrevNum = Num;
}
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineBasicBlock::iterator J = MBB.getFirstTerminator();
+ J != MBB.end(); J = std::next(J)) {
+ MachineInstr &MI = *J;
+ if (!MI.isConditionalBranch() && !MI.isUnconditionalBranch())
+ continue;
+ if (MI.getOpcode() == TargetOpcode::FAULTING_OP)
+ continue;
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+ assert(isBlockInRange(MI, *DestBB));
+ }
+ }
#endif
}
diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 310273173647..618e41894b29 100644
--- a/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -17,6 +17,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
@@ -34,9 +35,9 @@ namespace llvm {
class BreakFalseDeps : public MachineFunctionPass {
private:
- MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ MachineFunction *MF = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
RegisterClassInfo RegClassInfo;
/// List of undefined register reads in this block in forward order.
@@ -45,7 +46,7 @@ private:
/// Storage for register unit liveness.
LivePhysRegs LiveRegSet;
- ReachingDefAnalysis *RDA;
+ ReachingDefAnalysis *RDA = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
@@ -123,9 +124,9 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
MCRegister OriginalReg = MO.getReg().asMCReg();
// Update only undef operands that have reg units that are mapped to one root.
- for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) {
+ for (MCRegUnit Unit : TRI->regunits(OriginalReg)) {
unsigned NumRoots = 0;
- for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) {
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
NumRoots++;
if (NumRoots > 1)
return false;
@@ -139,9 +140,8 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
  // If the instruction has a true dependency, we can hide the false dependency
// behind it.
- for (MachineOperand &CurrMO : MI->operands()) {
- if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
- !OpRC->contains(CurrMO.getReg()))
+ for (MachineOperand &CurrMO : MI->all_uses()) {
+ if (CurrMO.isUndef() || !OpRC->contains(CurrMO.getReg()))
continue;
// We found a true dependency - replace the undef register with the true
// dependency.
@@ -290,10 +290,16 @@ bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n");
+  // Skip dead blocks, since ReachingDefAnalysis has no idea about the
+  // instructions in them.
+ df_iterator_default_set<MachineBasicBlock *> Reachable;
+ for (MachineBasicBlock *MBB : depth_first_ext(&mf, Reachable))
+ (void)MBB /* Mark all reachable blocks */;
+
// Traverse the basic blocks.
- for (MachineBasicBlock &MBB : mf) {
- processBasicBlock(&MBB);
- }
+ for (MachineBasicBlock &MBB : mf)
+ if (Reachable.count(&MBB))
+ processBasicBlock(&MBB);
return false;
}
diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 25741686a829..6a024287f002 100644
--- a/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -141,6 +141,7 @@ INITIALIZE_PASS(CFIInstrInserter, "cfi-instr-inserter",
FunctionPass *llvm::createCFIInstrInserter() { return new CFIInstrInserter(); }
void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// Initial CFA offset value i.e. the one valid at the beginning of the
// function.
int InitialOffset =
@@ -149,7 +150,7 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
// function.
Register InitialRegister =
MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF);
- const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ InitialRegister = TRI.getDwarfRegNum(InitialRegister, true);
unsigned NumRegs = TRI.getNumRegs();
// Initialize MBBMap.
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 615687abad81..0377bc002067 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -97,7 +97,7 @@ bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI,
// Trace copies introduced by live range splitting. The inline
// spiller can rematerialize through these copies, so the spill
// weight must reflect this.
- while (MI->isFullCopy()) {
+ while (TII.isFullCopyInstr(*MI)) {
// The copy destination must match the interval register.
if (MI->getOperand(0).getReg() != Reg)
return false;
@@ -133,7 +133,7 @@ bool VirtRegAuxInfo::isLiveAtStatepointVarArg(LiveInterval &LI) {
MachineInstr *MI = MO.getParent();
if (MI->getOpcode() != TargetOpcode::STATEPOINT)
return false;
- return StatepointOpers(MI).getVarIdx() <= MI->getOperandNo(&MO);
+ return StatepointOpers(MI).getVarIdx() <= MO.getOperandNo();
});
}
@@ -157,7 +157,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
unsigned NumInstr = 0; // Number of instructions using LI
SmallPtrSet<MachineInstr *, 8> Visited;
- std::pair<Register, Register> TargetHint = MRI.getRegAllocationHint(LI.reg());
+ std::pair<unsigned, Register> TargetHint = MRI.getRegAllocationHint(LI.reg());
if (LI.isSpillable()) {
Register Reg = LI.reg();
@@ -224,7 +224,16 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
continue;
NumInstr++;
- if (MI->isIdentityCopy() || MI->isImplicitDef())
+ bool identityCopy = false;
+ auto DestSrc = TII.isCopyInstr(*MI);
+ if (DestSrc) {
+ const MachineOperand *DestRegOp = DestSrc->Destination;
+ const MachineOperand *SrcRegOp = DestSrc->Source;
+ identityCopy = DestRegOp->getReg() == SrcRegOp->getReg() &&
+ DestRegOp->getSubReg() == SrcRegOp->getSubReg();
+ }
+
+ if (identityCopy || MI->isImplicitDef())
continue;
if (!Visited.insert(MI).second)
continue;
@@ -258,7 +267,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
}
// Get allocation hints from copies.
- if (!MI->isCopy())
+ if (!TII.isCopyInstr(*MI))
continue;
Register HintReg = copyHint(MI, LI.reg(), TRI, MRI);
if (!HintReg)
diff --git a/llvm/lib/CodeGen/CallBrPrepare.cpp b/llvm/lib/CodeGen/CallBrPrepare.cpp
new file mode 100644
index 000000000000..db243a0bfebe
--- /dev/null
+++ b/llvm/lib/CodeGen/CallBrPrepare.cpp
@@ -0,0 +1,231 @@
+//===-- CallBrPrepare - Prepare callbr for code generation ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers callbrs in LLVM IR in order to assist SelectionDAG's
+// codegen.
+//
+// In particular, this pass assists in inserting register copies for the output
+// values of a callbr along the edges leading to the indirect target blocks.
+// Though the output SSA value is defined by the callbr instruction itself in
+// the IR representation, the value cannot be copied to the appropriate virtual
+// registers prior to jumping to an indirect label, since the jump occurs
+// within the user-provided assembly blob.
+//
+// Instead, those copies must occur separately at the beginning of each
+// indirect target. That requires that we create a separate SSA definition in
+// each of them (via llvm.callbr.landingpad), and may require splitting
+// critical edges so we have a location to place the intrinsic. Finally, we
+// remap users of the original callbr output SSA value to instead point to the
+// appropriate llvm.callbr.landingpad value.
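+//
+// As a rough illustration (hypothetical IR, not taken from a test case), a
+// callbr such as
+//   %v = callbr i32 asm "", "=r,!i"() to label %direct [label %indirect]
+// gets a fresh definition at the top of %indirect,
+//   %lp = call i32 @llvm.callbr.landingpad.i32(i32 %v)
+// and uses of %v that are reached through %indirect are rewritten to use
+// %lp instead.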
+//
+// Ideally, this could be done inside SelectionDAG, or in the
+// MachineInstruction representation, without the use of an IR-level intrinsic.
+// But, within the current framework, it's simpler to implement as an IR pass.
+// (If support for callbr in GlobalISel is implemented, it's worth considering
+// whether this is still required.)
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "callbrprepare"
+
+namespace {
+
+class CallBrPrepare : public FunctionPass {
+ bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
+ bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
+ DominatorTree &DT) const;
+ void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
+ SSAUpdater &SSAUpdate) const;
+
+public:
+ CallBrPrepare() : FunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &Fn) override;
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char CallBrPrepare::ID = 0;
+INITIALIZE_PASS_BEGIN(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
+
+FunctionPass *llvm::createCallBrPass() { return new CallBrPrepare(); }
+
+void CallBrPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+static SmallVector<CallBrInst *, 2> FindCallBrs(Function &Fn) {
+ SmallVector<CallBrInst *, 2> CBRs;
+ for (BasicBlock &BB : Fn)
+ if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator()))
+ if (!CBR->getType()->isVoidTy() && !CBR->use_empty())
+ CBRs.push_back(CBR);
+ return CBRs;
+}
+
+bool CallBrPrepare::SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs,
+ DominatorTree &DT) {
+ bool Changed = false;
+ CriticalEdgeSplittingOptions Options(&DT);
+ Options.setMergeIdenticalEdges();
+
+ // The indirect destination might be duplicated between another parameter...
+ // %0 = callbr ... [label %x, label %x]
+  // ...hence MergeIdenticalEdges and AllowIdentical edges, but we don't need
+ // to split the default destination if it's duplicated between an indirect
+ // destination...
+ // %1 = callbr ... to label %x [label %x]
+ // ...hence starting at 1 and checking against successor 0 (aka the default
+ // destination).
+ for (CallBrInst *CBR : CBRs)
+ for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i)
+ if (CBR->getSuccessor(i) == CBR->getSuccessor(0) ||
+ isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true))
+ if (SplitKnownCriticalEdge(CBR, i, Options))
+ Changed = true;
+ return Changed;
+}
+
+bool CallBrPrepare::InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
+ DominatorTree &DT) const {
+ bool Changed = false;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ IRBuilder<> Builder(CBRs[0]->getContext());
+ for (CallBrInst *CBR : CBRs) {
+ if (!CBR->getNumIndirectDests())
+ continue;
+
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(CBR->getType(), CBR->getName());
+ SSAUpdate.AddAvailableValue(CBR->getParent(), CBR);
+ SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR);
+
+ for (BasicBlock *IndDest : CBR->getIndirectDests()) {
+ if (!Visited.insert(IndDest).second)
+ continue;
+ Builder.SetInsertPoint(&*IndDest->begin());
+ CallInst *Intrinsic = Builder.CreateIntrinsic(
+ CBR->getType(), Intrinsic::callbr_landingpad, {CBR});
+ SSAUpdate.AddAvailableValue(IndDest, Intrinsic);
+ UpdateSSA(DT, CBR, Intrinsic, SSAUpdate);
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+static bool IsInSameBasicBlock(const Use &U, const BasicBlock *BB) {
+ const auto *I = dyn_cast<Instruction>(U.getUser());
+ return I && I->getParent() == BB;
+}
+
+#ifndef NDEBUG
+static void PrintDebugDomInfo(const DominatorTree &DT, const Use &U,
+ const BasicBlock *BB, bool IsDefaultDest) {
+ if (!isa<Instruction>(U.getUser()))
+ return;
+ LLVM_DEBUG(dbgs() << "Use: " << *U.getUser() << ", in block "
+ << cast<Instruction>(U.getUser())->getParent()->getName()
+ << ", is " << (DT.dominates(BB, U) ? "" : "NOT ")
+ << "dominated by " << BB->getName() << " ("
+ << (IsDefaultDest ? "in" : "") << "direct)\n");
+}
+#endif
+
+void CallBrPrepare::UpdateSSA(DominatorTree &DT, CallBrInst *CBR,
+ CallInst *Intrinsic,
+ SSAUpdater &SSAUpdate) const {
+
+ SmallPtrSet<Use *, 4> Visited;
+ BasicBlock *DefaultDest = CBR->getDefaultDest();
+ BasicBlock *LandingPad = Intrinsic->getParent();
+
+ SmallVector<Use *, 4> Uses(make_pointer_range(CBR->uses()));
+ for (Use *U : Uses) {
+ if (!Visited.insert(U).second)
+ continue;
+
+#ifndef NDEBUG
+ PrintDebugDomInfo(DT, *U, LandingPad, /*IsDefaultDest*/ false);
+ PrintDebugDomInfo(DT, *U, DefaultDest, /*IsDefaultDest*/ true);
+#endif
+
+ // Don't rewrite the use in the newly inserted intrinsic.
+ if (const auto *II = dyn_cast<IntrinsicInst>(U->getUser()))
+ if (II->getIntrinsicID() == Intrinsic::callbr_landingpad)
+ continue;
+
+ // If the Use is in the same BasicBlock as the Intrinsic call, replace
+ // the Use with the value of the Intrinsic call.
+ if (IsInSameBasicBlock(*U, LandingPad)) {
+ U->set(Intrinsic);
+ continue;
+ }
+
+ // If the Use is dominated by the default dest, do not touch it.
+ if (DT.dominates(DefaultDest, *U))
+ continue;
+
+ SSAUpdate.RewriteUse(*U);
+ }
+}
+
+bool CallBrPrepare::runOnFunction(Function &Fn) {
+ bool Changed = false;
+ SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(Fn);
+
+ if (CBRs.empty())
+ return Changed;
+
+ // It's highly likely that most programs do not contain CallBrInsts. Follow a
+ // similar pattern from SafeStackLegacyPass::runOnFunction to reuse previous
+ // domtree analysis if available, otherwise compute it lazily. This avoids
+ // forcing Dominator Tree Construction at -O0 for programs that likely do not
+ // contain CallBrInsts. It does pessimize programs with callbr at higher
+ // optimization levels, as the DominatorTree created here is not reused by
+ // subsequent passes.
+ DominatorTree *DT;
+ std::optional<DominatorTree> LazilyComputedDomTree;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTWP->getDomTree();
+ else {
+ LazilyComputedDomTree.emplace(Fn);
+ DT = &*LazilyComputedDomTree;
+ }
+
+ if (SplitCriticalEdges(CBRs, *DT))
+ Changed = true;
+
+ if (InsertIntrinsicCalls(CBRs, *DT))
+ Changed = true;
+
+ return Changed;
+}
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index ce1ef571c9df..b7152587a9fa 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -25,12 +25,15 @@
using namespace llvm;
-CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
- SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
- : CallingConv(CC), IsVarArg(isVarArg), MF(mf),
- TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C) {
+CCState::CCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
+ SmallVectorImpl<CCValAssign> &Locs, LLVMContext &Context,
+ bool NegativeOffsets)
+ : CallingConv(CC), IsVarArg(IsVarArg), MF(MF),
+ TRI(*MF.getSubtarget().getRegisterInfo()), Locs(Locs), Context(Context),
+ NegativeOffsets(NegativeOffsets) {
+
// No stack is used.
- StackOffset = 0;
+ StackSize = 0;
clearByValRegsInfo();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
@@ -51,7 +54,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
ensureMaxAlignment(Alignment);
MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Alignment);
Size = unsigned(alignTo(Size, MinAlign));
- unsigned Offset = AllocateStack(Size, Alignment);
+ uint64_t Offset = AllocateStack(Size, Alignment);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
@@ -129,7 +132,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString() << '\n';
+ << ArgVT << '\n';
#endif
llvm_unreachable(nullptr);
}
@@ -147,7 +150,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString() << '\n';
+ << ArgVT << '\n';
#endif
llvm_unreachable(nullptr);
}
@@ -164,7 +167,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
dbgs() << "Call result #" << i << " has unhandled type "
- << EVT(VT).getEVTString() << '\n';
+ << VT << '\n';
#endif
llvm_unreachable(nullptr);
}
@@ -176,7 +179,7 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
#ifndef NDEBUG
dbgs() << "Call result has unhandled type "
- << EVT(VT).getEVTString() << '\n';
+ << VT << '\n';
#endif
llvm_unreachable(nullptr);
}
@@ -197,7 +200,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
MVT VT, CCAssignFn Fn) {
- unsigned SavedStackOffset = StackOffset;
+ uint64_t SavedStackSize = StackSize;
Align SavedMaxStackArgAlign = MaxStackArgAlign;
unsigned NumLocs = Locs.size();
@@ -212,7 +215,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
do {
if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
- dbgs() << "Call has unhandled type " << EVT(VT).getEVTString()
+ dbgs() << "Call has unhandled type " << VT
<< " while computing remaining regparms\n";
#endif
llvm_unreachable(nullptr);
@@ -229,7 +232,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
// Clear the assigned values and stack memory. We leave the registers marked
// as allocated so that future queries don't return the same registers, i.e.
// when i64 and f64 are both passed in GPRs.
- StackOffset = SavedStackOffset;
+ StackSize = SavedStackSize;
MaxStackArgAlign = SavedMaxStackArgAlign;
Locs.truncate(NumLocs);
}
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 398ff56f737c..6272b654b329 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm-c/Initialization.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
@@ -24,6 +23,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBasicBlockSectionsPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
+ initializeBreakFalseDepsPass(Registry);
+ initializeCallBrPreparePass(Registry);
initializeCFGuardLongjmpPass(Registry);
initializeCFIFixupPass(Registry);
initializeCFIInstrInserterPass(Registry);
@@ -48,7 +49,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
- initializeHardwareLoopsPass(Registry);
+ initializeHardwareLoopsLegacyPass(Registry);
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
initializeIndirectBrExpandPassPass(Registry);
@@ -140,7 +141,3 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeWinEHPreparePass(Registry);
initializeXRayInstrumentationPass(Registry);
}
-
-void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
- initializeCodeGen(*unwrap(R));
-}
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index a5215969c0dd..577c5dbc8e2d 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -173,11 +173,11 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
return SplitPoint;
}
-unsigned llvm::getInvertedFPClassTest(unsigned Test) {
- unsigned InvertedTest = ~Test & fcAllFlags;
- switch (InvertedTest) {
- default:
- break;
+FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
+ FPClassTest InvertedTest = ~Test;
+ // Pick the direction with fewer tests
+ // TODO: Handle more combinations of cases that can be handled together
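+  // For instance (illustrative only): a test for "everything except NaN",
+  // i.e. ~fcNan, inverts to plain fcNan, which needs fewer checks, so the
+  // inverted mask is returned; fcNone is returned when no simpler inverse
+  // is known.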
+ switch (static_cast<unsigned>(InvertedTest)) {
case fcNan:
case fcSNan:
case fcQNan:
@@ -196,9 +196,15 @@ unsigned llvm::getInvertedFPClassTest(unsigned Test) {
case fcFinite:
case fcPosFinite:
case fcNegFinite:
+ case fcZero | fcNan:
+ case fcSubnormal | fcZero:
+ case fcSubnormal | fcZero | fcNan:
return InvertedTest;
+ default:
+ return fcNone;
}
- return 0;
+
+ llvm_unreachable("covered FPClassTest");
}
static MachineOperand *getSalvageOpsForCopy(const MachineRegisterInfo &MRI,
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index dd431cc6f4f5..b00df0b6c6cb 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -82,7 +83,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -257,13 +257,17 @@ static cl::opt<bool>
"CodeGenPrepare."));
static cl::opt<bool>
- OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(false),
+ OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
cl::desc("Enable converting phi types in CodeGenPrepare"));
static cl::opt<unsigned>
HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
cl::desc("Least BB number of huge function."));
+static cl::opt<unsigned>
+ MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
+ cl::Hidden,
+ cl::desc("Max number of address users to look at"));
namespace {
enum ExtType {
@@ -294,16 +298,16 @@ class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
const TargetMachine *TM = nullptr;
- const TargetSubtargetInfo *SubtargetInfo;
+ const TargetSubtargetInfo *SubtargetInfo = nullptr;
const TargetLowering *TLI = nullptr;
- const TargetRegisterInfo *TRI;
+ const TargetRegisterInfo *TRI = nullptr;
const TargetTransformInfo *TTI = nullptr;
const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
- const TargetLibraryInfo *TLInfo;
- const LoopInfo *LI;
+ const TargetLibraryInfo *TLInfo = nullptr;
+ LoopInfo *LI = nullptr;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
- ProfileSummaryInfo *PSI;
+ ProfileSummaryInfo *PSI = nullptr;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
@@ -373,6 +377,15 @@ public:
bool runOnFunction(Function &F) override;
+ void releaseMemory() override {
+ // Clear per function information.
+ InsertedInsts.clear();
+ PromotedInsts.clear();
+ FreshBBs.clear();
+ BPI.reset();
+ BFI.reset();
+ }
+
StringRef getPassName() const override { return "CodeGen Prepare"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -413,7 +426,7 @@ private:
void removeAllAssertingVHReferences(Value *V);
bool eliminateAssumptions(Function &F);
- bool eliminateFallThrough(Function &F);
+ bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
@@ -494,10 +507,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
DL = &F.getParent()->getDataLayout();
bool EverMadeChange = false;
- // Clear per function information.
- InsertedInsts.clear();
- PromotedInsts.clear();
- FreshBBs.clear();
TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
SubtargetInfo = TM->getSubtargetImpl(F);
@@ -574,11 +583,15 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
  // Because the basic algorithm's complexity is near O(N!).
IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
+ // Transformations above may invalidate dominator tree and/or loop info.
+ DT.reset();
+ LI->releaseMemory();
+ LI->analyze(getDT(F));
+
bool MadeChange = true;
bool FuncIterated = false;
while (MadeChange) {
MadeChange = false;
- DT.reset();
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
if (FuncIterated && !FreshBBs.contains(&BB))
@@ -587,6 +600,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
+ if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
+ DT.reset();
+
MadeChange |= Changed;
if (IsHugeFunc) {
      // If the BB is updated, it may still have a chance to be optimized.
@@ -602,9 +618,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
FreshBBs.insert(&BB);
else if (FuncIterated)
FreshBBs.erase(&BB);
-
- if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
- DT.reset();
} else {
// For small/normal functions, we restart BB iteration if the dominator
// tree of the Function was changed.
@@ -622,7 +635,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= optimizePhiTypes(F);
if (MadeChange)
- eliminateFallThrough(F);
+ eliminateFallThrough(F, DT.get());
+
+#ifndef NDEBUG
+ if (MadeChange && VerifyLoopInfo)
+ LI->verify(getDT(F));
+#endif
// Really free removed instructions during promotion.
for (Instruction *I : RemovedInsts)
@@ -755,7 +773,7 @@ void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
/// Merge basic blocks which are connected by a single edge, where one of the
/// basic blocks has a single successor pointing to the other basic block,
/// which has a single predecessor.
-bool CodeGenPrepare::eliminateFallThrough(Function &F) {
+bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
bool Changed = false;
// Scan all of the blocks in the function, except for the entry block.
// Use a temporary array to avoid iterator being invalidated when
@@ -777,13 +795,19 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
continue;
+ // Make an effort to skip unreachable blocks.
+ if (DT && !DT->isReachableFromEntry(BB))
+ continue;
+
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
if (Term && !Term->isConditional()) {
Changed = true;
LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
// Merge BB into SinglePred and delete it.
- MergeBlockIntoPredecessor(BB);
+ MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
+ /* MemDep */ nullptr,
+ /* PredecessorWithTwoSuccessors */ false, DT);
Preds.insert(SinglePred);
if (IsHugeFunc) {
@@ -1579,6 +1603,7 @@ static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
/// intrinsic. Return true if any changes were made.
bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
ModifyDT &ModifiedDT) {
+ bool EdgeCase = false;
Value *A, *B;
BinaryOperator *Add;
if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
@@ -1587,11 +1612,12 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
// Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
A = Add->getOperand(0);
B = Add->getOperand(1);
+ EdgeCase = true;
}
if (!TLI->shouldFormOverflowOp(ISD::UADDO,
TLI->getValueType(*DL, Add->getType()),
- Add->hasNUsesOrMore(2)))
+ Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
return false;
// We don't want to move around uses of condition values this late, so we
@@ -1660,7 +1686,7 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
if (!TLI->shouldFormOverflowOp(ISD::USUBO,
TLI->getValueType(*DL, Sub->getType()),
- Sub->hasNUsesOrMore(2)))
+ Sub->hasNUsesOrMore(1)))
return false;
if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
@@ -1825,6 +1851,37 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
return true;
}
+/// Many architectures use the same instruction for both subtract and cmp. Try
+/// to swap cmp operands to match subtract operations to allow for CSE.
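+///
+/// For example (hypothetical IR): given
+///   %d = sub i32 %b, %a
+///   %c = icmp ult i32 %a, %b
+/// swapping the compare to "icmp ugt i32 %b, %a" puts its operands in the
+/// same order as the subtraction, so a target that computes both from one
+/// subtract-and-set-flags instruction can CSE them.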
+static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
+ Value *Op0 = Cmp->getOperand(0);
+ Value *Op1 = Cmp->getOperand(1);
+ if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
+ isa<Constant>(Op1) || Op0 == Op1)
+ return false;
+
+ // If a subtract already has the same operands as a compare, swapping would be
+ // bad. If a subtract has the same operands as a compare but in reverse order,
+ // then swapping is good.
+ int GoodToSwap = 0;
+ unsigned NumInspected = 0;
+ for (const User *U : Op0->users()) {
+ // Avoid walking many users.
+ if (++NumInspected > 128)
+ return false;
+ if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
+ GoodToSwap++;
+ else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
+ GoodToSwap--;
+ }
+
+ if (GoodToSwap > 0) {
+ Cmp->swapOperands();
+ return true;
+ }
+ return false;
+}
+
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
@@ -1838,6 +1895,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (foldICmpWithDominatingICmp(Cmp, *TLI))
return true;
+ if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
+ return true;
+
return false;
}
@@ -2129,6 +2189,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
///
/// If the transform is performed, return true and set ModifiedDT to true.
static bool despeculateCountZeros(IntrinsicInst *CountZeros,
+ LoopInfo &LI,
const TargetLowering *TLI,
const DataLayout *DL, ModifyDT &ModifiedDT,
SmallSet<BasicBlock *, 32> &FreshBBs,
@@ -2168,6 +2229,13 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
if (IsHugeFunc)
FreshBBs.insert(EndBlock);
+ // Update the LoopInfo. The new blocks are in the same loop as the start
+ // block.
+ if (Loop *L = LI.getLoopFor(StartBlock)) {
+ L->addBasicBlockToLoop(CallBlock, LI);
+ L->addBasicBlockToLoop(EndBlock, LI);
+ }
+
// Set up a builder to create a compare, conditional branch, and PHI.
IRBuilder<> Builder(CountZeros->getContext());
Builder.SetInsertPoint(StartBlock->getTerminator());
@@ -2279,7 +2347,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
if (!Arg->getType()->isPointerTy())
continue;
unsigned AS = Arg->getType()->getPointerAddressSpace();
- return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+ if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
+ return true;
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
@@ -2341,7 +2410,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
case Intrinsic::cttz:
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
- return despeculateCountZeros(II, TLI, DL, ModifiedDT, FreshBBs,
+ return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
IsHugeFunc);
case Intrinsic::fshl:
case Intrinsic::fshr:
@@ -2349,24 +2418,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
case Intrinsic::dbg_assign:
case Intrinsic::dbg_value:
return fixupDbgValue(II);
- case Intrinsic::vscale: {
- // If datalayout has no special restrictions on vector data layout,
- // replace `llvm.vscale` by an equivalent constant expression
- // to benefit from cheap constant propagation.
- Type *ScalableVectorTy =
- VectorType::get(Type::getInt8Ty(II->getContext()), 1, true);
- if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinValue() == 8) {
- auto *Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());
- auto *One = ConstantInt::getSigned(II->getType(), 1);
- auto *CGep =
- ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One);
- replaceAllUsesWith(II, ConstantExpr::getPtrToInt(CGep, II->getType()),
- FreshBBs, IsHugeFunc);
- II->eraseFromParent();
- return true;
- }
- break;
- }
case Intrinsic::masked_gather:
return optimizeGatherScatterInst(II, II->getArgOperand(0));
case Intrinsic::masked_scatter:
@@ -2442,6 +2493,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
if (!RetI)
return false;
+ assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
+
PHINode *PN = nullptr;
ExtractValueInst *EVI = nullptr;
BitCastInst *BCI = nullptr;
@@ -2687,7 +2740,7 @@ void ExtAddrMode::print(raw_ostream &OS) const {
if (InBounds)
OS << "inbounds ";
if (BaseGV) {
- OS << (NeedPlus ? " + " : "") << "GV:";
+ OS << "GV:";
BaseGV->printAsOperand(OS, /*PrintType=*/false);
NeedPlus = true;
}
@@ -3073,6 +3126,9 @@ class TypePromotionTransaction {
~InstructionRemover() override { delete Replacer; }
+ InstructionRemover &operator=(const InstructionRemover &other) = delete;
+ InstructionRemover(const InstructionRemover &other) = delete;
+
/// Resurrect the instruction and reassign it to the proper uses if
/// new value was provided when build this action.
void undo() override {
@@ -3258,7 +3314,7 @@ class AddressingModeMatcher {
bool IgnoreProfitability;
/// True if we are optimizing for size.
- bool OptSize;
+ bool OptSize = false;
ProfileSummaryInfo *PSI;
BlockFrequencyInfo *BFI;
@@ -3574,10 +3630,15 @@ private:
/// Original Address.
Value *Original;
+ /// Common value among addresses
+ Value *CommonValue = nullptr;
+
public:
AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
: SQ(_SQ), Original(OriginalValue) {}
+ ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
+
/// Get the combined AddrMode
const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
@@ -3662,13 +3723,21 @@ public:
if (!initializeMap(Map))
return false;
- Value *CommonValue = findCommon(Map);
+ CommonValue = findCommon(Map);
if (CommonValue)
AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
return CommonValue != nullptr;
}
private:
+ /// `CommonValue` may be a placeholder inserted by us.
+ /// If the placeholder is not used, we should remove this dead instruction.
+ void eraseCommonValueIfDead() {
+ if (CommonValue && CommonValue->getNumUses() == 0)
+ if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
+ CommonInst->eraseFromParent();
+ }
+
/// Initialize Map with anchor values. For address seen
/// we set the value of different field saw in this address.
/// At the same time we find a common type for different field we will
@@ -3866,17 +3935,17 @@ private:
SimplificationTracker &ST) {
while (!TraverseOrder.empty()) {
Value *Current = TraverseOrder.pop_back_val();
- assert(Map.find(Current) != Map.end() && "No node to fill!!!");
+ assert(Map.contains(Current) && "No node to fill!!!");
Value *V = Map[Current];
if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
// CurrentValue also must be Select.
auto *CurrentSelect = cast<SelectInst>(Current);
auto *TrueValue = CurrentSelect->getTrueValue();
- assert(Map.find(TrueValue) != Map.end() && "No True Value!");
+ assert(Map.contains(TrueValue) && "No True Value!");
Select->setTrueValue(ST.Get(Map[TrueValue]));
auto *FalseValue = CurrentSelect->getFalseValue();
- assert(Map.find(FalseValue) != Map.end() && "No False Value!");
+ assert(Map.contains(FalseValue) && "No False Value!");
Select->setFalseValue(ST.Get(Map[FalseValue]));
} else {
// Must be a Phi node then.
@@ -3884,7 +3953,7 @@ private:
// Fill the Phi node with values from predecessors.
for (auto *B : predecessors(PHI->getParent())) {
Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
- assert(Map.find(PV) != Map.end() && "No predecessor Value!");
+ assert(Map.contains(PV) && "No predecessor Value!");
PHI->addIncoming(ST.Get(Map[PV]), B);
}
}
@@ -3908,7 +3977,7 @@ private:
while (!Worklist.empty()) {
Value *Current = Worklist.pop_back_val();
// if it is already visited or it is an ending value then skip it.
- if (Map.find(Current) != Map.end())
+ if (Map.contains(Current))
continue;
TraverseOrder.push_back(Current);
@@ -4627,7 +4696,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
return false;
}
case Instruction::Add: {
- // Check to see if we can merge in the RHS then the LHS. If so, we win.
+ // Check to see if we can merge in one operand, then the other. If so, we
+ // win.
ExtAddrMode BackupAddrMode = AddrMode;
unsigned OldSize = AddrModeInsts.size();
// Start a transaction at this point.
@@ -4637,9 +4707,15 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
+ // Try to match an integer constant second to increase its chance of ending
+ // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
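+    // E.g. for a hypothetical "add %idx, 16", matching the constant 16 last
+    // makes it more likely to be folded as an immediate offset rather than
+    // occupying `BaseReg`.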
+ int First = 0, Second = 1;
+ if (isa<ConstantInt>(AddrInst->getOperand(First))
+ && !isa<ConstantInt>(AddrInst->getOperand(Second)))
+ std::swap(First, Second);
AddrMode.InBounds = false;
- if (matchAddr(AddrInst->getOperand(1), Depth + 1) &&
- matchAddr(AddrInst->getOperand(0), Depth + 1))
+ if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
+ matchAddr(AddrInst->getOperand(Second), Depth + 1))
return true;
// Restore the old addr mode info.
@@ -4647,9 +4723,10 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
AddrModeInsts.resize(OldSize);
TPT.rollback(LastKnownGood);
- // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (matchAddr(AddrInst->getOperand(0), Depth + 1) &&
- matchAddr(AddrInst->getOperand(1), Depth + 1))
+ // Otherwise this was over-aggressive. Try merging operands in the opposite
+ // order.
+ if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
+ matchAddr(AddrInst->getOperand(First), Depth + 1))
return true;
// Otherwise we definitely can't merge the ADD in.
@@ -4698,7 +4775,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (ConstantInt *CI =
dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
const APInt &CVal = CI->getValue();
- if (CVal.getMinSignedBits() <= 64) {
+ if (CVal.getSignificantBits() <= 64) {
ConstantOffset += CVal.getSExtValue() * TypeSize;
continue;
}
@@ -4718,36 +4795,35 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
// just add it to the disp field and check validity.
if (VariableOperand == -1) {
AddrMode.BaseOffs += ConstantOffset;
- if (ConstantOffset == 0 ||
- TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
- // Check to see if we can fold the base pointer in too.
- if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
+ if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
if (!cast<GEPOperator>(AddrInst)->isInBounds())
AddrMode.InBounds = false;
return true;
- }
- } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
- TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
- ConstantOffset > 0) {
- // Record GEPs with non-zero offsets as candidates for splitting in the
- // event that the offset cannot fit into the r+i addressing mode.
- // Simple and common case that only one GEP is used in calculating the
- // address for the memory access.
- Value *Base = AddrInst->getOperand(0);
- auto *BaseI = dyn_cast<Instruction>(Base);
- auto *GEP = cast<GetElementPtrInst>(AddrInst);
- if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
- (BaseI && !isa<CastInst>(BaseI) &&
- !isa<GetElementPtrInst>(BaseI))) {
- // Make sure the parent block allows inserting non-PHI instructions
- // before the terminator.
- BasicBlock *Parent =
- BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
- if (!Parent->getTerminator()->isEHPad())
- LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
- }
}
AddrMode.BaseOffs -= ConstantOffset;
+
+ if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
+ TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
+ ConstantOffset > 0) {
+ // Record GEPs with non-zero offsets as candidates for splitting in
+ // the event that the offset cannot fit into the r+i addressing mode.
+ // Simple and common case that only one GEP is used in calculating the
+ // address for the memory access.
+ Value *Base = AddrInst->getOperand(0);
+ auto *BaseI = dyn_cast<Instruction>(Base);
+ auto *GEP = cast<GetElementPtrInst>(AddrInst);
+ if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
+ (BaseI && !isa<CastInst>(BaseI) &&
+ !isa<GetElementPtrInst>(BaseI))) {
+ // Make sure the parent block allows inserting non-PHI instructions
+ // before the terminator.
+ BasicBlock *Parent = BaseI ? BaseI->getParent()
+ : &GEP->getFunction()->getEntryBlock();
+ if (!Parent->getTerminator()->isEHPad())
+ LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
+ }
+ }
+
return false;
}
@@ -4963,18 +5039,14 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
return true;
}
-// Max number of memory uses to look at before aborting the search to conserve
-// compile time.
-static constexpr int MaxMemoryUsesToScan = 20;
-
/// Recursively walk all the uses of I until we find a memory use.
/// If we find an obviously non-foldable instruction, return true.
/// Add accessed addresses and types to MemoryUses.
static bool FindAllMemoryUses(
- Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses,
+ Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, int SeenInsts = 0) {
+ BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -4987,33 +5059,33 @@ static bool FindAllMemoryUses(
for (Use &U : I->uses()) {
// Conservatively return true if we're seeing a large number or a deep chain
// of users. This avoids excessive compilation times in pathological cases.
- if (SeenInsts++ >= MaxMemoryUsesToScan)
+ if (SeenInsts++ >= MaxAddressUsersToScan)
return true;
Instruction *UserI = cast<Instruction>(U.getUser());
if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
- MemoryUses.push_back({U.get(), LI->getType()});
+ MemoryUses.push_back({&U, LI->getType()});
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back({U.get(), SI->getValueOperand()->getType()});
+ MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
continue;
}
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back({U.get(), RMW->getValOperand()->getType()});
+ MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
continue;
}
if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back({U.get(), CmpX->getCompareOperand()->getType()});
+ MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
continue;
}
@@ -5045,6 +5117,17 @@ static bool FindAllMemoryUses(
return false;
}
+static bool FindAllMemoryUses(
+ Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
+ const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+ unsigned SeenInsts = 0;
+ SmallPtrSet<Instruction *, 16> ConsideredInsts;
+ return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+ PSI, BFI, SeenInsts);
+}
+
+
/// Return true if Val is already known to be live at the use site that we're
/// folding it into. If so, there is no cost to include it in the addressing
/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
@@ -5126,10 +5209,8 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
// we can remove the addressing mode and effectively trade one live register
// for another (at worst.) In this context, folding an addressing mode into
// the use is just a particularly nice way of sinking it.
- SmallVector<std::pair<Value *, Type *>, 16> MemoryUses;
- SmallPtrSet<Instruction *, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, PSI,
- BFI))
+ SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
+ if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -5142,8 +5223,9 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
// growth since most architectures have some reasonable small and fast way to
// compute an effective address. (i.e LEA on x86)
SmallVector<Instruction *, 32> MatchedAddrModeInsts;
- for (const std::pair<Value *, Type *> &Pair : MemoryUses) {
- Value *Address = Pair.first;
+ for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
+ Value *Address = Pair.first->get();
+ Instruction *UserI = cast<Instruction>(Pair.first->getUser());
Type *AddressAccessTy = Pair.second;
unsigned AS = Address->getType()->getPointerAddressSpace();
@@ -5156,7 +5238,7 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
- AddressAccessTy, AS, MemoryInst, Result,
+ AddressAccessTy, AS, UserI, Result,
InsertedInsts, PromotedInsts, TPT,
LargeOffsetGEP, OptSize, PSI, BFI);
Matcher.IgnoreProfitability = true;
@@ -5693,7 +5775,8 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
// Create a scalar GEP if there are more than 2 operands.
if (Ops.size() != 2) {
// Replace the last index with 0.
- Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
+ Ops[FinalIndex] =
+ Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
SourceTy = GetElementPtrInst::getIndexedType(
SourceTy, ArrayRef(Ops).drop_front());
@@ -6027,6 +6110,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
int64_t Offset = LargeOffsetGEP->second;
if (Offset != BaseOffset) {
TargetLowering::AddrMode AddrMode;
+ AddrMode.HasBaseReg = true;
AddrMode.BaseOffs = Offset - BaseOffset;
// The result type of the GEP might not be the type of the memory
// access.
@@ -6044,7 +6128,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
// Generate a new GEP to replace the current one.
LLVMContext &Ctx = GEP->getContext();
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *PtrIdxTy = DL->getIndexType(GEP->getType());
Type *I8PtrTy =
Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
Type *I8Ty = Type::getInt8Ty(Ctx);
@@ -6062,7 +6146,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
NewBaseInsertBB =
- SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
+ SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
} else
NewBaseInsertPt = std::next(BaseI->getIterator());
@@ -6074,7 +6158,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
}
IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
// Create a new base.
- Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
+ Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
NewBaseGEP = OldBase;
if (NewBaseGEP->getType() != I8PtrTy)
NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
@@ -6090,7 +6174,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
} else {
// Calculate the new offset for the new GEP.
- Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
+ Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
if (GEP->getType() != I8PtrTy)
@@ -6872,9 +6956,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
return false;
TargetLowering::SelectSupportKind SelectKind;
- if (VectorCond)
- SelectKind = TargetLowering::VectorMaskSelect;
- else if (SI->getType()->isVectorTy())
+ if (SI->getType()->isVectorTy())
SelectKind = TargetLowering::ScalarCondVectorVal;
else
SelectKind = TargetLowering::ScalarValSelect;
@@ -6915,88 +6997,88 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// first branch will point directly to select.end, and the corresponding PHI
// predecessor block will be the start block.
- // First, we split the block containing the select into 2 blocks.
+ // Collect values that go on the true side and the values that go on the false
+ // side.
+ SmallVector<Instruction *> TrueInstrs, FalseInstrs;
+ for (SelectInst *SI : ASI) {
+ if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
+ TrueInstrs.push_back(cast<Instruction>(V));
+ if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
+ FalseInstrs.push_back(cast<Instruction>(V));
+ }
+
+ // Split the select block, according to how many (if any) values go on each
+ // side.
BasicBlock *StartBlock = SI->getParent();
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
- BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
- if (IsHugeFunc)
- FreshBBs.insert(EndBlock);
- BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
- // Delete the unconditional branch that was just created by the split.
- StartBlock->getTerminator()->eraseFromParent();
+ IRBuilder<> IB(SI);
+ auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
- // These are the new basic blocks for the conditional branch.
- // At least one will become an actual new basic block.
BasicBlock *TrueBlock = nullptr;
BasicBlock *FalseBlock = nullptr;
+ BasicBlock *EndBlock = nullptr;
BranchInst *TrueBranch = nullptr;
BranchInst *FalseBranch = nullptr;
-
- // Sink expensive instructions into the conditional blocks to avoid executing
- // them speculatively.
- for (SelectInst *SI : ASI) {
- if (sinkSelectOperand(TTI, SI->getTrueValue())) {
- if (TrueBlock == nullptr) {
- TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
- EndBlock->getParent(), EndBlock);
- TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
- if (IsHugeFunc)
- FreshBBs.insert(TrueBlock);
- TrueBranch->setDebugLoc(SI->getDebugLoc());
- }
- auto *TrueInst = cast<Instruction>(SI->getTrueValue());
- TrueInst->moveBefore(TrueBranch);
- }
- if (sinkSelectOperand(TTI, SI->getFalseValue())) {
- if (FalseBlock == nullptr) {
- FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
- EndBlock->getParent(), EndBlock);
- if (IsHugeFunc)
- FreshBBs.insert(FalseBlock);
- FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
- FalseBranch->setDebugLoc(SI->getDebugLoc());
- }
- auto *FalseInst = cast<Instruction>(SI->getFalseValue());
- FalseInst->moveBefore(FalseBranch);
- }
+ if (TrueInstrs.size() == 0) {
+ FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
+ CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+ FalseBlock = FalseBranch->getParent();
+ EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
+ } else if (FalseInstrs.size() == 0) {
+ TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+ TrueBlock = TrueBranch->getParent();
+ EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
+ } else {
+ Instruction *ThenTerm = nullptr;
+ Instruction *ElseTerm = nullptr;
+ SplitBlockAndInsertIfThenElse(CondFr, &*SplitPt, &ThenTerm, &ElseTerm,
+ nullptr, nullptr, LI);
+ TrueBranch = cast<BranchInst>(ThenTerm);
+ FalseBranch = cast<BranchInst>(ElseTerm);
+ TrueBlock = TrueBranch->getParent();
+ FalseBlock = FalseBranch->getParent();
+ EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
+ }
+
+ EndBlock->setName("select.end");
+ if (TrueBlock)
+ TrueBlock->setName("select.true.sink");
+ if (FalseBlock)
+ FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
+ : "select.false.sink");
+
+ if (IsHugeFunc) {
+ if (TrueBlock)
+ FreshBBs.insert(TrueBlock);
+ if (FalseBlock)
+ FreshBBs.insert(FalseBlock);
+ FreshBBs.insert(EndBlock);
}
- // If there was nothing to sink, then arbitrarily choose the 'false' side
- // for a new input value to the PHI.
- if (TrueBlock == FalseBlock) {
- assert(TrueBlock == nullptr &&
- "Unexpected basic block transform while optimizing select");
+ BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
- FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
- EndBlock->getParent(), EndBlock);
- if (IsHugeFunc)
- FreshBBs.insert(FalseBlock);
- auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
- FalseBranch->setDebugLoc(SI->getDebugLoc());
- }
+ static const unsigned MD[] = {
+ LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
+ LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
+ StartBlock->getTerminator()->copyMetadata(*SI, MD);
+
+ // Sink expensive instructions into the conditional blocks to avoid executing
+ // them speculatively.
+ for (Instruction *I : TrueInstrs)
+ I->moveBefore(TrueBranch);
+ for (Instruction *I : FalseInstrs)
+ I->moveBefore(FalseBranch);
- // Insert the real conditional branch based on the original condition.
// If we did not create a new block for one of the 'true' or 'false' paths
// of the condition, it means that side of the branch goes to the end block
// directly and the path originates from the start block from the point of
// view of the new PHI.
- BasicBlock *TT, *FT;
- if (TrueBlock == nullptr) {
- TT = EndBlock;
- FT = FalseBlock;
+ if (TrueBlock == nullptr)
TrueBlock = StartBlock;
- } else if (FalseBlock == nullptr) {
- TT = TrueBlock;
- FT = EndBlock;
+ else if (FalseBlock == nullptr)
FalseBlock = StartBlock;
- } else {
- TT = TrueBlock;
- FT = FalseBlock;
- }
- IRBuilder<> IB(SI);
- auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
- IB.CreateCondBr(CondFr, TT, FT, SI);
SmallPtrSet<const Instruction *, 2> INS;
INS.insert(ASI.begin(), ASI.end());
@@ -7105,7 +7187,7 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
if (IsHugeFunc) {
// Now we clone an instruction, its operands' defs may sink to this BB
- // now. So we put the operands defs' BBs into FreshBBs to do optmization.
+ // now. So we put the operands defs' BBs into FreshBBs to do optimization.
for (unsigned I = 0; I < NI->getNumOperands(); ++I) {
auto *OpDef = dyn_cast<Instruction>(NI->getOperand(I));
if (!OpDef)
@@ -7696,7 +7778,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
// whereas scalable vectors would have to be shifted by
// <2log(vscale) + number of bits> in order to store the
// low/high parts. Bailing out for now.
- if (isa<ScalableVectorType>(StoreType))
+ if (StoreType->isScalableTy())
return false;
if (!DL.typeSizeEqualsStoreSize(StoreType) ||
@@ -8051,8 +8133,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
return true;
if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) &&
- TLI->optimizeExtendOrTruncateConversion(I,
- LI->getLoopFor(I->getParent())))
+ TLI->optimizeExtendOrTruncateConversion(
+ I, LI->getLoopFor(I->getParent()), *TTI))
return true;
if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
@@ -8064,7 +8146,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
return SinkCast(CI);
} else {
if (TLI->optimizeExtendOrTruncateConversion(
- I, LI->getLoopFor(I->getParent())))
+ I, LI->getLoopFor(I->getParent()), *TTI))
return true;
bool MadeChange = optimizeExt(I);
@@ -8128,7 +8210,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
GEPI->getName(), GEPI);
NC->setDebugLoc(GEPI->getDebugLoc());
replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
- GEPI->eraseFromParent();
+ RecursivelyDeleteTriviallyDeadInstructions(
+ GEPI, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
++NumGEPsElim;
optimizeInst(NC, ModifiedDT);
return true;
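
The reworked optimizeSelectInst hunks above stop splitting blocks by hand and instead freeze the select condition and call the SplitBlockAndInsertIfThen/IfElse/IfThenElse helpers, threading LoopInfo through so the new blocks stay registered with the enclosing loop. A minimal sketch of the three-way case, using only the BasicBlockUtils entry point that the hunk itself calls (branch weights and the DomTreeUpdater left as nullptr); the helper name splitForSelect is illustrative, not part of the patch:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <utility>

using namespace llvm;

// Sketch only: split at SplitPt on a frozen select condition and hand back the
// terminators of the new true/false blocks, mirroring the shape of the patched
// optimizeSelectInst above.
static std::pair<BranchInst *, BranchInst *>
splitForSelect(Value *CondFr, Instruction *SplitPt, LoopInfo *LI) {
  Instruction *ThenTerm = nullptr;
  Instruction *ElseTerm = nullptr;
  SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
                                /*BranchWeights=*/nullptr, /*DTU=*/nullptr, LI);
  return {cast<BranchInst>(ThenTerm), cast<BranchInst>(ElseTerm)};
}

The single-sided cases in the hunk use SplitBlockAndInsertIfThen / SplitBlockAndInsertIfElse the same way, and the tail block is recovered from the returned branch's operand.
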
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index 48cd8e998ec9..c34a52a6f2de 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -14,15 +14,15 @@
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include <optional>
using namespace llvm;
@@ -81,7 +81,7 @@ CGOPT(bool, StackSymbolOrdering)
CGOPT(bool, StackRealign)
CGOPT(std::string, TrapFuncName)
CGOPT(bool, UseCtors)
-CGOPT(bool, LowerGlobalDtorsViaCxaAtExit)
+CGOPT(bool, DisableIntegratedAS)
CGOPT(bool, RelaxELFRelocations)
CGOPT_EXP(bool, DataSections)
CGOPT_EXP(bool, FunctionSections)
@@ -89,7 +89,7 @@ CGOPT(bool, IgnoreXCOFFVisibility)
CGOPT(bool, XCOFFTracebackTable)
CGOPT(std::string, BBSections)
CGOPT(unsigned, TLSSize)
-CGOPT(bool, EmulatedTLS)
+CGOPT_EXP(bool, EmulatedTLS)
CGOPT(bool, UniqueSectionNames)
CGOPT(bool, UniqueBasicBlockSectionNames)
CGOPT(EABI, EABIVersion)
@@ -100,10 +100,11 @@ CGOPT(bool, EmitCallSiteInfo)
CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
CGOPT(bool, ForceDwarfFrameSection)
-CGOPT(bool, XRayOmitFunctionIndex)
+CGOPT(bool, XRayFunctionIndex)
CGOPT(bool, DebugStrictDwarf)
CGOPT(unsigned, AlignLoops)
CGOPT(bool, JMCInstrument)
+CGOPT(bool, XCOFFReadOnlyPointers)
codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
#define CGBINDOPT(NAME) \
@@ -241,14 +242,15 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(EnableNoTrappingFPMath);
- static const auto DenormFlagEnumOptions =
- cl::values(clEnumValN(DenormalMode::IEEE, "ieee",
- "IEEE 754 denormal numbers"),
- clEnumValN(DenormalMode::PreserveSign, "preserve-sign",
- "the sign of a flushed-to-zero number is preserved "
- "in the sign of 0"),
- clEnumValN(DenormalMode::PositiveZero, "positive-zero",
- "denormals are flushed to positive zero"));
+ static const auto DenormFlagEnumOptions = cl::values(
+ clEnumValN(DenormalMode::IEEE, "ieee", "IEEE 754 denormal numbers"),
+ clEnumValN(DenormalMode::PreserveSign, "preserve-sign",
+ "the sign of a flushed-to-zero number is preserved "
+ "in the sign of 0"),
+ clEnumValN(DenormalMode::PositiveZero, "positive-zero",
+ "denormals are flushed to positive zero"),
+ clEnumValN(DenormalMode::Dynamic, "dynamic",
+ "denormals have unknown treatment"));
// FIXME: Doesn't have way to specify separate input and output modes.
static cl::opt<DenormalMode::DenormalModeKind> DenormalFPMath(
@@ -349,12 +351,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(UseCtors);
- static cl::opt<bool> LowerGlobalDtorsViaCxaAtExit(
- "lower-global-dtors-via-cxa-atexit",
- cl::desc("Lower llvm.global_dtors (global destructors) via __cxa_atexit"),
- cl::init(true));
- CGBINDOPT(LowerGlobalDtorsViaCxaAtExit);
-
static cl::opt<bool> RelaxELFRelocations(
"relax-elf-relocations",
cl::desc(
@@ -466,10 +462,10 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::desc("Always emit a debug frame section."), cl::init(false));
CGBINDOPT(ForceDwarfFrameSection);
- static cl::opt<bool> XRayOmitFunctionIndex(
- "no-xray-index", cl::desc("Don't emit xray_fn_idx section"),
- cl::init(false));
- CGBINDOPT(XRayOmitFunctionIndex);
+ static cl::opt<bool> XRayFunctionIndex("xray-function-index",
+ cl::desc("Emit xray_fn_idx section"),
+ cl::init(true));
+ CGBINDOPT(XRayFunctionIndex);
static cl::opt<bool> DebugStrictDwarf(
"strict-dwarf", cl::desc("use strict dwarf"), cl::init(false));
@@ -485,6 +481,18 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(JMCInstrument);
+ static cl::opt<bool> XCOFFReadOnlyPointers(
+ "mxcoff-roptr",
+ cl::desc("When set to true, const objects with relocatable address "
+ "values are put into the RO data section."),
+ cl::init(false));
+ CGBINDOPT(XCOFFReadOnlyPointers);
+
+ static cl::opt<bool> DisableIntegratedAS(
+ "no-integrated-as", cl::desc("Disable integrated assembler"),
+ cl::init(false));
+ CGBINDOPT(DisableIntegratedAS);
+
#undef CGBINDOPT
mc::RegisterMCTargetOptionsFlags();
@@ -538,7 +546,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
Options.StackSymbolOrdering = getStackSymbolOrdering();
Options.UseInitArray = !getUseCtors();
- Options.LowerGlobalDtorsViaCxaAtExit = getLowerGlobalDtorsViaCxaAtExit();
+ Options.DisableIntegratedAS = getDisableIntegratedAS();
Options.RelaxELFRelocations = getRelaxELFRelocations();
Options.DataSections =
getExplicitDataSections().value_or(TheTriple.hasDefaultDataSections());
@@ -549,8 +557,8 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.UniqueSectionNames = getUniqueSectionNames();
Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames();
Options.TLSSize = getTLSSize();
- Options.EmulatedTLS = getEmulatedTLS();
- Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0;
+ Options.EmulatedTLS =
+ getExplicitEmulatedTLS().value_or(TheTriple.hasDefaultEmulatedTLS());
Options.ExceptionModel = getExceptionModel();
Options.EmitStackSizeSection = getEnableStackSizeSection();
Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter();
@@ -558,10 +566,11 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
- Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
+ Options.XRayFunctionIndex = getXRayFunctionIndex();
Options.DebugStrictDwarf = getDebugStrictDwarf();
Options.LoopAlignment = getAlignLoops();
Options.JMCInstrument = getJMCInstrument();
+ Options.XCOFFReadOnlyPointers = getXCOFFReadOnlyPointers();
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
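
With EmulatedTLS switched from CGOPT to CGOPT_EXP above, consumers no longer read a plain boolean; they ask whether the flag was given explicitly and otherwise fall back to the triple's default, exactly as the new InitTargetOptionsFromCodeGenFlags line does. A small sketch of that pattern, assuming only getExplicitEmulatedTLS() and Triple::hasDefaultEmulatedTLS() as used in the hunk; the wrapper name resolveEmulatedTLS is illustrative:

#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/TargetParser/Triple.h"

// Sketch only: an explicitly passed emulated-TLS flag wins; otherwise the
// target triple's default decides, matching the patched option handling.
static bool resolveEmulatedTLS(const llvm::Triple &TheTriple) {
  return llvm::codegen::getExplicitEmulatedTLS().value_or(
      TheTriple.hasDefaultEmulatedTLS());
}
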
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index 9b1f7117fa57..02c67e500bdc 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -18,6 +18,11 @@
// pairs. Validity of each node is expected to be done upon creation, and any
// validation errors should halt traversal and prevent further graph
// construction.
+// Instead of relying on Shuffle operations, vector interleaving and
+// deinterleaving can be represented by vector.interleave2 and
+// vector.deinterleave2 intrinsics. Scalable vectors can be represented only by
+// these intrinsics, whereas fixed-width vectors are recognized for both the
+// shufflevector instruction and the intrinsics.
//
// Replacement:
// This step traverses the graph built up by identification, delegating to the
@@ -62,6 +67,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -94,6 +100,13 @@ static bool isInterleavingMask(ArrayRef<int> Mask);
/// <1, 3, 5, 7>).
static bool isDeinterleavingMask(ArrayRef<int> Mask);
+/// Returns true if \p V is a negation operation; this works for both
+/// integers and floats.
+static bool isNeg(Value *V);
+
+/// Returns the operand of a negation operation.
+static Value *getNegOperand(Value *V);
+
namespace {
class ComplexDeinterleavingLegacyPass : public FunctionPass {
@@ -124,7 +137,7 @@ class ComplexDeinterleavingGraph;
struct ComplexDeinterleavingCompositeNode {
ComplexDeinterleavingCompositeNode(ComplexDeinterleavingOperation Op,
- Instruction *R, Instruction *I)
+ Value *R, Value *I)
: Operation(Op), Real(R), Imag(I) {}
private:
@@ -134,22 +147,21 @@ private:
public:
ComplexDeinterleavingOperation Operation;
- Instruction *Real;
- Instruction *Imag;
+ Value *Real;
+ Value *Imag;
- // Instructions that should only exist within this node, there should be no
- // users of these instructions outside the node. An example of these would be
- // the multiply instructions of a partial multiply operation.
- SmallVector<Instruction *> InternalInstructions;
- ComplexDeinterleavingRotation Rotation;
+  // These two members are required exclusively for generating
+ // ComplexDeinterleavingOperation::Symmetric operations.
+ unsigned Opcode;
+ std::optional<FastMathFlags> Flags;
+
+ ComplexDeinterleavingRotation Rotation =
+ ComplexDeinterleavingRotation::Rotation_0;
SmallVector<RawNodePtr> Operands;
Value *ReplacementNode = nullptr;
- void addInstruction(Instruction *I) { InternalInstructions.push_back(I); }
void addOperand(NodePtr Node) { Operands.push_back(Node.get()); }
- bool hasAllInternalUses(SmallPtrSet<Instruction *, 16> &AllInstructions);
-
void dump() { dump(dbgs()); }
void dump(raw_ostream &OS) {
auto PrintValue = [&](Value *V) {
@@ -181,40 +193,105 @@ public:
OS << " - ";
PrintNodeRef(Op);
}
- OS << " InternalInstructions:\n";
- for (const auto &I : InternalInstructions) {
- OS << " - \"";
- I->print(OS, true);
- OS << "\"\n";
- }
}
};
class ComplexDeinterleavingGraph {
public:
+ struct Product {
+ Value *Multiplier;
+ Value *Multiplicand;
+ bool IsPositive;
+ };
+
+ using Addend = std::pair<Value *, bool>;
using NodePtr = ComplexDeinterleavingCompositeNode::NodePtr;
using RawNodePtr = ComplexDeinterleavingCompositeNode::RawNodePtr;
- explicit ComplexDeinterleavingGraph(const TargetLowering *tl) : TL(tl) {}
+
+ // Helper struct for holding info about potential partial multiplication
+ // candidates
+ struct PartialMulCandidate {
+ Value *Common;
+ NodePtr Node;
+ unsigned RealIdx;
+ unsigned ImagIdx;
+ bool IsNodeInverted;
+ };
+
+ explicit ComplexDeinterleavingGraph(const TargetLowering *TL,
+ const TargetLibraryInfo *TLI)
+ : TL(TL), TLI(TLI) {}
private:
- const TargetLowering *TL;
- Instruction *RootValue;
- NodePtr RootNode;
+ const TargetLowering *TL = nullptr;
+ const TargetLibraryInfo *TLI = nullptr;
SmallVector<NodePtr> CompositeNodes;
- SmallPtrSet<Instruction *, 16> AllInstructions;
+
+ SmallPtrSet<Instruction *, 16> FinalInstructions;
+
+ /// Root instructions are instructions from which complex computation starts
+ std::map<Instruction *, NodePtr> RootToNode;
+
+ /// Topologically sorted root instructions
+ SmallVector<Instruction *, 1> OrderedRoots;
+
+ /// When examining a basic block for complex deinterleaving, if it is a simple
+ /// one-block loop, then the only incoming block is 'Incoming' and the
+  /// 'BackEdge' block is the block itself.
+ BasicBlock *BackEdge = nullptr;
+ BasicBlock *Incoming = nullptr;
+
+ /// ReductionInfo maps from %ReductionOp to %PHInode and Instruction
+ /// %OutsideUser as it is shown in the IR:
+ ///
+ /// vector.body:
+ /// %PHInode = phi <vector type> [ zeroinitializer, %entry ],
+ /// [ %ReductionOp, %vector.body ]
+ /// ...
+ /// %ReductionOp = fadd i64 ...
+ /// ...
+ /// br i1 %condition, label %vector.body, %middle.block
+ ///
+ /// middle.block:
+ /// %OutsideUser = llvm.vector.reduce.fadd(..., %ReductionOp)
+ ///
+ /// %OutsideUser can be `llvm.vector.reduce.fadd` or `fadd` preceding
+ /// `llvm.vector.reduce.fadd` when unroll factor isn't one.
+ std::map<Instruction *, std::pair<PHINode *, Instruction *>> ReductionInfo;
+
+ /// In the process of detecting a reduction, we consider a pair of
+ /// %ReductionOP, which we refer to as real and imag (or vice versa), and
+ /// traverse the use-tree to detect complex operations. As this is a reduction
+  /// operation, it will eventually reach RealPHI and ImagPHI, which correspond
+ /// to the %ReductionOPs that we suspect to be complex.
+ /// RealPHI and ImagPHI are used by the identifyPHINode method.
+ PHINode *RealPHI = nullptr;
+ PHINode *ImagPHI = nullptr;
+
+ /// Set this flag to true if RealPHI and ImagPHI were reached during reduction
+ /// detection.
+ bool PHIsFound = false;
+
+ /// OldToNewPHI maps the original real PHINode to a new, double-sized PHINode.
+ /// The new PHINode corresponds to a vector of deinterleaved complex numbers.
+ /// This mapping is populated during
+ /// ComplexDeinterleavingOperation::ReductionPHI node replacement. It is then
+ /// used in the ComplexDeinterleavingOperation::ReductionOperation node
+ /// replacement process.
+ std::map<PHINode *, PHINode *> OldToNewPHI;
NodePtr prepareCompositeNode(ComplexDeinterleavingOperation Operation,
- Instruction *R, Instruction *I) {
+ Value *R, Value *I) {
+ assert(((Operation != ComplexDeinterleavingOperation::ReductionPHI &&
+ Operation != ComplexDeinterleavingOperation::ReductionOperation) ||
+ (R && I)) &&
+ "Reduction related nodes must have Real and Imaginary parts");
return std::make_shared<ComplexDeinterleavingCompositeNode>(Operation, R,
I);
}
NodePtr submitCompositeNode(NodePtr Node) {
CompositeNodes.push_back(Node);
- AllInstructions.insert(Node->Real);
- AllInstructions.insert(Node->Imag);
- for (auto *I : Node->InternalInstructions)
- AllInstructions.insert(I);
return Node;
}
@@ -242,9 +319,9 @@ private:
/// Identify the other branch of a Partial Mul, taking the CommonOperandI that
/// is partially known from identifyPartialMul, filling in the other half of
/// the complex pair.
- NodePtr identifyNodeWithImplicitAdd(
- Instruction *I, Instruction *J,
- std::pair<Instruction *, Instruction *> &CommonOperandI);
+ NodePtr
+ identifyNodeWithImplicitAdd(Instruction *I, Instruction *J,
+ std::pair<Value *, Value *> &CommonOperandI);
/// Identifies a complex add pattern and its rotation, based on the following
/// patterns.
@@ -254,10 +331,76 @@ private:
/// 270: r: ar + bi
/// i: ai - br
NodePtr identifyAdd(Instruction *Real, Instruction *Imag);
-
- NodePtr identifyNode(Instruction *I, Instruction *J);
-
- Value *replaceNode(RawNodePtr Node);
+ NodePtr identifySymmetricOperation(Instruction *Real, Instruction *Imag);
+
+ NodePtr identifyNode(Value *R, Value *I);
+
+ /// Determine if a sum of complex numbers can be formed from \p RealAddends
+  /// and \p ImagAddends. If \p Accumulator is not null, add the result to it.
+ /// Return nullptr if it is not possible to construct a complex number.
+ /// \p Flags are needed to generate symmetric Add and Sub operations.
+ NodePtr identifyAdditions(std::list<Addend> &RealAddends,
+ std::list<Addend> &ImagAddends,
+ std::optional<FastMathFlags> Flags,
+ NodePtr Accumulator);
+
+  /// Extract one addend that has both real and imaginary parts positive.
+ NodePtr extractPositiveAddend(std::list<Addend> &RealAddends,
+ std::list<Addend> &ImagAddends);
+
+ /// Determine if sum of multiplications of complex numbers can be formed from
+ /// \p RealMuls and \p ImagMuls. If \p Accumulator is not null, add the result
+ /// to it. Return nullptr if it is not possible to construct a complex number.
+ NodePtr identifyMultiplications(std::vector<Product> &RealMuls,
+ std::vector<Product> &ImagMuls,
+ NodePtr Accumulator);
+
+ /// Go through pairs of multiplication (one Real and one Imag) and find all
+ /// possible candidates for partial multiplication and put them into \p
+  /// Candidates. Returns true if every product has a pair with a common
+  /// operand.
+ bool collectPartialMuls(const std::vector<Product> &RealMuls,
+ const std::vector<Product> &ImagMuls,
+ std::vector<PartialMulCandidate> &Candidates);
+
+  /// If the code is compiled with -Ofast or expressions have the `reassoc` flag,
+ /// the order of complex computation operations may be significantly altered,
+ /// and the real and imaginary parts may not be executed in parallel. This
+ /// function takes this into consideration and employs a more general approach
+ /// to identify complex computations. Initially, it gathers all the addends
+ /// and multiplicands and then constructs a complex expression from them.
+ NodePtr identifyReassocNodes(Instruction *I, Instruction *J);
+
+ NodePtr identifyRoot(Instruction *I);
+
+ /// Identifies the Deinterleave operation applied to a vector containing
+ /// complex numbers. There are two ways to represent the Deinterleave
+ /// operation:
+  ///  * Using two shufflevectors with even indices for the \p Real instruction
+  ///    and odd indices for the \p Imag instruction (only for fixed-width
+  ///    vectors)
+ /// * Using two extractvalue instructions applied to `vector.deinterleave2`
+ /// intrinsic (for both fixed and scalable vectors)
+ NodePtr identifyDeinterleave(Instruction *Real, Instruction *Imag);
+
+  /// Identifies the operation that represents a complex number repeated in a
+  /// splat vector. There are two possible types of splats: ConstantExpr with
+  /// the opcode ShuffleVector and ShuffleVectorInst. Both should have an
+ /// initialization mask with all values set to zero.
+ NodePtr identifySplat(Value *Real, Value *Imag);
+
+ NodePtr identifyPHINode(Instruction *Real, Instruction *Imag);
+
+ /// Identifies SelectInsts in a loop that has reduction with predication masks
+ /// and/or predicated tail folding
+ NodePtr identifySelectNode(Instruction *Real, Instruction *Imag);
+
+ Value *replaceNode(IRBuilderBase &Builder, RawNodePtr Node);
+
+ /// Complete IR modifications after producing new reduction operation:
+ /// * Populate the PHINode generated for
+ /// ComplexDeinterleavingOperation::ReductionPHI
+ /// * Deinterleave the final value outside of the loop and repurpose original
+ /// reduction users
+ void processReductionOperation(Value *OperationReplacement, RawNodePtr Node);
public:
void dump() { dump(dbgs()); }
@@ -270,9 +413,18 @@ public:
/// current graph.
bool identifyNodes(Instruction *RootI);
+  /// In case \p B is a one-block loop, this function seeks potential reductions
+ /// and populates ReductionInfo. Returns true if any reductions were
+ /// identified.
+ bool collectPotentialReductions(BasicBlock *B);
+
+ void identifyReductionNodes();
+
+ /// Check that every instruction, from the roots to the leaves, has internal
+ /// uses.
+ bool checkNodes();
+
/// Perform the actual replacement of the underlying instruction graph.
- /// Returns false if the deinterleaving operation should be cancelled for the
- /// current graph.
void replaceNodes();
};
@@ -368,43 +520,39 @@ static bool isDeinterleavingMask(ArrayRef<int> Mask) {
return true;
}
-bool ComplexDeinterleaving::evaluateBasicBlock(BasicBlock *B) {
- bool Changed = false;
+bool isNeg(Value *V) {
+ return match(V, m_FNeg(m_Value())) || match(V, m_Neg(m_Value()));
+}
- SmallVector<Instruction *> DeadInstrRoots;
+Value *getNegOperand(Value *V) {
+ assert(isNeg(V));
+ auto *I = cast<Instruction>(V);
+ if (I->getOpcode() == Instruction::FNeg)
+ return I->getOperand(0);
- for (auto &I : *B) {
- auto *SVI = dyn_cast<ShuffleVectorInst>(&I);
- if (!SVI)
- continue;
+ return I->getOperand(1);
+}
- // Look for a shufflevector that takes separate vectors of the real and
- // imaginary components and recombines them into a single vector.
- if (!isInterleavingMask(SVI->getShuffleMask()))
- continue;
+bool ComplexDeinterleaving::evaluateBasicBlock(BasicBlock *B) {
+ ComplexDeinterleavingGraph Graph(TL, TLI);
+ if (Graph.collectPotentialReductions(B))
+ Graph.identifyReductionNodes();
- ComplexDeinterleavingGraph Graph(TL);
- if (!Graph.identifyNodes(SVI))
- continue;
+ for (auto &I : *B)
+ Graph.identifyNodes(&I);
+ if (Graph.checkNodes()) {
Graph.replaceNodes();
- DeadInstrRoots.push_back(SVI);
- Changed = true;
- }
-
- for (const auto &I : DeadInstrRoots) {
- if (!I || I->getParent() == nullptr)
- continue;
- llvm::RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ return true;
}
- return Changed;
+ return false;
}
ComplexDeinterleavingGraph::NodePtr
ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd(
Instruction *Real, Instruction *Imag,
- std::pair<Instruction *, Instruction *> &PartialMatch) {
+ std::pair<Value *, Value *> &PartialMatch) {
LLVM_DEBUG(dbgs() << "identifyNodeWithImplicitAdd " << *Real << " / " << *Imag
<< "\n");
@@ -413,58 +561,47 @@ ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd(
return nullptr;
}
- if (Real->getOpcode() != Instruction::FMul ||
- Imag->getOpcode() != Instruction::FMul) {
- LLVM_DEBUG(dbgs() << " - Real or imaginary instruction is not fmul\n");
+ if ((Real->getOpcode() != Instruction::FMul &&
+ Real->getOpcode() != Instruction::Mul) ||
+ (Imag->getOpcode() != Instruction::FMul &&
+ Imag->getOpcode() != Instruction::Mul)) {
+ LLVM_DEBUG(
+ dbgs() << " - Real or imaginary instruction is not fmul or mul\n");
return nullptr;
}
- Instruction *R0 = dyn_cast<Instruction>(Real->getOperand(0));
- Instruction *R1 = dyn_cast<Instruction>(Real->getOperand(1));
- Instruction *I0 = dyn_cast<Instruction>(Imag->getOperand(0));
- Instruction *I1 = dyn_cast<Instruction>(Imag->getOperand(1));
- if (!R0 || !R1 || !I0 || !I1) {
- LLVM_DEBUG(dbgs() << " - Mul operand not Instruction\n");
- return nullptr;
- }
+ Value *R0 = Real->getOperand(0);
+ Value *R1 = Real->getOperand(1);
+ Value *I0 = Imag->getOperand(0);
+ Value *I1 = Imag->getOperand(1);
// A +/+ has a rotation of 0. If any of the operands are fneg, we flip the
// rotations and use the operand.
unsigned Negs = 0;
- SmallVector<Instruction *> FNegs;
- if (R0->getOpcode() == Instruction::FNeg ||
- R1->getOpcode() == Instruction::FNeg) {
+ Value *Op;
+ if (match(R0, m_Neg(m_Value(Op)))) {
Negs |= 1;
- if (R0->getOpcode() == Instruction::FNeg) {
- FNegs.push_back(R0);
- R0 = dyn_cast<Instruction>(R0->getOperand(0));
- } else {
- FNegs.push_back(R1);
- R1 = dyn_cast<Instruction>(R1->getOperand(0));
- }
- if (!R0 || !R1)
- return nullptr;
+ R0 = Op;
+ } else if (match(R1, m_Neg(m_Value(Op)))) {
+ Negs |= 1;
+ R1 = Op;
}
- if (I0->getOpcode() == Instruction::FNeg ||
- I1->getOpcode() == Instruction::FNeg) {
+
+ if (isNeg(I0)) {
Negs |= 2;
Negs ^= 1;
- if (I0->getOpcode() == Instruction::FNeg) {
- FNegs.push_back(I0);
- I0 = dyn_cast<Instruction>(I0->getOperand(0));
- } else {
- FNegs.push_back(I1);
- I1 = dyn_cast<Instruction>(I1->getOperand(0));
- }
- if (!I0 || !I1)
- return nullptr;
+    I0 = getNegOperand(I0);
+ } else if (match(I1, m_Neg(m_Value(Op)))) {
+ Negs |= 2;
+ Negs ^= 1;
+ I1 = Op;
}
ComplexDeinterleavingRotation Rotation = (ComplexDeinterleavingRotation)Negs;
- Instruction *CommonOperand;
- Instruction *UncommonRealOp;
- Instruction *UncommonImagOp;
+ Value *CommonOperand;
+ Value *UncommonRealOp;
+ Value *UncommonImagOp;
if (R0 == I0 || R0 == I1) {
CommonOperand = R0;
@@ -512,7 +649,6 @@ ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd(
Node->Rotation = Rotation;
Node->addOperand(CommonNode);
Node->addOperand(UncommonNode);
- Node->InternalInstructions.append(FNegs);
return submitCompositeNode(Node);
}
@@ -522,26 +658,29 @@ ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
LLVM_DEBUG(dbgs() << "identifyPartialMul " << *Real << " / " << *Imag
<< "\n");
// Determine rotation
+ auto IsAdd = [](unsigned Op) {
+ return Op == Instruction::FAdd || Op == Instruction::Add;
+ };
+ auto IsSub = [](unsigned Op) {
+ return Op == Instruction::FSub || Op == Instruction::Sub;
+ };
ComplexDeinterleavingRotation Rotation;
- if (Real->getOpcode() == Instruction::FAdd &&
- Imag->getOpcode() == Instruction::FAdd)
+ if (IsAdd(Real->getOpcode()) && IsAdd(Imag->getOpcode()))
Rotation = ComplexDeinterleavingRotation::Rotation_0;
- else if (Real->getOpcode() == Instruction::FSub &&
- Imag->getOpcode() == Instruction::FAdd)
+ else if (IsSub(Real->getOpcode()) && IsAdd(Imag->getOpcode()))
Rotation = ComplexDeinterleavingRotation::Rotation_90;
- else if (Real->getOpcode() == Instruction::FSub &&
- Imag->getOpcode() == Instruction::FSub)
+ else if (IsSub(Real->getOpcode()) && IsSub(Imag->getOpcode()))
Rotation = ComplexDeinterleavingRotation::Rotation_180;
- else if (Real->getOpcode() == Instruction::FAdd &&
- Imag->getOpcode() == Instruction::FSub)
+ else if (IsAdd(Real->getOpcode()) && IsSub(Imag->getOpcode()))
Rotation = ComplexDeinterleavingRotation::Rotation_270;
else {
LLVM_DEBUG(dbgs() << " - Unhandled rotation.\n");
return nullptr;
}
- if (!Real->getFastMathFlags().allowContract() ||
- !Imag->getFastMathFlags().allowContract()) {
+ if (isa<FPMathOperator>(Real) &&
+ (!Real->getFastMathFlags().allowContract() ||
+ !Imag->getFastMathFlags().allowContract())) {
LLVM_DEBUG(dbgs() << " - Contract is missing from the FastMath flags.\n");
return nullptr;
}
@@ -560,18 +699,14 @@ ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
return nullptr;
}
- Instruction *R0 = dyn_cast<Instruction>(RealMulI->getOperand(0));
- Instruction *R1 = dyn_cast<Instruction>(RealMulI->getOperand(1));
- Instruction *I0 = dyn_cast<Instruction>(ImagMulI->getOperand(0));
- Instruction *I1 = dyn_cast<Instruction>(ImagMulI->getOperand(1));
- if (!R0 || !R1 || !I0 || !I1) {
- LLVM_DEBUG(dbgs() << " - Mul operand not Instruction\n");
- return nullptr;
- }
+ Value *R0 = RealMulI->getOperand(0);
+ Value *R1 = RealMulI->getOperand(1);
+ Value *I0 = ImagMulI->getOperand(0);
+ Value *I1 = ImagMulI->getOperand(1);
- Instruction *CommonOperand;
- Instruction *UncommonRealOp;
- Instruction *UncommonImagOp;
+ Value *CommonOperand;
+ Value *UncommonRealOp;
+ Value *UncommonImagOp;
if (R0 == I0 || R0 == I1) {
CommonOperand = R0;
@@ -589,7 +724,7 @@ ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
Rotation == ComplexDeinterleavingRotation::Rotation_270)
std::swap(UncommonRealOp, UncommonImagOp);
- std::pair<Instruction *, Instruction *> PartialMatch(
+ std::pair<Value *, Value *> PartialMatch(
(Rotation == ComplexDeinterleavingRotation::Rotation_0 ||
Rotation == ComplexDeinterleavingRotation::Rotation_180)
? CommonOperand
@@ -598,8 +733,16 @@ ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
Rotation == ComplexDeinterleavingRotation::Rotation_270)
? CommonOperand
: nullptr);
- NodePtr CNode = identifyNodeWithImplicitAdd(
- cast<Instruction>(CR), cast<Instruction>(CI), PartialMatch);
+
+ auto *CRInst = dyn_cast<Instruction>(CR);
+ auto *CIInst = dyn_cast<Instruction>(CI);
+
+ if (!CRInst || !CIInst) {
+ LLVM_DEBUG(dbgs() << " - Common operands are not instructions.\n");
+ return nullptr;
+ }
+
+ NodePtr CNode = identifyNodeWithImplicitAdd(CRInst, CIInst, PartialMatch);
if (!CNode) {
LLVM_DEBUG(dbgs() << " - No cnode identified\n");
return nullptr;
@@ -620,8 +763,6 @@ ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
NodePtr Node = prepareCompositeNode(
ComplexDeinterleavingOperation::CMulPartial, Real, Imag);
- Node->addInstruction(RealMulI);
- Node->addInstruction(ImagMulI);
Node->Rotation = Rotation;
Node->addOperand(CommonRes);
Node->addOperand(UncommonRes);
@@ -696,129 +837,603 @@ static bool isInstructionPairMul(Instruction *A, Instruction *B) {
return match(A, Pattern) && match(B, Pattern);
}
+static bool isInstructionPotentiallySymmetric(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FNeg:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ return true;
+ default:
+ return false;
+ }
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifySymmetricOperation(Instruction *Real,
+ Instruction *Imag) {
+ if (Real->getOpcode() != Imag->getOpcode())
+ return nullptr;
+
+ if (!isInstructionPotentiallySymmetric(Real) ||
+ !isInstructionPotentiallySymmetric(Imag))
+ return nullptr;
+
+ auto *R0 = Real->getOperand(0);
+ auto *I0 = Imag->getOperand(0);
+
+ NodePtr Op0 = identifyNode(R0, I0);
+ NodePtr Op1 = nullptr;
+ if (Op0 == nullptr)
+ return nullptr;
+
+ if (Real->isBinaryOp()) {
+ auto *R1 = Real->getOperand(1);
+ auto *I1 = Imag->getOperand(1);
+ Op1 = identifyNode(R1, I1);
+ if (Op1 == nullptr)
+ return nullptr;
+ }
+
+ if (isa<FPMathOperator>(Real) &&
+ Real->getFastMathFlags() != Imag->getFastMathFlags())
+ return nullptr;
+
+ auto Node = prepareCompositeNode(ComplexDeinterleavingOperation::Symmetric,
+ Real, Imag);
+ Node->Opcode = Real->getOpcode();
+ if (isa<FPMathOperator>(Real))
+ Node->Flags = Real->getFastMathFlags();
+
+ Node->addOperand(Op0);
+ if (Real->isBinaryOp())
+ Node->addOperand(Op1);
+
+ return submitCompositeNode(Node);
+}
+
ComplexDeinterleavingGraph::NodePtr
-ComplexDeinterleavingGraph::identifyNode(Instruction *Real, Instruction *Imag) {
- LLVM_DEBUG(dbgs() << "identifyNode on " << *Real << " / " << *Imag << "\n");
- if (NodePtr CN = getContainingComposite(Real, Imag)) {
+ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) {
+ LLVM_DEBUG(dbgs() << "identifyNode on " << *R << " / " << *I << "\n");
+ assert(R->getType() == I->getType() &&
+ "Real and imaginary parts should not have different types");
+ if (NodePtr CN = getContainingComposite(R, I)) {
LLVM_DEBUG(dbgs() << " - Folding to existing node\n");
return CN;
}
- auto *RealShuffle = dyn_cast<ShuffleVectorInst>(Real);
- auto *ImagShuffle = dyn_cast<ShuffleVectorInst>(Imag);
- if (RealShuffle && ImagShuffle) {
- Value *RealOp1 = RealShuffle->getOperand(1);
- if (!isa<UndefValue>(RealOp1) && !isa<ConstantAggregateZero>(RealOp1)) {
- LLVM_DEBUG(dbgs() << " - RealOp1 is not undef or zero.\n");
- return nullptr;
- }
- Value *ImagOp1 = ImagShuffle->getOperand(1);
- if (!isa<UndefValue>(ImagOp1) && !isa<ConstantAggregateZero>(ImagOp1)) {
- LLVM_DEBUG(dbgs() << " - ImagOp1 is not undef or zero.\n");
- return nullptr;
- }
+ if (NodePtr CN = identifySplat(R, I))
+ return CN;
+
+ auto *Real = dyn_cast<Instruction>(R);
+ auto *Imag = dyn_cast<Instruction>(I);
+ if (!Real || !Imag)
+ return nullptr;
+
+ if (NodePtr CN = identifyDeinterleave(Real, Imag))
+ return CN;
+
+ if (NodePtr CN = identifyPHINode(Real, Imag))
+ return CN;
+
+ if (NodePtr CN = identifySelectNode(Real, Imag))
+ return CN;
+
+ auto *VTy = cast<VectorType>(Real->getType());
+ auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+
+ bool HasCMulSupport = TL->isComplexDeinterleavingOperationSupported(
+ ComplexDeinterleavingOperation::CMulPartial, NewVTy);
+ bool HasCAddSupport = TL->isComplexDeinterleavingOperationSupported(
+ ComplexDeinterleavingOperation::CAdd, NewVTy);
+
+ if (HasCMulSupport && isInstructionPairMul(Real, Imag)) {
+ if (NodePtr CN = identifyPartialMul(Real, Imag))
+ return CN;
+ }
+
+ if (HasCAddSupport && isInstructionPairAdd(Real, Imag)) {
+ if (NodePtr CN = identifyAdd(Real, Imag))
+ return CN;
+ }
+
+ if (HasCMulSupport && HasCAddSupport) {
+ if (NodePtr CN = identifyReassocNodes(Real, Imag))
+ return CN;
+ }
+
+ if (NodePtr CN = identifySymmetricOperation(Real, Imag))
+ return CN;
- Value *RealOp0 = RealShuffle->getOperand(0);
- Value *ImagOp0 = ImagShuffle->getOperand(0);
+ LLVM_DEBUG(dbgs() << " - Not recognised as a valid pattern.\n");
+ return nullptr;
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyReassocNodes(Instruction *Real,
+ Instruction *Imag) {
+ auto IsOperationSupported = [](unsigned Opcode) -> bool {
+ return Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FNeg || Opcode == Instruction::Add ||
+ Opcode == Instruction::Sub;
+ };
+
+ if (!IsOperationSupported(Real->getOpcode()) ||
+ !IsOperationSupported(Imag->getOpcode()))
+ return nullptr;
- if (RealOp0 != ImagOp0) {
- LLVM_DEBUG(dbgs() << " - Shuffle operands are not equal.\n");
+ std::optional<FastMathFlags> Flags;
+ if (isa<FPMathOperator>(Real)) {
+ if (Real->getFastMathFlags() != Imag->getFastMathFlags()) {
+ LLVM_DEBUG(dbgs() << "The flags in Real and Imaginary instructions are "
+ "not identical\n");
return nullptr;
}
- ArrayRef<int> RealMask = RealShuffle->getShuffleMask();
- ArrayRef<int> ImagMask = ImagShuffle->getShuffleMask();
- if (!isDeinterleavingMask(RealMask) || !isDeinterleavingMask(ImagMask)) {
- LLVM_DEBUG(dbgs() << " - Masks are not deinterleaving.\n");
+ Flags = Real->getFastMathFlags();
+ if (!Flags->allowReassoc()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "the 'Reassoc' attribute is missing in the FastMath flags\n");
return nullptr;
}
+ }
- if (RealMask[0] != 0 || ImagMask[0] != 1) {
- LLVM_DEBUG(dbgs() << " - Masks do not have the correct initial value.\n");
- return nullptr;
+ // Collect multiplications and addend instructions from the given instruction
+  // while traversing its operands. Additionally, verify that all instructions
+ // have the same fast math flags.
+ auto Collect = [&Flags](Instruction *Insn, std::vector<Product> &Muls,
+ std::list<Addend> &Addends) -> bool {
+ SmallVector<PointerIntPair<Value *, 1, bool>> Worklist = {{Insn, true}};
+ SmallPtrSet<Value *, 8> Visited;
+ while (!Worklist.empty()) {
+ auto [V, IsPositive] = Worklist.back();
+ Worklist.pop_back();
+ if (!Visited.insert(V).second)
+ continue;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ Addends.emplace_back(V, IsPositive);
+ continue;
+ }
+
+ // If an instruction has more than one user, it indicates that it either
+ // has an external user, which will be later checked by the checkNodes
+ // function, or it is a subexpression utilized by multiple expressions. In
+ // the latter case, we will attempt to separately identify the complex
+ // operation from here in order to create a shared
+ // ComplexDeinterleavingCompositeNode.
+ if (I != Insn && I->getNumUses() > 1) {
+ LLVM_DEBUG(dbgs() << "Found potential sub-expression: " << *I << "\n");
+ Addends.emplace_back(I, IsPositive);
+ continue;
+ }
+ switch (I->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::Add:
+ Worklist.emplace_back(I->getOperand(1), IsPositive);
+ Worklist.emplace_back(I->getOperand(0), IsPositive);
+ break;
+ case Instruction::FSub:
+ Worklist.emplace_back(I->getOperand(1), !IsPositive);
+ Worklist.emplace_back(I->getOperand(0), IsPositive);
+ break;
+ case Instruction::Sub:
+ if (isNeg(I)) {
+ Worklist.emplace_back(getNegOperand(I), !IsPositive);
+ } else {
+ Worklist.emplace_back(I->getOperand(1), !IsPositive);
+ Worklist.emplace_back(I->getOperand(0), IsPositive);
+ }
+ break;
+ case Instruction::FMul:
+ case Instruction::Mul: {
+ Value *A, *B;
+ if (isNeg(I->getOperand(0))) {
+ A = getNegOperand(I->getOperand(0));
+ IsPositive = !IsPositive;
+ } else {
+ A = I->getOperand(0);
+ }
+
+ if (isNeg(I->getOperand(1))) {
+ B = getNegOperand(I->getOperand(1));
+ IsPositive = !IsPositive;
+ } else {
+ B = I->getOperand(1);
+ }
+ Muls.push_back(Product{A, B, IsPositive});
+ break;
+ }
+ case Instruction::FNeg:
+ Worklist.emplace_back(I->getOperand(0), !IsPositive);
+ break;
+ default:
+ Addends.emplace_back(I, IsPositive);
+ continue;
+ }
+
+ if (Flags && I->getFastMathFlags() != *Flags) {
+ LLVM_DEBUG(dbgs() << "The instruction's fast math flags are "
+ "inconsistent with the root instructions' flags: "
+ << *I << "\n");
+ return false;
+ }
}
+ return true;
+ };
- // Type checking, the shuffle type should be a vector type of the same
- // scalar type, but half the size
- auto CheckType = [&](ShuffleVectorInst *Shuffle) {
- Value *Op = Shuffle->getOperand(0);
- auto *ShuffleTy = cast<FixedVectorType>(Shuffle->getType());
- auto *OpTy = cast<FixedVectorType>(Op->getType());
+ std::vector<Product> RealMuls, ImagMuls;
+ std::list<Addend> RealAddends, ImagAddends;
+ if (!Collect(Real, RealMuls, RealAddends) ||
+ !Collect(Imag, ImagMuls, ImagAddends))
+ return nullptr;
- if (OpTy->getScalarType() != ShuffleTy->getScalarType())
- return false;
- if ((ShuffleTy->getNumElements() * 2) != OpTy->getNumElements())
- return false;
+ if (RealAddends.size() != ImagAddends.size())
+ return nullptr;
- return true;
- };
+ NodePtr FinalNode;
+ if (!RealMuls.empty() || !ImagMuls.empty()) {
+ // If there are multiplicands, extract positive addend and use it as an
+ // accumulator
+ FinalNode = extractPositiveAddend(RealAddends, ImagAddends);
+ FinalNode = identifyMultiplications(RealMuls, ImagMuls, FinalNode);
+ if (!FinalNode)
+ return nullptr;
+ }
- auto CheckDeinterleavingShuffle = [&](ShuffleVectorInst *Shuffle) -> bool {
- if (!CheckType(Shuffle))
- return false;
+ // Identify and process remaining additions
+ if (!RealAddends.empty() || !ImagAddends.empty()) {
+ FinalNode = identifyAdditions(RealAddends, ImagAddends, Flags, FinalNode);
+ if (!FinalNode)
+ return nullptr;
+ }
+ assert(FinalNode && "FinalNode can not be nullptr here");
+ // Set the Real and Imag fields of the final node and submit it
+ FinalNode->Real = Real;
+ FinalNode->Imag = Imag;
+ submitCompositeNode(FinalNode);
+ return FinalNode;
+}
- ArrayRef<int> Mask = Shuffle->getShuffleMask();
- int Last = *Mask.rbegin();
+bool ComplexDeinterleavingGraph::collectPartialMuls(
+ const std::vector<Product> &RealMuls, const std::vector<Product> &ImagMuls,
+ std::vector<PartialMulCandidate> &PartialMulCandidates) {
+ // Helper function to extract a common operand from two products
+ auto FindCommonInstruction = [](const Product &Real,
+ const Product &Imag) -> Value * {
+ if (Real.Multiplicand == Imag.Multiplicand ||
+ Real.Multiplicand == Imag.Multiplier)
+ return Real.Multiplicand;
- Value *Op = Shuffle->getOperand(0);
- auto *OpTy = cast<FixedVectorType>(Op->getType());
- int NumElements = OpTy->getNumElements();
+ if (Real.Multiplier == Imag.Multiplicand ||
+ Real.Multiplier == Imag.Multiplier)
+ return Real.Multiplier;
- // Ensure that the deinterleaving shuffle only pulls from the first
- // shuffle operand.
- return Last < NumElements;
- };
+ return nullptr;
+ };
+
+  // Iterate over the real and imaginary multiplications to find common
+  // operands. If a common operand is found, a partial multiplication candidate
+  // is created and added to the candidates vector. The function returns false
+  // if no common operand is found for some product.
+ for (unsigned i = 0; i < RealMuls.size(); ++i) {
+ bool FoundCommon = false;
+ for (unsigned j = 0; j < ImagMuls.size(); ++j) {
+ auto *Common = FindCommonInstruction(RealMuls[i], ImagMuls[j]);
+ if (!Common)
+ continue;
+
+ auto *A = RealMuls[i].Multiplicand == Common ? RealMuls[i].Multiplier
+ : RealMuls[i].Multiplicand;
+ auto *B = ImagMuls[j].Multiplicand == Common ? ImagMuls[j].Multiplier
+ : ImagMuls[j].Multiplicand;
+
+ auto Node = identifyNode(A, B);
+ if (Node) {
+ FoundCommon = true;
+ PartialMulCandidates.push_back({Common, Node, i, j, false});
+ }
+
+ Node = identifyNode(B, A);
+ if (Node) {
+ FoundCommon = true;
+ PartialMulCandidates.push_back({Common, Node, i, j, true});
+ }
+ }
+ if (!FoundCommon)
+ return false;
+ }
+ return true;
+}
- if (RealShuffle->getType() != ImagShuffle->getType()) {
- LLVM_DEBUG(dbgs() << " - Shuffle types aren't equal.\n");
- return nullptr;
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyMultiplications(
+ std::vector<Product> &RealMuls, std::vector<Product> &ImagMuls,
+ NodePtr Accumulator = nullptr) {
+ if (RealMuls.size() != ImagMuls.size())
+ return nullptr;
+
+ std::vector<PartialMulCandidate> Info;
+ if (!collectPartialMuls(RealMuls, ImagMuls, Info))
+ return nullptr;
+
+ // Map to store common instruction to node pointers
+ std::map<Value *, NodePtr> CommonToNode;
+ std::vector<bool> Processed(Info.size(), false);
+ for (unsigned I = 0; I < Info.size(); ++I) {
+ if (Processed[I])
+ continue;
+
+ PartialMulCandidate &InfoA = Info[I];
+ for (unsigned J = I + 1; J < Info.size(); ++J) {
+ if (Processed[J])
+ continue;
+
+ PartialMulCandidate &InfoB = Info[J];
+ auto *InfoReal = &InfoA;
+ auto *InfoImag = &InfoB;
+
+ auto NodeFromCommon = identifyNode(InfoReal->Common, InfoImag->Common);
+ if (!NodeFromCommon) {
+ std::swap(InfoReal, InfoImag);
+ NodeFromCommon = identifyNode(InfoReal->Common, InfoImag->Common);
+ }
+ if (!NodeFromCommon)
+ continue;
+
+ CommonToNode[InfoReal->Common] = NodeFromCommon;
+ CommonToNode[InfoImag->Common] = NodeFromCommon;
+ Processed[I] = true;
+ Processed[J] = true;
}
- if (!CheckDeinterleavingShuffle(RealShuffle)) {
- LLVM_DEBUG(dbgs() << " - RealShuffle is invalid type.\n");
+ }
+
+ std::vector<bool> ProcessedReal(RealMuls.size(), false);
+ std::vector<bool> ProcessedImag(ImagMuls.size(), false);
+ NodePtr Result = Accumulator;
+ for (auto &PMI : Info) {
+ if (ProcessedReal[PMI.RealIdx] || ProcessedImag[PMI.ImagIdx])
+ continue;
+
+ auto It = CommonToNode.find(PMI.Common);
+ // TODO: Process independent complex multiplications. Cases like this:
+ // A.real() * B where both A and B are complex numbers.
+ if (It == CommonToNode.end()) {
+ LLVM_DEBUG({
+ dbgs() << "Unprocessed independent partial multiplication:\n";
+ for (auto *Mul : {&RealMuls[PMI.RealIdx], &ImagMuls[PMI.ImagIdx]})
+ dbgs().indent(4) << (Mul->IsPositive ? "+" : "-") << *Mul->Multiplier
+ << " multiplied by " << *Mul->Multiplicand << "\n";
+ });
return nullptr;
}
- if (!CheckDeinterleavingShuffle(ImagShuffle)) {
- LLVM_DEBUG(dbgs() << " - ImagShuffle is invalid type.\n");
- return nullptr;
+
+ auto &RealMul = RealMuls[PMI.RealIdx];
+ auto &ImagMul = ImagMuls[PMI.ImagIdx];
+
+ auto NodeA = It->second;
+ auto NodeB = PMI.Node;
+ auto IsMultiplicandReal = PMI.Common == NodeA->Real;
+ // The following table illustrates the relationship between multiplications
+ // and rotations. If we consider the multiplication (X + iY) * (U + iV), we
+ // can see:
+ //
+ // Rotation | Real | Imag |
+ // ---------+--------+--------+
+ // 0 | x * u | x * v |
+ // 90 | -y * v | y * u |
+ // 180 | -x * u | -x * v |
+ // 270 | y * v | -y * u |
+ //
+ // Check if the candidate can indeed be represented by partial
+ // multiplication
+ // TODO: Add support for multiplication by complex one
+ if ((IsMultiplicandReal && PMI.IsNodeInverted) ||
+ (!IsMultiplicandReal && !PMI.IsNodeInverted))
+ continue;
+
+ // Determine the rotation based on the multiplications
+ ComplexDeinterleavingRotation Rotation;
+ if (IsMultiplicandReal) {
+ // Detect 0 and 180 degrees rotation
+ if (RealMul.IsPositive && ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_0;
+ else if (!RealMul.IsPositive && !ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_180;
+ else
+ continue;
+
+ } else {
+ // Detect 90 and 270 degrees rotation
+ if (!RealMul.IsPositive && ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_90;
+ else if (RealMul.IsPositive && !ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_270;
+ else
+ continue;
}
- NodePtr PlaceholderNode =
- prepareCompositeNode(llvm::ComplexDeinterleavingOperation::Shuffle,
- RealShuffle, ImagShuffle);
- PlaceholderNode->ReplacementNode = RealShuffle->getOperand(0);
- return submitCompositeNode(PlaceholderNode);
+ LLVM_DEBUG({
+ dbgs() << "Identified partial multiplication (X, Y) * (U, V):\n";
+ dbgs().indent(4) << "X: " << *NodeA->Real << "\n";
+ dbgs().indent(4) << "Y: " << *NodeA->Imag << "\n";
+ dbgs().indent(4) << "U: " << *NodeB->Real << "\n";
+ dbgs().indent(4) << "V: " << *NodeB->Imag << "\n";
+ dbgs().indent(4) << "Rotation - " << (int)Rotation * 90 << "\n";
+ });
+
+ NodePtr NodeMul = prepareCompositeNode(
+ ComplexDeinterleavingOperation::CMulPartial, nullptr, nullptr);
+ NodeMul->Rotation = Rotation;
+ NodeMul->addOperand(NodeA);
+ NodeMul->addOperand(NodeB);
+ if (Result)
+ NodeMul->addOperand(Result);
+ submitCompositeNode(NodeMul);
+ Result = NodeMul;
+ ProcessedReal[PMI.RealIdx] = true;
+ ProcessedImag[PMI.ImagIdx] = true;
}
- if (RealShuffle || ImagShuffle)
+
+ // Ensure all products have been processed; if not, return nullptr.
+ if (!all_of(ProcessedReal, [](bool V) { return V; }) ||
+ !all_of(ProcessedImag, [](bool V) { return V; })) {
+
+ // Dump debug information about which partial multiplications are not
+ // processed.
+ LLVM_DEBUG({
+ dbgs() << "Unprocessed products (Real):\n";
+ for (size_t i = 0; i < ProcessedReal.size(); ++i) {
+ if (!ProcessedReal[i])
+ dbgs().indent(4) << (RealMuls[i].IsPositive ? "+" : "-")
+ << *RealMuls[i].Multiplier << " multiplied by "
+ << *RealMuls[i].Multiplicand << "\n";
+ }
+ dbgs() << "Unprocessed products (Imag):\n";
+ for (size_t i = 0; i < ProcessedImag.size(); ++i) {
+ if (!ProcessedImag[i])
+ dbgs().indent(4) << (ImagMuls[i].IsPositive ? "+" : "-")
+ << *ImagMuls[i].Multiplier << " multiplied by "
+ << *ImagMuls[i].Multiplicand << "\n";
+ }
+ });
return nullptr;
+ }
- auto *VTy = cast<FixedVectorType>(Real->getType());
- auto *NewVTy =
- FixedVectorType::get(VTy->getScalarType(), VTy->getNumElements() * 2);
+ return Result;
+}
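The rotation table above is the expansion of the complex product written term by term; as a sketch of the arithmetic it encodes (not text from the patch):

  (x + iy) * (u + iv) = (x*u - y*v) + i*(x*v + y*u)

A Rotation_0 partial multiplication contributes (x*u, x*v) and a Rotation_90 partial contributes (-y*v, y*u), so chaining the second CMulPartial node onto the first as its accumulator reproduces the full product. This is the shape targets with complex-multiply instructions (e.g. AArch64's FCMLA pairs) typically expect; the target mapping is an assumption about a typical backend, not something this hunk states.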
- if (TL->isComplexDeinterleavingOperationSupported(
- ComplexDeinterleavingOperation::CMulPartial, NewVTy) &&
- isInstructionPairMul(Real, Imag)) {
- return identifyPartialMul(Real, Imag);
- }
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyAdditions(
+ std::list<Addend> &RealAddends, std::list<Addend> &ImagAddends,
+ std::optional<FastMathFlags> Flags, NodePtr Accumulator = nullptr) {
+ if (RealAddends.size() != ImagAddends.size())
+ return nullptr;
- if (TL->isComplexDeinterleavingOperationSupported(
- ComplexDeinterleavingOperation::CAdd, NewVTy) &&
- isInstructionPairAdd(Real, Imag)) {
- return identifyAdd(Real, Imag);
+ NodePtr Result;
+ // If we have an accumulator, use it as the first addend
+ if (Accumulator)
+ Result = Accumulator;
+ // Otherwise find an element with both positive real and imaginary parts.
+ else
+ Result = extractPositiveAddend(RealAddends, ImagAddends);
+
+ if (!Result)
+ return nullptr;
+
+ while (!RealAddends.empty()) {
+ auto ItR = RealAddends.begin();
+ auto [R, IsPositiveR] = *ItR;
+
+ bool FoundImag = false;
+ for (auto ItI = ImagAddends.begin(); ItI != ImagAddends.end(); ++ItI) {
+ auto [I, IsPositiveI] = *ItI;
+ ComplexDeinterleavingRotation Rotation;
+ if (IsPositiveR && IsPositiveI)
+ Rotation = ComplexDeinterleavingRotation::Rotation_0;
+ else if (!IsPositiveR && IsPositiveI)
+ Rotation = ComplexDeinterleavingRotation::Rotation_90;
+ else if (!IsPositiveR && !IsPositiveI)
+ Rotation = ComplexDeinterleavingRotation::Rotation_180;
+ else
+ Rotation = ComplexDeinterleavingRotation::Rotation_270;
+
+ NodePtr AddNode;
+ if (Rotation == ComplexDeinterleavingRotation::Rotation_0 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_180) {
+ AddNode = identifyNode(R, I);
+ } else {
+ AddNode = identifyNode(I, R);
+ }
+ if (AddNode) {
+ LLVM_DEBUG({
+ dbgs() << "Identified addition:\n";
+ dbgs().indent(4) << "X: " << *R << "\n";
+ dbgs().indent(4) << "Y: " << *I << "\n";
+ dbgs().indent(4) << "Rotation - " << (int)Rotation * 90 << "\n";
+ });
+
+ NodePtr TmpNode;
+ if (Rotation == llvm::ComplexDeinterleavingRotation::Rotation_0) {
+ TmpNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr);
+ if (Flags) {
+ TmpNode->Opcode = Instruction::FAdd;
+ TmpNode->Flags = *Flags;
+ } else {
+ TmpNode->Opcode = Instruction::Add;
+ }
+ } else if (Rotation ==
+ llvm::ComplexDeinterleavingRotation::Rotation_180) {
+ TmpNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr);
+ if (Flags) {
+ TmpNode->Opcode = Instruction::FSub;
+ TmpNode->Flags = *Flags;
+ } else {
+ TmpNode->Opcode = Instruction::Sub;
+ }
+ } else {
+ TmpNode = prepareCompositeNode(ComplexDeinterleavingOperation::CAdd,
+ nullptr, nullptr);
+ TmpNode->Rotation = Rotation;
+ }
+
+ TmpNode->addOperand(Result);
+ TmpNode->addOperand(AddNode);
+ submitCompositeNode(TmpNode);
+ Result = TmpNode;
+ RealAddends.erase(ItR);
+ ImagAddends.erase(ItI);
+ FoundImag = true;
+ break;
+ }
+ }
+ if (!FoundImag)
+ return nullptr;
}
+ return Result;
+}
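One way to read the sign logic above (a sketch using the usual complex-addition identities, not stated in the patch): adding a value rotated by theta to (a + ib) gives

  theta = 0:   (a + c) + i*(b + d)      theta = 90:  (a - d) + i*(b + c)
  theta = 180: (a - c) + i*(b - d)      theta = 270: (a + d) + i*(b - c)

which matches the IsPositiveR/IsPositiveI combinations, and also explains why the operand node is built as identifyNode(I, R) in the 90/270 cases: there the rotated value's real part appears in the imaginary addend and its imaginary part in the real addend.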
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::extractPositiveAddend(
+ std::list<Addend> &RealAddends, std::list<Addend> &ImagAddends) {
+ for (auto ItR = RealAddends.begin(); ItR != RealAddends.end(); ++ItR) {
+ for (auto ItI = ImagAddends.begin(); ItI != ImagAddends.end(); ++ItI) {
+ auto [R, IsPositiveR] = *ItR;
+ auto [I, IsPositiveI] = *ItI;
+ if (IsPositiveR && IsPositiveI) {
+ auto Result = identifyNode(R, I);
+ if (Result) {
+ RealAddends.erase(ItR);
+ ImagAddends.erase(ItI);
+ return Result;
+ }
+ }
+ }
+ }
return nullptr;
}
bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) {
- Instruction *Real;
- Instruction *Imag;
- if (!match(RootI, m_Shuffle(m_Instruction(Real), m_Instruction(Imag))))
- return false;
+ // This potential root instruction might already have been recognized as a
+ // reduction. Because RootToNode maps both the Real and Imaginary parts to
+ // the same CompositeNode, choose only one of them (either the Real or the
+ // Imag instruction) as the anchor for generating the complex instruction.
+ auto It = RootToNode.find(RootI);
+ if (It != RootToNode.end() && It->second->Real == RootI) {
+ OrderedRoots.push_back(RootI);
+ return true;
+ }
- RootValue = RootI;
- AllInstructions.insert(RootI);
- RootNode = identifyNode(Real, Imag);
+ auto RootNode = identifyRoot(RootI);
+ if (!RootNode)
+ return false;
LLVM_DEBUG({
Function *F = RootI->getFunction();
@@ -828,62 +1443,627 @@ bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) {
dump(dbgs());
dbgs() << "\n";
});
+ RootToNode[RootI] = RootNode;
+ OrderedRoots.push_back(RootI);
+ return true;
+}
- // Check all instructions have internal uses
- for (const auto &Node : CompositeNodes) {
- if (!Node->hasAllInternalUses(AllInstructions)) {
- LLVM_DEBUG(dbgs() << " - Invalid internal uses\n");
- return false;
+bool ComplexDeinterleavingGraph::collectPotentialReductions(BasicBlock *B) {
+ bool FoundPotentialReduction = false;
+
+ auto *Br = dyn_cast<BranchInst>(B->getTerminator());
+ if (!Br || Br->getNumSuccessors() != 2)
+ return false;
+
+ // Identify simple one-block loop
+ if (Br->getSuccessor(0) != B && Br->getSuccessor(1) != B)
+ return false;
+
+ SmallVector<PHINode *> PHIs;
+ for (auto &PHI : B->phis()) {
+ if (PHI.getNumIncomingValues() != 2)
+ continue;
+
+ if (!PHI.getType()->isVectorTy())
+ continue;
+
+ auto *ReductionOp = dyn_cast<Instruction>(PHI.getIncomingValueForBlock(B));
+ if (!ReductionOp)
+ continue;
+
+ // Check if final instruction is reduced outside of current block
+ Instruction *FinalReduction = nullptr;
+ auto NumUsers = 0u;
+ for (auto *U : ReductionOp->users()) {
+ ++NumUsers;
+ if (U == &PHI)
+ continue;
+ FinalReduction = dyn_cast<Instruction>(U);
+ }
+
+ if (NumUsers != 2 || !FinalReduction || FinalReduction->getParent() == B ||
+ isa<PHINode>(FinalReduction))
+ continue;
+
+ ReductionInfo[ReductionOp] = {&PHI, FinalReduction};
+ BackEdge = B;
+ auto BackEdgeIdx = PHI.getBasicBlockIndex(B);
+ auto IncomingIdx = BackEdgeIdx == 0 ? 1 : 0;
+ Incoming = PHI.getIncomingBlock(IncomingIdx);
+ FoundPotentialReduction = true;
+
+ // If the initial value of PHINode is an Instruction, consider it a leaf
+ // value of a complex deinterleaving graph.
+ if (auto *InitPHI =
+ dyn_cast<Instruction>(PHI.getIncomingValueForBlock(Incoming)))
+ FinalInstructions.insert(InitPHI);
+ }
+ return FoundPotentialReduction;
+}
+
+void ComplexDeinterleavingGraph::identifyReductionNodes() {
+ SmallVector<bool> Processed(ReductionInfo.size(), false);
+ SmallVector<Instruction *> OperationInstruction;
+ for (auto &P : ReductionInfo)
+ OperationInstruction.push_back(P.first);
+
+ // Identify a complex computation by evaluating two reduction operations that
+ // potentially could be involved
+ for (size_t i = 0; i < OperationInstruction.size(); ++i) {
+ if (Processed[i])
+ continue;
+ for (size_t j = i + 1; j < OperationInstruction.size(); ++j) {
+ if (Processed[j])
+ continue;
+
+ auto *Real = OperationInstruction[i];
+ auto *Imag = OperationInstruction[j];
+ if (Real->getType() != Imag->getType())
+ continue;
+
+ RealPHI = ReductionInfo[Real].first;
+ ImagPHI = ReductionInfo[Imag].first;
+ PHIsFound = false;
+ auto Node = identifyNode(Real, Imag);
+ if (!Node) {
+ std::swap(Real, Imag);
+ std::swap(RealPHI, ImagPHI);
+ Node = identifyNode(Real, Imag);
+ }
+
+ // If a node is identified and the reduction PHINode is used in the chain of
+ // operations, mark its operation instructions as used to prevent
+ // re-identification and attach the node to the real part
+ if (Node && PHIsFound) {
+ LLVM_DEBUG(dbgs() << "Identified reduction starting from instructions: "
+ << *Real << " / " << *Imag << "\n");
+ Processed[i] = true;
+ Processed[j] = true;
+ auto RootNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::ReductionOperation, Real, Imag);
+ RootNode->addOperand(Node);
+ RootToNode[Real] = RootNode;
+ RootToNode[Imag] = RootNode;
+ submitCompositeNode(RootNode);
+ break;
+ }
}
}
- return RootNode != nullptr;
+
+ RealPHI = nullptr;
+ ImagPHI = nullptr;
}
-Value *ComplexDeinterleavingGraph::replaceNode(
- ComplexDeinterleavingGraph::RawNodePtr Node) {
- if (Node->ReplacementNode)
- return Node->ReplacementNode;
+bool ComplexDeinterleavingGraph::checkNodes() {
+ // Collect all instructions from roots to leaves
+ SmallPtrSet<Instruction *, 16> AllInstructions;
+ SmallVector<Instruction *, 8> Worklist;
+ for (auto &Pair : RootToNode)
+ Worklist.push_back(Pair.first);
- Value *Input0 = replaceNode(Node->Operands[0]);
- Value *Input1 = replaceNode(Node->Operands[1]);
- Value *Accumulator =
- Node->Operands.size() > 2 ? replaceNode(Node->Operands[2]) : nullptr;
+ // Extract all instructions that are used by all XCMLA/XCADD/ADD/SUB/NEG
+ // chains
+ while (!Worklist.empty()) {
+ auto *I = Worklist.back();
+ Worklist.pop_back();
- assert(Input0->getType() == Input1->getType() &&
- "Node inputs need to be of the same type");
+ if (!AllInstructions.insert(I).second)
+ continue;
- Node->ReplacementNode = TL->createComplexDeinterleavingIR(
- Node->Real, Node->Operation, Node->Rotation, Input0, Input1, Accumulator);
+ for (Value *Op : I->operands()) {
+ if (auto *OpI = dyn_cast<Instruction>(Op)) {
+ if (!FinalInstructions.count(I))
+ Worklist.emplace_back(OpI);
+ }
+ }
+ }
- assert(Node->ReplacementNode && "Target failed to create Intrinsic call.");
- NumComplexTransformations += 1;
- return Node->ReplacementNode;
+ // Find instructions that have users outside of chain
+ SmallVector<Instruction *, 2> OuterInstructions;
+ for (auto *I : AllInstructions) {
+ // Skip root nodes
+ if (RootToNode.count(I))
+ continue;
+
+ for (User *U : I->users()) {
+ if (AllInstructions.count(cast<Instruction>(U)))
+ continue;
+
+ // Found an instruction that is not used by XCMLA/XCADD chain
+ Worklist.emplace_back(I);
+ break;
+ }
+ }
+
+ // If any instructions are found to be used outside, find and remove roots
+ // that somehow connect to those instructions.
+ SmallPtrSet<Instruction *, 16> Visited;
+ while (!Worklist.empty()) {
+ auto *I = Worklist.back();
+ Worklist.pop_back();
+ if (!Visited.insert(I).second)
+ continue;
+
+ // Found an impacted root node. Removing it from the nodes to be
+ // deinterleaved
+ if (RootToNode.count(I)) {
+ LLVM_DEBUG(dbgs() << "Instruction " << *I
+ << " could be deinterleaved but its chain of complex "
+ "operations have an outside user\n");
+ RootToNode.erase(I);
+ }
+
+ if (!AllInstructions.count(I) || FinalInstructions.count(I))
+ continue;
+
+ for (User *U : I->users())
+ Worklist.emplace_back(cast<Instruction>(U));
+
+ for (Value *Op : I->operands()) {
+ if (auto *OpI = dyn_cast<Instruction>(Op))
+ Worklist.emplace_back(OpI);
+ }
+ }
+ return !RootToNode.empty();
}
-void ComplexDeinterleavingGraph::replaceNodes() {
- Value *R = replaceNode(RootNode.get());
- assert(R && "Unable to find replacement for RootValue");
- RootValue->replaceAllUsesWith(R);
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyRoot(Instruction *RootI) {
+ if (auto *Intrinsic = dyn_cast<IntrinsicInst>(RootI)) {
+ if (Intrinsic->getIntrinsicID() !=
+ Intrinsic::experimental_vector_interleave2)
+ return nullptr;
+
+ auto *Real = dyn_cast<Instruction>(Intrinsic->getOperand(0));
+ auto *Imag = dyn_cast<Instruction>(Intrinsic->getOperand(1));
+ if (!Real || !Imag)
+ return nullptr;
+
+ return identifyNode(Real, Imag);
+ }
+
+ auto *SVI = dyn_cast<ShuffleVectorInst>(RootI);
+ if (!SVI)
+ return nullptr;
+
+ // Look for a shufflevector that takes separate vectors of the real and
+ // imaginary components and recombines them into a single vector.
+ if (!isInterleavingMask(SVI->getShuffleMask()))
+ return nullptr;
+
+ Instruction *Real;
+ Instruction *Imag;
+ if (!match(RootI, m_Shuffle(m_Instruction(Real), m_Instruction(Imag))))
+ return nullptr;
+
+ return identifyNode(Real, Imag);
}
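For reference, an interleaving root takes one of two shapes: a call to llvm.experimental.vector.interleave2 whose two operands are the real and imaginary vectors, or a shufflevector whose mask alternates between its two sources; for a pair of <4 x float> inputs that mask would be <0, 4, 1, 5, 2, 6, 3, 7> (the concrete mask is an illustration, not taken from this hunk).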
-bool ComplexDeinterleavingCompositeNode::hasAllInternalUses(
- SmallPtrSet<Instruction *, 16> &AllInstructions) {
- if (Operation == ComplexDeinterleavingOperation::Shuffle)
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyDeinterleave(Instruction *Real,
+ Instruction *Imag) {
+ Instruction *I = nullptr;
+ Value *FinalValue = nullptr;
+ if (match(Real, m_ExtractValue<0>(m_Instruction(I))) &&
+ match(Imag, m_ExtractValue<1>(m_Specific(I))) &&
+ match(I, m_Intrinsic<Intrinsic::experimental_vector_deinterleave2>(
+ m_Value(FinalValue)))) {
+ NodePtr PlaceholderNode = prepareCompositeNode(
+ llvm::ComplexDeinterleavingOperation::Deinterleave, Real, Imag);
+ PlaceholderNode->ReplacementNode = FinalValue;
+ FinalInstructions.insert(Real);
+ FinalInstructions.insert(Imag);
+ return submitCompositeNode(PlaceholderNode);
+ }
+
+ auto *RealShuffle = dyn_cast<ShuffleVectorInst>(Real);
+ auto *ImagShuffle = dyn_cast<ShuffleVectorInst>(Imag);
+ if (!RealShuffle || !ImagShuffle) {
+ if (RealShuffle || ImagShuffle)
+ LLVM_DEBUG(dbgs() << " - There's a shuffle where there shouldn't be.\n");
+ return nullptr;
+ }
+
+ Value *RealOp1 = RealShuffle->getOperand(1);
+ if (!isa<UndefValue>(RealOp1) && !isa<ConstantAggregateZero>(RealOp1)) {
+ LLVM_DEBUG(dbgs() << " - RealOp1 is not undef or zero.\n");
+ return nullptr;
+ }
+ Value *ImagOp1 = ImagShuffle->getOperand(1);
+ if (!isa<UndefValue>(ImagOp1) && !isa<ConstantAggregateZero>(ImagOp1)) {
+ LLVM_DEBUG(dbgs() << " - ImagOp1 is not undef or zero.\n");
+ return nullptr;
+ }
+
+ Value *RealOp0 = RealShuffle->getOperand(0);
+ Value *ImagOp0 = ImagShuffle->getOperand(0);
+
+ if (RealOp0 != ImagOp0) {
+ LLVM_DEBUG(dbgs() << " - Shuffle operands are not equal.\n");
+ return nullptr;
+ }
+
+ ArrayRef<int> RealMask = RealShuffle->getShuffleMask();
+ ArrayRef<int> ImagMask = ImagShuffle->getShuffleMask();
+ if (!isDeinterleavingMask(RealMask) || !isDeinterleavingMask(ImagMask)) {
+ LLVM_DEBUG(dbgs() << " - Masks are not deinterleaving.\n");
+ return nullptr;
+ }
+
+ if (RealMask[0] != 0 || ImagMask[0] != 1) {
+ LLVM_DEBUG(dbgs() << " - Masks do not have the correct initial value.\n");
+ return nullptr;
+ }
+
+ // Type checking: the shuffle type should be a vector type with the same
+ // scalar type, but half the number of elements
+ auto CheckType = [&](ShuffleVectorInst *Shuffle) {
+ Value *Op = Shuffle->getOperand(0);
+ auto *ShuffleTy = cast<FixedVectorType>(Shuffle->getType());
+ auto *OpTy = cast<FixedVectorType>(Op->getType());
+
+ if (OpTy->getScalarType() != ShuffleTy->getScalarType())
+ return false;
+ if ((ShuffleTy->getNumElements() * 2) != OpTy->getNumElements())
+ return false;
+
return true;
+ };
- for (auto *User : Real->users()) {
- if (!AllInstructions.contains(cast<Instruction>(User)))
+ auto CheckDeinterleavingShuffle = [&](ShuffleVectorInst *Shuffle) -> bool {
+ if (!CheckType(Shuffle))
return false;
+
+ ArrayRef<int> Mask = Shuffle->getShuffleMask();
+ int Last = *Mask.rbegin();
+
+ Value *Op = Shuffle->getOperand(0);
+ auto *OpTy = cast<FixedVectorType>(Op->getType());
+ int NumElements = OpTy->getNumElements();
+
+ // Ensure that the deinterleaving shuffle only pulls from the first
+ // shuffle operand.
+ return Last < NumElements;
+ };
+
+ if (RealShuffle->getType() != ImagShuffle->getType()) {
+ LLVM_DEBUG(dbgs() << " - Shuffle types aren't equal.\n");
+ return nullptr;
}
- for (auto *User : Imag->users()) {
- if (!AllInstructions.contains(cast<Instruction>(User)))
- return false;
+ if (!CheckDeinterleavingShuffle(RealShuffle)) {
+ LLVM_DEBUG(dbgs() << " - RealShuffle is invalid type.\n");
+ return nullptr;
}
- for (auto *I : InternalInstructions) {
- for (auto *User : I->users()) {
- if (!AllInstructions.contains(cast<Instruction>(User)))
+ if (!CheckDeinterleavingShuffle(ImagShuffle)) {
+ LLVM_DEBUG(dbgs() << " - ImagShuffle is invalid type.\n");
+ return nullptr;
+ }
+
+ NodePtr PlaceholderNode =
+ prepareCompositeNode(llvm::ComplexDeinterleavingOperation::Deinterleave,
+ RealShuffle, ImagShuffle);
+ PlaceholderNode->ReplacementNode = RealShuffle->getOperand(0);
+ FinalInstructions.insert(RealShuffle);
+ FinalInstructions.insert(ImagShuffle);
+ return submitCompositeNode(PlaceholderNode);
+}
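As a concrete illustration (hypothetical types, not from the patch): for an <8 x float> operand, the two deinterleaving shuffles carry the masks <0, 2, 4, 6> (real lanes) and <1, 3, 5, 7> (imaginary lanes), both reading only the first shuffle operand; the alternative form is a single llvm.experimental.vector.deinterleave2 call whose two extracted results feed Real and Imag.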
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifySplat(Value *R, Value *I) {
+ auto IsSplat = [](Value *V) -> bool {
+ // Fixed-width vector with constants
+ if (isa<ConstantDataVector>(V))
+ return true;
+
+ VectorType *VTy;
+ ArrayRef<int> Mask;
+ // Splats are represented differently depending on whether the repeated
+ // value is a constant or an Instruction
+ if (auto *Const = dyn_cast<ConstantExpr>(V)) {
+ if (Const->getOpcode() != Instruction::ShuffleVector)
return false;
+ VTy = cast<VectorType>(Const->getType());
+ Mask = Const->getShuffleMask();
+ } else if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
+ VTy = Shuf->getType();
+ Mask = Shuf->getShuffleMask();
+ } else {
+ return false;
}
+
+ // When the data type is <1 x Type>, it's not possible to differentiate
+ // between the ComplexDeinterleaving::Deinterleave and
+ // ComplexDeinterleaving::Splat operations.
+ if (!VTy->isScalableTy() && VTy->getElementCount().getKnownMinValue() == 1)
+ return false;
+
+ return all_equal(Mask) && Mask[0] == 0;
+ };
+
+ if (!IsSplat(R) || !IsSplat(I))
+ return nullptr;
+
+ auto *Real = dyn_cast<Instruction>(R);
+ auto *Imag = dyn_cast<Instruction>(I);
+ if ((!Real && Imag) || (Real && !Imag))
+ return nullptr;
+
+ if (Real && Imag) {
+ // Non-constant splats should be in the same basic block
+ if (Real->getParent() != Imag->getParent())
+ return nullptr;
+
+ FinalInstructions.insert(Real);
+ FinalInstructions.insert(Imag);
}
- return true;
+ NodePtr PlaceholderNode =
+ prepareCompositeNode(ComplexDeinterleavingOperation::Splat, R, I);
+ return submitCompositeNode(PlaceholderNode);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyPHINode(Instruction *Real,
+ Instruction *Imag) {
+ if (Real != RealPHI || Imag != ImagPHI)
+ return nullptr;
+
+ PHIsFound = true;
+ NodePtr PlaceholderNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::ReductionPHI, Real, Imag);
+ return submitCompositeNode(PlaceholderNode);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifySelectNode(Instruction *Real,
+ Instruction *Imag) {
+ auto *SelectReal = dyn_cast<SelectInst>(Real);
+ auto *SelectImag = dyn_cast<SelectInst>(Imag);
+ if (!SelectReal || !SelectImag)
+ return nullptr;
+
+ Instruction *MaskA, *MaskB;
+ Instruction *AR, *AI, *RA, *BI;
+ if (!match(Real, m_Select(m_Instruction(MaskA), m_Instruction(AR),
+ m_Instruction(RA))) ||
+ !match(Imag, m_Select(m_Instruction(MaskB), m_Instruction(AI),
+ m_Instruction(BI))))
+ return nullptr;
+
+ if (MaskA != MaskB && !MaskA->isIdenticalTo(MaskB))
+ return nullptr;
+
+ if (!MaskA->getType()->isVectorTy())
+ return nullptr;
+
+ auto NodeA = identifyNode(AR, AI);
+ if (!NodeA)
+ return nullptr;
+
+ auto NodeB = identifyNode(RA, BI);
+ if (!NodeB)
+ return nullptr;
+
+ NodePtr PlaceholderNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::ReductionSelect, Real, Imag);
+ PlaceholderNode->addOperand(NodeA);
+ PlaceholderNode->addOperand(NodeB);
+ FinalInstructions.insert(MaskA);
+ FinalInstructions.insert(MaskB);
+ return submitCompositeNode(PlaceholderNode);
+}
+
+static Value *replaceSymmetricNode(IRBuilderBase &B, unsigned Opcode,
+ std::optional<FastMathFlags> Flags,
+ Value *InputA, Value *InputB) {
+ Value *I;
+ switch (Opcode) {
+ case Instruction::FNeg:
+ I = B.CreateFNeg(InputA);
+ break;
+ case Instruction::FAdd:
+ I = B.CreateFAdd(InputA, InputB);
+ break;
+ case Instruction::Add:
+ I = B.CreateAdd(InputA, InputB);
+ break;
+ case Instruction::FSub:
+ I = B.CreateFSub(InputA, InputB);
+ break;
+ case Instruction::Sub:
+ I = B.CreateSub(InputA, InputB);
+ break;
+ case Instruction::FMul:
+ I = B.CreateFMul(InputA, InputB);
+ break;
+ case Instruction::Mul:
+ I = B.CreateMul(InputA, InputB);
+ break;
+ default:
+ llvm_unreachable("Incorrect symmetric opcode");
+ }
+ if (Flags)
+ cast<Instruction>(I)->setFastMathFlags(*Flags);
+ return I;
+}
+
+Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
+ RawNodePtr Node) {
+ if (Node->ReplacementNode)
+ return Node->ReplacementNode;
+
+ auto ReplaceOperandIfExist = [&](RawNodePtr &Node, unsigned Idx) -> Value * {
+ return Node->Operands.size() > Idx
+ ? replaceNode(Builder, Node->Operands[Idx])
+ : nullptr;
+ };
+
+ Value *ReplacementNode;
+ switch (Node->Operation) {
+ case ComplexDeinterleavingOperation::CAdd:
+ case ComplexDeinterleavingOperation::CMulPartial:
+ case ComplexDeinterleavingOperation::Symmetric: {
+ Value *Input0 = ReplaceOperandIfExist(Node, 0);
+ Value *Input1 = ReplaceOperandIfExist(Node, 1);
+ Value *Accumulator = ReplaceOperandIfExist(Node, 2);
+ assert(!Input1 || (Input0->getType() == Input1->getType() &&
+ "Node inputs need to be of the same type"));
+ assert(!Accumulator ||
+ (Input0->getType() == Accumulator->getType() &&
+ "Accumulator and input need to be of the same type"));
+ if (Node->Operation == ComplexDeinterleavingOperation::Symmetric)
+ ReplacementNode = replaceSymmetricNode(Builder, Node->Opcode, Node->Flags,
+ Input0, Input1);
+ else
+ ReplacementNode = TL->createComplexDeinterleavingIR(
+ Builder, Node->Operation, Node->Rotation, Input0, Input1,
+ Accumulator);
+ break;
+ }
+ case ComplexDeinterleavingOperation::Deinterleave:
+ llvm_unreachable("Deinterleave node should already have ReplacementNode");
+ break;
+ case ComplexDeinterleavingOperation::Splat: {
+ auto *NewTy = VectorType::getDoubleElementsVectorType(
+ cast<VectorType>(Node->Real->getType()));
+ auto *R = dyn_cast<Instruction>(Node->Real);
+ auto *I = dyn_cast<Instruction>(Node->Imag);
+ if (R && I) {
+ // Splats that are not constant are interleaved where they are located
+ Instruction *InsertPoint = (I->comesBefore(R) ? R : I)->getNextNode();
+ IRBuilder<> IRB(InsertPoint);
+ ReplacementNode =
+ IRB.CreateIntrinsic(Intrinsic::experimental_vector_interleave2, NewTy,
+ {Node->Real, Node->Imag});
+ } else {
+ ReplacementNode =
+ Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2,
+ NewTy, {Node->Real, Node->Imag});
+ }
+ break;
+ }
+ case ComplexDeinterleavingOperation::ReductionPHI: {
+ // If Operation is ReductionPHI, a new empty PHINode is created.
+ // It is filled later when the ReductionOperation is processed.
+ auto *VTy = cast<VectorType>(Node->Real->getType());
+ auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+ auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHI());
+ OldToNewPHI[dyn_cast<PHINode>(Node->Real)] = NewPHI;
+ ReplacementNode = NewPHI;
+ break;
+ }
+ case ComplexDeinterleavingOperation::ReductionOperation:
+ ReplacementNode = replaceNode(Builder, Node->Operands[0]);
+ processReductionOperation(ReplacementNode, Node);
+ break;
+ case ComplexDeinterleavingOperation::ReductionSelect: {
+ auto *MaskReal = cast<Instruction>(Node->Real)->getOperand(0);
+ auto *MaskImag = cast<Instruction>(Node->Imag)->getOperand(0);
+ auto *A = replaceNode(Builder, Node->Operands[0]);
+ auto *B = replaceNode(Builder, Node->Operands[1]);
+ auto *NewMaskTy = VectorType::getDoubleElementsVectorType(
+ cast<VectorType>(MaskReal->getType()));
+ auto *NewMask =
+ Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2,
+ NewMaskTy, {MaskReal, MaskImag});
+ ReplacementNode = Builder.CreateSelect(NewMask, A, B);
+ break;
+ }
+ }
+
+ assert(ReplacementNode && "Target failed to create Intrinsic call.");
+ NumComplexTransformations += 1;
+ Node->ReplacementNode = ReplacementNode;
+ return ReplacementNode;
+}
+
+void ComplexDeinterleavingGraph::processReductionOperation(
+ Value *OperationReplacement, RawNodePtr Node) {
+ auto *Real = cast<Instruction>(Node->Real);
+ auto *Imag = cast<Instruction>(Node->Imag);
+ auto *OldPHIReal = ReductionInfo[Real].first;
+ auto *OldPHIImag = ReductionInfo[Imag].first;
+ auto *NewPHI = OldToNewPHI[OldPHIReal];
+
+ auto *VTy = cast<VectorType>(Real->getType());
+ auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+
+ // We have to interleave initial origin values coming from IncomingBlock
+ Value *InitReal = OldPHIReal->getIncomingValueForBlock(Incoming);
+ Value *InitImag = OldPHIImag->getIncomingValueForBlock(Incoming);
+
+ IRBuilder<> Builder(Incoming->getTerminator());
+ auto *NewInit = Builder.CreateIntrinsic(
+ Intrinsic::experimental_vector_interleave2, NewVTy, {InitReal, InitImag});
+
+ NewPHI->addIncoming(NewInit, Incoming);
+ NewPHI->addIncoming(OperationReplacement, BackEdge);
+
+ // Deinterleave complex vector outside of loop so that it can be finally
+ // reduced
+ auto *FinalReductionReal = ReductionInfo[Real].second;
+ auto *FinalReductionImag = ReductionInfo[Imag].second;
+
+ Builder.SetInsertPoint(
+ &*FinalReductionReal->getParent()->getFirstInsertionPt());
+ auto *Deinterleave = Builder.CreateIntrinsic(
+ Intrinsic::experimental_vector_deinterleave2,
+ OperationReplacement->getType(), OperationReplacement);
+
+ auto *NewReal = Builder.CreateExtractValue(Deinterleave, (uint64_t)0);
+ FinalReductionReal->replaceUsesOfWith(Real, NewReal);
+
+ Builder.SetInsertPoint(FinalReductionImag);
+ auto *NewImag = Builder.CreateExtractValue(Deinterleave, 1);
+ FinalReductionImag->replaceUsesOfWith(Imag, NewImag);
+}
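Schematically (a sketch of the resulting IR shape under the names used above, not a literal dump): the two old reduction PHIs are replaced by a single PHI over a double-width vector whose loop-entry value is interleave2(InitReal, InitImag) and whose back-edge value is the replacement of the reduction operation; outside the loop the accumulated vector is split again with deinterleave2, and the original final reductions are rewired to the extracted real and imaginary halves.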
+
+void ComplexDeinterleavingGraph::replaceNodes() {
+ SmallVector<Instruction *, 16> DeadInstrRoots;
+ for (auto *RootInstruction : OrderedRoots) {
+ // Check whether this potential root went through the checking process and
+ // can be deinterleaved
+ if (!RootToNode.count(RootInstruction))
+ continue;
+
+ IRBuilder<> Builder(RootInstruction);
+ auto RootNode = RootToNode[RootInstruction];
+ Value *R = replaceNode(Builder, RootNode.get());
+
+ if (RootNode->Operation ==
+ ComplexDeinterleavingOperation::ReductionOperation) {
+ auto *RootReal = cast<Instruction>(RootNode->Real);
+ auto *RootImag = cast<Instruction>(RootNode->Imag);
+ ReductionInfo[RootReal].first->removeIncomingValue(BackEdge);
+ ReductionInfo[RootImag].first->removeIncomingValue(BackEdge);
+ DeadInstrRoots.push_back(cast<Instruction>(RootReal));
+ DeadInstrRoots.push_back(cast<Instruction>(RootImag));
+ } else {
+ assert(R && "Unable to find replacement for RootInstruction");
+ DeadInstrRoots.push_back(RootInstruction);
+ RootInstruction->replaceAllUsesWith(R);
+ }
+ }
+
+ for (auto *I : DeadInstrRoots)
+ RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
}
diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index eb2d449bc4af..106db7c51f27 100644
--- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -49,7 +49,7 @@ CriticalAntiDepBreaker::~CriticalAntiDepBreaker() = default;
void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
const unsigned BBSize = BB->size();
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) {
// Clear out the register class data.
Classes[i] = nullptr;
@@ -111,7 +111,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
return;
assert(Count < InsertPosIndex && "Instruction index out of expected range!");
- for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ for (unsigned Reg = 1; Reg != TRI->getNumRegs(); ++Reg) {
if (KillIndices[Reg] != ~0u) {
// If Reg is currently live, then mark that it can't be renamed as
// we don't know the extent of its live-range anymore (now that it
@@ -213,9 +213,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
if (MO.isUse() && Special) {
if (!KeepRegs.test(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- KeepRegs.set(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ KeepRegs.set(SubReg);
}
}
}
@@ -238,13 +237,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
// itself can't be changed.
if (MI.isRegTiedToUseOperand(I) &&
Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs) {
- KeepRegs.set(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) {
+ KeepRegs.set(SubReg);
}
- for (MCSuperRegIterator SuperRegs(Reg, TRI);
- SuperRegs.isValid(); ++SuperRegs) {
- KeepRegs.set(*SuperRegs);
+ for (MCPhysReg SuperReg : TRI->superregs(Reg)) {
+ KeepRegs.set(SuperReg);
}
}
}
@@ -264,14 +261,11 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
if (MO.isRegMask()) {
auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) {
- for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI)
- if (!MO.clobbersPhysReg(*SRI))
- return false;
-
- return true;
+ return all_of(TRI->subregs_inclusive(PhysReg),
+ [&](MCPhysReg SR) { return MO.clobbersPhysReg(SR); });
};
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) {
if (ClobbersPhysRegAndSubRegs(i)) {
DefIndices[i] = Count;
KillIndices[i] = ~0u;
@@ -297,8 +291,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
// For the reg itself and all subregs: update the def to current;
// reset the kill state, any restrictions, and references.
- for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI) {
- unsigned SubregReg = *SRI;
+ for (MCPhysReg SubregReg : TRI->subregs_inclusive(Reg)) {
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
Classes[SubregReg] = nullptr;
@@ -307,8 +300,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
KeepRegs.reset(SubregReg);
}
// Conservatively mark super-registers as unusable.
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
- Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1);
+ for (MCPhysReg SR : TRI->superregs(Reg))
+ Classes[SR] = reinterpret_cast<TargetRegisterClass *>(-1);
}
}
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
@@ -470,7 +463,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
LLVM_DEBUG(dbgs() << "Critical path has total latency "
<< (Max->getDepth() + Max->Latency) << "\n");
LLVM_DEBUG(dbgs() << "Available regs:");
- for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ for (unsigned Reg = 1; Reg < TRI->getNumRegs(); ++Reg) {
if (KillIndices[Reg] == ~0u)
LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
}
diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp
index 34fb1d286a58..48bb4a07662e 100644
--- a/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -29,8 +29,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -98,34 +96,6 @@ unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) {
return RS[InstIdx] ^ RS[InstIdx - 1];
}
-namespace llvm {
-
-// This class extends ScheduleDAGInstrs and overrides the schedule method
-// to build the dependence graph.
-class DefaultVLIWScheduler : public ScheduleDAGInstrs {
-private:
- AAResults *AA;
- /// Ordered list of DAG postprocessing steps.
- std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
-
-public:
- DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- AAResults *AA);
-
- // Actual scheduling work.
- void schedule() override;
-
- /// DefaultVLIWScheduler takes ownership of the Mutation object.
- void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
- Mutations.push_back(std::move(Mutation));
- }
-
-protected:
- void postprocessDAG();
-};
-
-} // end namespace llvm
-
DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
MachineLoopInfo &MLI,
AAResults *AA)
@@ -134,7 +104,7 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
}
/// Apply each ScheduleDAGMutation step in order.
-void DefaultVLIWScheduler::postprocessDAG() {
+void DefaultVLIWScheduler::postProcessDAG() {
for (auto &M : Mutations)
M->apply(this);
}
@@ -142,7 +112,7 @@ void DefaultVLIWScheduler::postprocessDAG() {
void DefaultVLIWScheduler::schedule() {
// Build the scheduling graph.
buildSchedGraph(AA);
- postprocessDAG();
+ postProcessDAG();
}
VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
@@ -264,7 +234,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
"added to packet\n "
<< MI);
// End the packet if resource is not available, or if the instruction
- // shoud not be added to the current packet.
+ // should not be added to the current packet.
endPacket(MBB, MI);
}
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index e36db43567c5..6a7de3b241fe 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -31,8 +31,8 @@ namespace {
class DeadMachineInstructionElim : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
- const MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
LiveRegUnits LivePhysRegs;
public:
@@ -75,27 +75,25 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
return false;
// Examine each operand.
- for (const MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.isDef()) {
- Register Reg = MO.getReg();
- if (Reg.isPhysical()) {
- // Don't delete live physreg defs, or any reserved register defs.
- if (!LivePhysRegs.available(Reg) || MRI->isReserved(Reg))
- return false;
- } else {
- if (MO.isDead()) {
+ for (const MachineOperand &MO : MI->all_defs()) {
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical()) {
+ // Don't delete live physreg defs, or any reserved register defs.
+ if (!LivePhysRegs.available(Reg) || MRI->isReserved(Reg))
+ return false;
+ } else {
+ if (MO.isDead()) {
#ifndef NDEBUG
- // Basic check on the register. All of them should be 'undef'.
- for (auto &U : MRI->use_nodbg_operands(Reg))
- assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
+ // Basic check on the register. All of them should be 'undef'.
+ for (auto &U : MRI->use_nodbg_operands(Reg))
+ assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
#endif
- continue;
- }
- for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
- if (&Use != MI)
- // This def has a non-debug use. Don't delete the instruction!
- return false;
- }
+ continue;
+ }
+ for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
+ if (&Use != MI)
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
}
}
}
diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp
index bbb89855cfff..86e9f3abe010 100644
--- a/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -25,7 +25,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/DetectDeadLanes.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -33,98 +33,19 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <deque>
using namespace llvm;
#define DEBUG_TYPE "detect-dead-lanes"
-namespace {
-
-/// Contains a bitmask of which lanes of a given virtual register are
-/// defined and which ones are actually used.
-struct VRegInfo {
- LaneBitmask UsedLanes;
- LaneBitmask DefinedLanes;
-};
-
-class DetectDeadLanes : public MachineFunctionPass {
-public:
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- static char ID;
- DetectDeadLanes() : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override { return "Detect Dead Lanes"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
-private:
- /// Add used lane bits on the register used by operand \p MO. This translates
- /// the bitmask based on the operands subregister, and puts the register into
- /// the worklist if any new bits were added.
- void addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes);
-
- /// Given a bitmask \p UsedLanes for the used lanes on a def output of a
- /// COPY-like instruction determine the lanes used on the use operands
- /// and call addUsedLanesOnOperand() for them.
- void transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes);
-
- /// Given a use regiser operand \p Use and a mask of defined lanes, check
- /// if the operand belongs to a lowersToCopies() instruction, transfer the
- /// mask to the def and put the instruction into the worklist.
- void transferDefinedLanesStep(const MachineOperand &Use,
- LaneBitmask DefinedLanes);
-
- /// Given a mask \p DefinedLanes of lanes defined at operand \p OpNum
- /// of COPY-like instruction, determine which lanes are defined at the output
- /// operand \p Def.
- LaneBitmask transferDefinedLanes(const MachineOperand &Def, unsigned OpNum,
- LaneBitmask DefinedLanes) const;
-
- /// Given a mask \p UsedLanes used from the output of instruction \p MI
- /// determine which lanes are used from operand \p MO of this instruction.
- LaneBitmask transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes,
- const MachineOperand &MO) const;
-
- std::pair<bool, bool> runOnce(MachineFunction &MF);
-
- LaneBitmask determineInitialDefinedLanes(unsigned Reg);
- LaneBitmask determineInitialUsedLanes(unsigned Reg);
-
- bool isUndefRegAtInput(const MachineOperand &MO,
- const VRegInfo &RegInfo) const;
-
- bool isUndefInput(const MachineOperand &MO, bool *CrossCopy) const;
-
- const MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
-
- void PutInWorklist(unsigned RegIdx) {
- if (WorklistMembers.test(RegIdx))
- return;
- WorklistMembers.set(RegIdx);
- Worklist.push_back(RegIdx);
- }
-
- VRegInfo *VRegInfos;
- /// Worklist containing virtreg indexes.
- std::deque<unsigned> Worklist;
- BitVector WorklistMembers;
- /// This bitvector is set for each vreg index where the vreg is defined
- /// by an instruction where lowersToCopies()==true.
- BitVector DefinedByCopy;
-};
-
-} // end anonymous namespace
-
-char DetectDeadLanes::ID = 0;
-char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
-
-INITIALIZE_PASS(DetectDeadLanes, DEBUG_TYPE, "Detect Dead Lanes", false, false)
+DeadLaneDetector::DeadLaneDetector(const MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI)
+ : MRI(MRI), TRI(TRI) {
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ VRegInfos = std::unique_ptr<VRegInfo[]>(new VRegInfo[NumVirtRegs]);
+ WorklistMembers.resize(NumVirtRegs);
+ DefinedByCopy.resize(NumVirtRegs);
+}
/// Returns true if \p MI will get lowered to a series of COPY instructions.
/// We call this a COPY-like instruction.
@@ -159,11 +80,11 @@ static bool isCrossCopy(const MachineRegisterInfo &MRI,
unsigned DstSubIdx = 0;
switch (MI.getOpcode()) {
case TargetOpcode::INSERT_SUBREG:
- if (MI.getOperandNo(&MO) == 2)
+ if (MO.getOperandNo() == 2)
DstSubIdx = MI.getOperand(3).getImm();
break;
case TargetOpcode::REG_SEQUENCE: {
- unsigned OpNum = MI.getOperandNo(&MO);
+ unsigned OpNum = MO.getOperandNo();
DstSubIdx = MI.getOperand(OpNum+1).getImm();
break;
}
@@ -184,8 +105,8 @@ static bool isCrossCopy(const MachineRegisterInfo &MRI,
return !TRI.getCommonSubClass(SrcRC, DstRC);
}
-void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
- LaneBitmask UsedLanes) {
+void DeadLaneDetector::addUsedLanesOnOperand(const MachineOperand &MO,
+ LaneBitmask UsedLanes) {
if (!MO.readsReg())
return;
Register MOReg = MO.getReg();
@@ -198,7 +119,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg);
unsigned MORegIdx = Register::virtReg2Index(MOReg);
- VRegInfo &MORegInfo = VRegInfos[MORegIdx];
+ DeadLaneDetector::VRegInfo &MORegInfo = VRegInfos[MORegIdx];
LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
// Any change at all?
if ((UsedLanes & ~PrevUsedLanes).none())
@@ -210,8 +131,8 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
PutInWorklist(MORegIdx);
}
-void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI,
- LaneBitmask UsedLanes) {
+void DeadLaneDetector::transferUsedLanesStep(const MachineInstr &MI,
+ LaneBitmask UsedLanes) {
for (const MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
@@ -220,10 +141,11 @@ void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI,
}
}
-LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI,
- LaneBitmask UsedLanes,
- const MachineOperand &MO) const {
- unsigned OpNum = MI.getOperandNo(&MO);
+LaneBitmask
+DeadLaneDetector::transferUsedLanes(const MachineInstr &MI,
+ LaneBitmask UsedLanes,
+ const MachineOperand &MO) const {
+ unsigned OpNum = MO.getOperandNo();
assert(lowersToCopies(MI) &&
DefinedByCopy[Register::virtReg2Index(MI.getOperand(0).getReg())]);
@@ -265,8 +187,8 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI,
}
}
-void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
- LaneBitmask DefinedLanes) {
+void DeadLaneDetector::transferDefinedLanesStep(const MachineOperand &Use,
+ LaneBitmask DefinedLanes) {
if (!Use.readsReg())
return;
// Check whether the operand writes a vreg and is part of a COPY-like
@@ -286,7 +208,7 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
if (!DefinedByCopy.test(DefRegIdx))
return;
- unsigned OpNum = MI.getOperandNo(&Use);
+ unsigned OpNum = Use.getOperandNo();
DefinedLanes =
TRI->reverseComposeSubRegIndexLaneMask(Use.getSubReg(), DefinedLanes);
DefinedLanes = transferDefinedLanes(Def, OpNum, DefinedLanes);
@@ -301,8 +223,8 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
PutInWorklist(DefRegIdx);
}
-LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def,
- unsigned OpNum, LaneBitmask DefinedLanes) const {
+LaneBitmask DeadLaneDetector::transferDefinedLanes(
+ const MachineOperand &Def, unsigned OpNum, LaneBitmask DefinedLanes) const {
const MachineInstr &MI = *Def.getParent();
// Translate DefinedLanes if necessary.
switch (MI.getOpcode()) {
@@ -343,7 +265,7 @@ LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def,
return DefinedLanes;
}
-LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
+LaneBitmask DeadLaneDetector::determineInitialDefinedLanes(unsigned Reg) {
// Live-In or unused registers have no definition but are considered fully
// defined.
if (!MRI->hasOneDef(Reg))
@@ -395,7 +317,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
MOSubReg, MODefinedLanes);
}
- unsigned OpNum = DefMI.getOperandNo(&MO);
+ unsigned OpNum = MO.getOperandNo();
DefinedLanes |= transferDefinedLanes(Def, OpNum, MODefinedLanes);
}
return DefinedLanes;
@@ -408,7 +330,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
return MRI->getMaxLaneMaskForVReg(Reg);
}
-LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
+LaneBitmask DeadLaneDetector::determineInitialUsedLanes(unsigned Reg) {
LaneBitmask UsedLanes = LaneBitmask::getNone();
for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
if (!MO.readsReg())
@@ -449,14 +371,58 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
return UsedLanes;
}
-bool DetectDeadLanes::isUndefRegAtInput(const MachineOperand &MO,
- const VRegInfo &RegInfo) const {
+namespace {
+
+class DetectDeadLanes : public MachineFunctionPass {
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+ DetectDeadLanes() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "Detect Dead Lanes"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ /// Update the operand status.
+ /// The first return value shows whether MF has been changed.
+ /// The second return value indicates we need to call
+ /// DeadLaneDetector::computeSubRegisterLaneBitInfo and this function again
+ /// to propagate changes.
+ std::pair<bool, bool>
+ modifySubRegisterOperandStatus(const DeadLaneDetector &DLD,
+ MachineFunction &MF);
+
+ bool isUndefRegAtInput(const MachineOperand &MO,
+ const DeadLaneDetector::VRegInfo &RegInfo) const;
+
+ bool isUndefInput(const DeadLaneDetector &DLD, const MachineOperand &MO,
+ bool *CrossCopy) const;
+
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+};
+
+} // end anonymous namespace
+
+char DetectDeadLanes::ID = 0;
+char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
+
+INITIALIZE_PASS(DetectDeadLanes, DEBUG_TYPE, "Detect Dead Lanes", false, false)
+
+bool DetectDeadLanes::isUndefRegAtInput(
+ const MachineOperand &MO, const DeadLaneDetector::VRegInfo &RegInfo) const {
unsigned SubReg = MO.getSubReg();
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask).none();
}
-bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
+bool DetectDeadLanes::isUndefInput(const DeadLaneDetector &DLD,
+ const MachineOperand &MO,
bool *CrossCopy) const {
if (!MO.isUse())
return false;
@@ -468,11 +434,11 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
if (!DefReg.isVirtual())
return false;
unsigned DefRegIdx = Register::virtReg2Index(DefReg);
- if (!DefinedByCopy.test(DefRegIdx))
+ if (!DLD.isDefinedByCopy(DefRegIdx))
return false;
- const VRegInfo &DefRegInfo = VRegInfos[DefRegIdx];
- LaneBitmask UsedLanes = transferUsedLanes(MI, DefRegInfo.UsedLanes, MO);
+ const DeadLaneDetector::VRegInfo &DefRegInfo = DLD.getVRegInfo(DefRegIdx);
+ LaneBitmask UsedLanes = DLD.transferUsedLanes(MI, DefRegInfo.UsedLanes, MO);
if (UsedLanes.any())
return false;
@@ -484,7 +450,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
return true;
}
-std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
+void DeadLaneDetector::computeSubRegisterLaneBitInfo() {
// First pass: Populate defs/uses of vregs with initial values
unsigned NumVirtRegs = MRI->getNumVirtRegs();
for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
@@ -524,7 +490,11 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
}
dbgs() << "\n";
});
+}
+std::pair<bool, bool>
+DetectDeadLanes::modifySubRegisterOperandStatus(const DeadLaneDetector &DLD,
+ MachineFunction &MF) {
bool Changed = false;
bool Again = false;
// Mark operands as dead/unused.
@@ -537,7 +507,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
if (!Reg.isVirtual())
continue;
unsigned RegIdx = Register::virtReg2Index(Reg);
- const VRegInfo &RegInfo = VRegInfos[RegIdx];
+ const DeadLaneDetector::VRegInfo &RegInfo = DLD.getVRegInfo(RegIdx);
if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) {
LLVM_DEBUG(dbgs()
<< "Marking operand '" << MO << "' as dead in " << MI);
@@ -551,7 +521,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
<< "Marking operand '" << MO << "' as undef in " << MI);
MO.setIsUndef();
Changed = true;
- } else if (isUndefInput(MO, &CrossCopy)) {
+ } else if (isUndefInput(DLD, MO, &CrossCopy)) {
LLVM_DEBUG(dbgs()
<< "Marking operand '" << MO << "' as undef in " << MI);
MO.setIsUndef();
@@ -581,21 +551,16 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
TRI = MRI->getTargetRegisterInfo();
- unsigned NumVirtRegs = MRI->getNumVirtRegs();
- VRegInfos = new VRegInfo[NumVirtRegs];
- WorklistMembers.resize(NumVirtRegs);
- DefinedByCopy.resize(NumVirtRegs);
+ DeadLaneDetector DLD(MRI, TRI);
bool Changed = false;
bool Again;
do {
+ DLD.computeSubRegisterLaneBitInfo();
bool LocalChanged;
- std::tie(LocalChanged, Again) = runOnce(MF);
+ std::tie(LocalChanged, Again) = modifySubRegisterOperandStatus(DLD, MF);
Changed |= LocalChanged;
- } while(Again);
+ } while (Again);
- DefinedByCopy.clear();
- WorklistMembers.clear();
- delete[] VRegInfos;
return Changed;
}
diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index aa81f618dc59..32c94de7280c 100644
--- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -14,10 +14,8 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -28,6 +26,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -36,6 +35,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 00626604d81c..61867d74bfa2 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -119,10 +119,10 @@ public:
SmallVector<PHIInfo, 8> PHIs;
-private:
/// The branch condition determined by analyzeBranch.
SmallVector<MachineOperand, 4> Cond;
+private:
/// Instructions in Head that define values used by the conditional blocks.
/// The hoisted instructions must be inserted after these instructions.
SmallPtrSet<MachineInstr*, 8> InsertAfter;
@@ -263,9 +263,8 @@ bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) {
// Remember clobbered regunits.
if (MO.isDef() && Reg.isPhysical())
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units)
- ClobberedRegUnits.set(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ ClobberedRegUnits.set(Unit);
if (!MO.readsReg() || !Reg.isVirtual())
continue;
@@ -343,8 +342,11 @@ bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) {
// Apply predicate to all instructions in the machine block.
void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) {
auto Condition = Cond;
- if (ReversePredicate)
- TII->reverseBranchCondition(Condition);
+ if (ReversePredicate) {
+ bool CanRevCond = !TII->reverseBranchCondition(Condition);
+ assert(CanRevCond && "Reversed predicate is not supported");
+ (void)CanRevCond;
+ }
// Terminators don't need to be predicated as they will be removed.
for (MachineBasicBlock::iterator I = MBB->begin(),
E = MBB->getFirstTerminator();
@@ -391,19 +393,17 @@ bool SSAIfConv::findInsertionPoint() {
continue;
// I clobbers Reg, so it isn't live before I.
if (MO.isDef())
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units)
- LiveRegUnits.erase(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ LiveRegUnits.erase(Unit);
// Unless I reads Reg.
if (MO.readsReg())
Reads.push_back(Reg.asMCReg());
}
// Anything read by I is live before I.
while (!Reads.empty())
- for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid();
- ++Units)
- if (ClobberedRegUnits.test(*Units))
- LiveRegUnits.insert(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reads.pop_back_val()))
+ if (ClobberedRegUnits.test(Unit))
+ LiveRegUnits.insert(Unit);
// We can't insert before a terminator.
if (I != FirstTerm && I->isTerminator())
@@ -760,14 +760,14 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,
namespace {
class EarlyIfConverter : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
MCSchedModel SchedModel;
- MachineRegisterInfo *MRI;
- MachineDominatorTree *DomTree;
- MachineLoopInfo *Loops;
- MachineTraceMetrics *Traces;
- MachineTraceMetrics::Ensemble *MinInstr;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineDominatorTree *DomTree = nullptr;
+ MachineLoopInfo *Loops = nullptr;
+ MachineTraceMetrics *Traces = nullptr;
+ MachineTraceMetrics::Ensemble *MinInstr = nullptr;
SSAIfConv IfConv;
public:
@@ -873,8 +873,40 @@ bool EarlyIfConverter::shouldConvertIf() {
if (Stress)
return true;
+ // Do not try to if-convert if the condition has a high chance of being
+ // predictable.
+ MachineLoop *CurrentLoop = Loops->getLoopFor(IfConv.Head);
+ // If the condition is in a loop, consider it predictable if the condition
+ // itself or all its operands are loop-invariant. E.g. this considers a load
+ // from a loop-invariant address predictable; we were unable to prove that it
+ // doesn't alias any of the memory-writes in the loop, but it is likely to
+ // read the same value multiple times.
+ if (CurrentLoop && any_of(IfConv.Cond, [&](MachineOperand &MO) {
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg))
+ return false;
+
+ MachineInstr *Def = MRI->getVRegDef(Reg);
+ return CurrentLoop->isLoopInvariant(*Def) ||
+ all_of(Def->operands(), [&](MachineOperand &Op) {
+ if (Op.isImm())
+ return true;
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg))
+ return false;
+
+ MachineInstr *Def = MRI->getVRegDef(Reg);
+ return CurrentLoop->isLoopInvariant(*Def);
+ });
+ }))
+ return false;
+
if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
@@ -1084,13 +1116,13 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
namespace {
class EarlyIfPredicator : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
TargetSchedModel SchedModel;
- MachineRegisterInfo *MRI;
- MachineDominatorTree *DomTree;
- MachineBranchProbabilityInfo *MBPI;
- MachineLoopInfo *Loops;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineDominatorTree *DomTree = nullptr;
+ MachineBranchProbabilityInfo *MBPI = nullptr;
+ MachineLoopInfo *Loops = nullptr;
SSAIfConv IfConv;
public:
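A minimal, hypothetical source-level analogue of the new predictability heuristic in EarlyIfConverter::shouldConvertIf(): the pass itself inspects MachineIR operands and MachineLoopInfo, but the intent is that when the branch condition (or all of its operands) is loop-invariant, the branch is likely to be well predicted and if-conversion is skipped. Function and variable names below are invented for illustration only.

    // Illustrative C++ only; not LLVM code.
    int sumWithFlag(const int *a, int n, const bool *flag) {
      int sum = 0;
      for (int i = 0; i < n; ++i) {
        // '*flag' is never written inside the loop, so the condition is
        // loop-invariant and the branch predictor handles it cheaply;
        // the new heuristic declines to if-convert such branches.
        if (*flag)
          sum += a[i];
        else
          sum -= a[i];
      }
      return sum;
    }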
diff --git a/llvm/lib/CodeGen/ExecutionDomainFix.cpp b/llvm/lib/CodeGen/ExecutionDomainFix.cpp
index 9621ad4b1248..21a7d02a320c 100644
--- a/llvm/lib/CodeGen/ExecutionDomainFix.cpp
+++ b/llvm/lib/CodeGen/ExecutionDomainFix.cpp
@@ -318,7 +318,7 @@ void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// If the collapsed operands force a single domain, propagate the collapse.
if (isPowerOf2_32(available)) {
- unsigned domain = countTrailingZeros(available);
+ unsigned domain = llvm::countr_zero(available);
TII->setExecutionDomain(*mi, domain);
visitHardInstr(mi, domain);
return;
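The countTrailingZeros() call above is replaced by llvm::countr_zero(), which mirrors C++20's std::countr_zero. A small standalone sketch of why that yields the domain index, assuming (as the pass does) that exactly one domain bit remains set:

    #include <bit>
    #include <cstdint>

    // For a power-of-two mask the index of the single set bit equals the
    // number of trailing zeros, e.g. available = 0b0100 -> domain 2.
    unsigned pickDomain(uint32_t available) {
      return std::countr_zero(available);
    }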
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 3838eaadd1d2..500f31bd8e89 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -71,18 +71,18 @@ class MemCmpExpansion {
ResultBlock() = default;
};
- CallInst *const CI;
+ CallInst *const CI = nullptr;
ResultBlock ResBlock;
const uint64_t Size;
unsigned MaxLoadSize = 0;
uint64_t NumLoadsNonOneByte = 0;
const uint64_t NumLoadsPerBlockForZeroCmp;
std::vector<BasicBlock *> LoadCmpBlocks;
- BasicBlock *EndBlock;
- PHINode *PhiRes;
+ BasicBlock *EndBlock = nullptr;
+ PHINode *PhiRes = nullptr;
const bool IsUsedForZeroCmp;
const DataLayout &DL;
- DomTreeUpdater *DTU;
+ DomTreeUpdater *DTU = nullptr;
IRBuilder<> Builder;
// Represents the decomposition in blocks of the expansion. For example,
// comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
@@ -288,17 +288,11 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
Align RhsAlign = RhsSource->getPointerAlignment(DL);
if (OffsetBytes > 0) {
auto *ByteType = Type::getInt8Ty(CI->getContext());
- LhsSource = Builder.CreateConstGEP1_64(
- ByteType, Builder.CreateBitCast(LhsSource, ByteType->getPointerTo()),
- OffsetBytes);
- RhsSource = Builder.CreateConstGEP1_64(
- ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
- OffsetBytes);
+ LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes);
+ RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes);
LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
}
- LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
- RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());
// Create a constant or a load from the source.
Value *Lhs = nullptr;
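With opaque pointers every pointer operand already has type 'ptr', so the bitcasts that getLoadPair used to emit before the byte-offset GEP and before the typed load are no-ops and can be dropped, as the hunk above does. A hedged IRBuilder sketch of the remaining offset step; the helper name is invented:

    #include "llvm/IR/IRBuilder.h"

    // Advance a source pointer by a byte offset; no pointer bitcasts needed.
    llvm::Value *offsetByBytes(llvm::IRBuilder<> &Builder, llvm::Value *Src,
                               uint64_t OffsetBytes) {
      llvm::Type *ByteTy = Builder.getInt8Ty();
      return Builder.CreateConstGEP1_64(ByteTy, Src, OffsetBytes);
    }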
diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index cc63984158c8..3a79f20f4732 100644
--- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -28,8 +28,8 @@ using namespace llvm;
namespace {
struct ExpandPostRA : public MachineFunctionPass {
private:
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
@@ -47,9 +47,6 @@ public:
private:
bool LowerSubregToReg(MachineInstr *MI);
- bool LowerCopy(MachineInstr *MI);
-
- void TransferImplicitOperands(MachineInstr *MI);
};
} // end anonymous namespace
@@ -59,25 +56,6 @@ char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
INITIALIZE_PASS(ExpandPostRA, DEBUG_TYPE,
"Post-RA pseudo instruction expansion pass", false, false)
-/// TransferImplicitOperands - MI is a pseudo-instruction, and the lowered
-/// replacement instructions immediately precede it. Copy any implicit
-/// operands from MI to the replacement instruction.
-void ExpandPostRA::TransferImplicitOperands(MachineInstr *MI) {
- MachineBasicBlock::iterator CopyMI = MI;
- --CopyMI;
-
- Register DstReg = MI->getOperand(0).getReg();
- for (const MachineOperand &MO : MI->implicit_operands()) {
- CopyMI->addOperand(MO);
-
- // Be conservative about preserving kills when subregister defs are
- // involved. If there was implicit kill of a super-register overlapping the
- // copy result, we would kill the subregisters previous copies defined.
- if (MO.isKill() && TRI->regsOverlap(DstReg, MO.getReg()))
- CopyMI->getOperand(CopyMI->getNumOperands() - 1).setIsKill(false);
- }
-}
-
bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
MachineBasicBlock *MBB = MI->getParent();
assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
@@ -137,50 +115,6 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
return true;
}
-bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
-
- if (MI->allDefsAreDead()) {
- LLVM_DEBUG(dbgs() << "dead copy: " << *MI);
- MI->setDesc(TII->get(TargetOpcode::KILL));
- LLVM_DEBUG(dbgs() << "replaced by: " << *MI);
- return true;
- }
-
- MachineOperand &DstMO = MI->getOperand(0);
- MachineOperand &SrcMO = MI->getOperand(1);
-
- bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg());
- if (IdentityCopy || SrcMO.isUndef()) {
- LLVM_DEBUG(dbgs() << (IdentityCopy ? "identity copy: " : "undef copy: ")
- << *MI);
- // No need to insert an identity copy instruction, but replace with a KILL
- // if liveness is changed.
- if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
- // We must make sure the super-register gets killed. Replace the
- // instruction with KILL.
- MI->setDesc(TII->get(TargetOpcode::KILL));
- LLVM_DEBUG(dbgs() << "replaced by: " << *MI);
- return true;
- }
- // Vanilla identity copy.
- MI->eraseFromParent();
- return true;
- }
-
- LLVM_DEBUG(dbgs() << "real copy: " << *MI);
- TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
- DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
-
- if (MI->getNumOperands() > 2)
- TransferImplicitOperands(MI);
- LLVM_DEBUG({
- MachineBasicBlock::iterator dMI = MI;
- dbgs() << "replaced by: " << *(--dMI);
- });
- MI->eraseFromParent();
- return true;
-}
-
/// runOnMachineFunction - Reduce subregister inserts and extracts to register
/// copies.
///
@@ -211,7 +145,8 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
MadeChange |= LowerSubregToReg(&MI);
break;
case TargetOpcode::COPY:
- MadeChange |= LowerCopy(&MI);
+ TII->lowerCopy(&MI, TRI);
+ MadeChange = true;
break;
case TargetOpcode::DBG_VALUE:
continue;
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index f08c47d220ea..79b6dc9154b3 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -1,4 +1,4 @@
-//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
+//===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -133,10 +133,38 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
}
break;
}
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or: {
+ // Canonicalize logical or/and reductions:
+ // Or reduction for i1 is represented as:
+ // %val = bitcast <ReduxWidth x i1> to iReduxWidth
+ // %res = cmp ne iReduxWidth %val, 0
+ // And reduction for i1 is represented as:
+ // %val = bitcast <ReduxWidth x i1> to iReduxWidth
+ // %res = cmp eq iReduxWidth %val, 11111
+ Value *Vec = II->getArgOperand(0);
+ auto *FTy = cast<FixedVectorType>(Vec->getType());
+ unsigned NumElts = FTy->getNumElements();
+ if (!isPowerOf2_32(NumElts))
+ continue;
+
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
+ if (ID == Intrinsic::vector_reduce_and) {
+ Rdx = Builder.CreateICmpEQ(
+ Rdx, ConstantInt::getAllOnesValue(Rdx->getType()));
+ } else {
+ assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
+ Rdx = Builder.CreateIsNotNull(Rdx);
+ }
+ break;
+ }
+
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+ break;
+ }
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
- case Intrinsic::vector_reduce_and:
- case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
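The new i1 canonicalization above turns a vector_reduce_or of an <N x i1> mask into a bitcast to iN plus a compare against zero (the and-reduction is analogous with an all-ones compare). A hedged IRBuilder sketch of the or case, with an invented helper name:

    #include "llvm/IR/IRBuilder.h"

    // Or-reduce an <NumElts x i1> vector: any set bit makes the result true.
    llvm::Value *expandI1OrReduction(llvm::IRBuilder<> &Builder,
                                     llvm::Value *Vec, unsigned NumElts) {
      llvm::Value *Bits = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
      return Builder.CreateIsNotNull(Bits); // icmp ne iN %bits, 0
    }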
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 5ee76ff567fb..9807be0bea39 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -171,6 +171,10 @@ struct CachingVPExpander {
Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &PI);
+ /// Lower this VP fp call to an unpredicated fp call.
+ Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
+ unsigned UnpredicatedIntrinsicID);
+
/// Lower this VP reduction to a call to an unpredicated reduction intrinsic.
Value *expandPredicationInReduction(IRBuilder<> &Builder,
VPReductionIntrinsic &PI);
@@ -271,6 +275,38 @@ CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
return NewBinOp;
}
+Value *CachingVPExpander::expandPredicationToFPCall(
+ IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
+ "Implicitly dropping %evl in non-speculatable operator!");
+
+ switch (UnpredicatedIntrinsicID) {
+ case Intrinsic::fabs:
+ case Intrinsic::sqrt: {
+ Value *Op0 = VPI.getOperand(0);
+ Function *Fn = Intrinsic::getDeclaration(
+ VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
+ Value *NewOp = Builder.CreateCall(Fn, {Op0}, VPI.getName());
+ replaceOperation(*NewOp, VPI);
+ return NewOp;
+ }
+ case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fmuladd: {
+ Value *Op0 = VPI.getOperand(0);
+ Value *Op1 = VPI.getOperand(1);
+ Value *Op2 = VPI.getOperand(2);
+ Function *Fn = Intrinsic::getDeclaration(
+ VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
+ Value *NewOp =
+ Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
+ replaceOperation(*NewOp, VPI);
+ return NewOp;
+ }
+ }
+
+ return nullptr;
+}
+
static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
Type *EltTy) {
bool Negative = false;
@@ -565,6 +601,15 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
switch (VPI.getIntrinsicID()) {
default:
break;
+ case Intrinsic::vp_fneg: {
+ Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
+ replaceOperation(*NewNegOp, VPI);
+ return NewNegOp;
+ }
+ case Intrinsic::vp_fabs:
+ return expandPredicationToFPCall(Builder, VPI, Intrinsic::fabs);
+ case Intrinsic::vp_sqrt:
+ return expandPredicationToFPCall(Builder, VPI, Intrinsic::sqrt);
case Intrinsic::vp_load:
case Intrinsic::vp_store:
case Intrinsic::vp_gather:
@@ -572,6 +617,10 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
return expandPredicationInMemoryIntrinsic(Builder, VPI);
}
+ if (auto CID = VPI.getConstrainedIntrinsicID())
+ if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
+ return Call;
+
return &VPI;
}
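For vp.fabs and vp.sqrt the expansion above drops the mask and %evl (legal when the lanes may be speculated) and calls the plain intrinsic on the data operand. A hedged sketch of roughly what expandPredicationToFPCall builds for the sqrt case; the wrapper name is invented:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    // Replace a speculatable llvm.vp.sqrt(%x, %mask, %evl) with llvm.sqrt(%x).
    llvm::Value *buildUnpredicatedSqrt(llvm::IRBuilder<> &Builder,
                                       llvm::Module &M, llvm::Value *X) {
      llvm::Function *SqrtFn = llvm::Intrinsic::getDeclaration(
          &M, llvm::Intrinsic::sqrt, {X->getType()});
      return Builder.CreateCall(SqrtFn, {X});
    }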
diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index 55d939de426e..75504ef32250 100644
--- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -388,7 +388,7 @@ public:
Register Reg = MO.getReg();
assert(Reg.isPhysical() && "Only physical regs are expected");
- if (isCalleeSaved(Reg) && (AllowGCPtrInCSR || !is_contained(GCRegs, Reg)))
+ if (isCalleeSaved(Reg) && (AllowGCPtrInCSR || !GCRegs.contains(Reg)))
continue;
LLVM_DEBUG(dbgs() << "Will spill " << printReg(Reg, &TRI) << " at index "
@@ -407,7 +407,6 @@ public:
void spillRegisters() {
for (Register Reg : RegsToSpill) {
int FI = CacheFI.getFrameIndex(Reg, EHPad);
- const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
NumSpilledRegisters++;
RegToSlotIdx[Reg] = FI;
@@ -419,6 +418,7 @@ public:
bool IsKill = true;
MachineBasicBlock::iterator InsertBefore(MI);
Reg = performCopyPropagation(Reg, InsertBefore, IsKill, TII, TRI);
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore);
TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, IsKill, FI,
diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp
index 80feb0045406..c0ce37091933 100644
--- a/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -52,8 +52,8 @@ public:
/// in the machine code. It inserts labels at safe points and populates a
/// GCMetadata record for each function.
class GCMachineCodeAnalysis : public MachineFunctionPass {
- GCFunctionInfo *FI;
- const TargetInstrInfo *TII;
+ GCFunctionInfo *FI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
void FindSafePoints(MachineFunction &MF);
void VisitCallPoint(MachineBasicBlock::iterator CI);
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 356d208fc881..e047996f9aa8 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -217,10 +217,14 @@ void GISelCSEInfo::handleRemoveInst(MachineInstr *MI) {
}
void GISelCSEInfo::handleRecordedInsts() {
+ if (HandlingRecordedInstrs)
+ return;
+ HandlingRecordedInstrs = true;
while (!TemporaryInsts.empty()) {
auto *MI = TemporaryInsts.pop_back_val();
handleRecordedInst(MI);
}
+ HandlingRecordedInstrs = false;
}
bool GISelCSEInfo::shouldCSE(unsigned Opc) const {
@@ -392,9 +396,10 @@ GISelInstProfileBuilder::addNodeIDReg(Register Reg) const {
addNodeIDRegType(Ty);
if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) {
- if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>())
+ if (const auto *RB = dyn_cast_if_present<const RegisterBank *>(RCOrRB))
addNodeIDRegType(RB);
- else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
+ else if (const auto *RC =
+ dyn_cast_if_present<const TargetRegisterClass *>(RCOrRB))
addNodeIDRegType(RC);
}
return *this;
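The HandlingRecordedInstrs flag added to handleRecordedInsts() is a plain reentrancy guard: if handling one recorded instruction re-enters the routine, the nested call returns immediately and the outer invocation drains the rest of TemporaryInsts. A generic, self-contained sketch of the pattern (not GISelCSEInfo itself):

    #include <vector>

    struct Worklist {
      bool Draining = false;
      std::vector<int> Items;

      void drain() {
        if (Draining)
          return;            // Nested call: let the outer invocation finish.
        Draining = true;
        while (!Items.empty()) {
          Items.pop_back();
          // ...processing an item may indirectly call drain() again...
        }
        Draining = false;
      }
    };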
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 89872259cfca..28c33e2038e4 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -846,7 +846,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
unsigned NumValues = SplitVTs.size();
Align BaseAlign = DL.getPrefTypeAlign(RetTy);
Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace());
- LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL);
+ LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
@@ -876,8 +876,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
unsigned NumValues = SplitVTs.size();
Align BaseAlign = DL.getPrefTypeAlign(RetTy);
unsigned AS = DL.getAllocaAddrSpace();
- LLT OffsetLLTy =
- getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL);
+ LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getPointerTo(AS)), DL);
MachinePointerInfo PtrInfo(AS);
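The getIntPtrType() to getIndexType() switch matters on targets where a pointer's index width is narrower than its storage size: offsets used for address arithmetic should use the index type. A hedged sketch of the pattern, assuming the LowLevelTypeUtils.h header introduced elsewhere in this import:

    #include "llvm/CodeGen/LowLevelTypeUtils.h"
    #include "llvm/IR/DataLayout.h"

    // Offset LLT for address arithmetic on pointers of type PtrTy.
    llvm::LLT offsetTypeFor(const llvm::DataLayout &DL, llvm::Type *PtrTy) {
      return llvm::getLLTForType(*DL.getIndexType(PtrTy), DL);
    }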
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index af4bb1634746..cc7fb3ee1109 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -399,7 +399,8 @@ namespace {
/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
-PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
+PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
+ PreferredTuple &CurrentUse,
const LLT TyForCandidate,
unsigned OpcodeForCandidate,
MachineInstr *MIForCandidate) {
@@ -425,8 +426,10 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
// Prefer sign extensions to zero extensions as sign-extensions tend to be
- // more expensive.
- if (CurrentUse.Ty == TyForCandidate) {
+ // more expensive. Don't do this if the load is already a zero-extend load
+ // though, otherwise we'll rewrite a zero-extend load into a sign-extend
+ // later.
+ if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
OpcodeForCandidate == TargetOpcode::G_ZEXT)
return CurrentUse;
@@ -535,7 +538,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
// For non power-of-2 types, they will very likely be legalized into multiple
// loads. Don't bother trying to match them into extending loads.
- if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
+ if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
return false;
// Find the preferred type aside from the any-extends (unless it's the only
@@ -566,7 +569,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
.Action != LegalizeActions::Legal)
continue;
}
- Preferred = ChoosePreferredUse(Preferred,
+ Preferred = ChoosePreferredUse(MI, Preferred,
MRI.getType(UseMI.getOperand(0).getReg()),
UseMI.getOpcode(), &UseMI);
}
@@ -727,7 +730,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
Register PtrReg = LoadMI->getPointerReg();
unsigned RegSize = RegTy.getSizeInBits();
uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
- unsigned MaskSizeBits = MaskVal.countTrailingOnes();
+ unsigned MaskSizeBits = MaskVal.countr_one();
// The mask may not be larger than the in-memory type, as it might cover sign
// extended bits
@@ -1189,16 +1192,22 @@ void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
// Check which instruction is first in the block so we don't break def-use
- // deps by "moving" the instruction incorrectly.
- if (dominates(MI, *OtherMI))
+ // deps by "moving" the instruction incorrectly. Also keep track of which
+ // instruction is first so we pick its operands, avoiding use-before-def
+ // bugs.
+ MachineInstr *FirstInst;
+ if (dominates(MI, *OtherMI)) {
Builder.setInstrAndDebugLoc(MI);
- else
+ FirstInst = &MI;
+ } else {
Builder.setInstrAndDebugLoc(*OtherMI);
+ FirstInst = OtherMI;
+ }
Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
: TargetOpcode::G_UDIVREM,
{DestDivReg, DestRemReg},
- {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
+ {FirstInst->getOperand(1), FirstInst->getOperand(2)});
MI.eraseFromParent();
OtherMI->eraseFromParent();
}
@@ -1285,65 +1294,57 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
LegalizerHelper::LegalizeResult::Legalized;
}
-static std::optional<APFloat>
-constantFoldFpUnary(unsigned Opcode, LLT DstTy, const Register Op,
- const MachineRegisterInfo &MRI) {
- const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
- if (!MaybeCst)
- return std::nullopt;
-
- APFloat V = MaybeCst->getValueAPF();
- switch (Opcode) {
+static APFloat constantFoldFpUnary(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const APFloat &Val) {
+ APFloat Result(Val);
+ switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode!");
case TargetOpcode::G_FNEG: {
- V.changeSign();
- return V;
+ Result.changeSign();
+ return Result;
}
case TargetOpcode::G_FABS: {
- V.clearSign();
- return V;
+ Result.clearSign();
+ return Result;
+ }
+ case TargetOpcode::G_FPTRUNC: {
+ bool Unused;
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
+ &Unused);
+ return Result;
}
- case TargetOpcode::G_FPTRUNC:
- break;
case TargetOpcode::G_FSQRT: {
bool Unused;
- V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
- V = APFloat(sqrt(V.convertToDouble()));
+ Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &Unused);
+ Result = APFloat(sqrt(Result.convertToDouble()));
break;
}
case TargetOpcode::G_FLOG2: {
bool Unused;
- V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
- V = APFloat(log2(V.convertToDouble()));
+ Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &Unused);
+ Result = APFloat(log2(Result.convertToDouble()));
break;
}
}
// Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
- // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
- // and `G_FLOG2` reach here.
+ // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
+ // `G_FLOG2` reach here.
bool Unused;
- V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
- return V;
+ Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
+ return Result;
}
-bool CombinerHelper::matchCombineConstantFoldFpUnary(
- MachineInstr &MI, std::optional<APFloat> &Cst) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
- return Cst.has_value();
-}
-
-void CombinerHelper::applyCombineConstantFoldFpUnary(
- MachineInstr &MI, std::optional<APFloat> &Cst) {
- assert(Cst && "Optional is unexpectedly empty!");
+void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+ const ConstantFP *Cst) {
Builder.setInstrAndDebugLoc(MI);
- MachineFunction &MF = Builder.getMF();
- auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
- Register DstReg = MI.getOperand(0).getReg();
- Builder.buildFConstant(DstReg, *FPVal);
+ APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
+ const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
+ Builder.buildFConstant(MI.getOperand(0), *NewCst);
MI.eraseFromParent();
}
@@ -1621,6 +1622,41 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
MI.eraseFromParent();
}
+bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
+ // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+ auto &Shl = cast<GenericMachineInstr>(MI);
+ Register DstReg = Shl.getReg(0);
+ Register SrcReg = Shl.getReg(1);
+ Register ShiftReg = Shl.getReg(2);
+ Register X, C1;
+
+ if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
+ return false;
+
+ if (!mi_match(SrcReg, MRI,
+ m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
+ m_GOr(m_Reg(X), m_Reg(C1))))))
+ return false;
+
+ APInt C1Val, C2Val;
+ if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
+ !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
+ return false;
+
+ auto *SrcDef = MRI.getVRegDef(SrcReg);
+ assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
+ SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
+ LLT SrcTy = MRI.getType(SrcReg);
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto S1 = B.buildShl(SrcTy, X, ShiftReg);
+ auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
+ B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
+ };
+ return true;
+}
+
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
@@ -1658,9 +1694,9 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
!mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
return false;
- // TODO: Should handle vector splat.
Register RHS = MI.getOperand(2).getReg();
- auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
+ MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
+ auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
if (!MaybeShiftAmtVal)
return false;
@@ -1675,12 +1711,13 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
return false;
}
- int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
+ int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
MatchData.Reg = ExtSrc;
MatchData.Imm = ShiftAmt;
- unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
- return MinLeadingZeros >= ShiftAmt;
+ unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
+ unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
+ return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
}
void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
@@ -1763,6 +1800,15 @@ void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
Register DstReg = MI.getOperand(Idx).getReg();
Register SrcReg = Operands[Idx];
+
+ // This combine may run after RegBankSelect, so we need to be aware of
+ // register banks.
+ const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
+ if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
+ SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
+ MRI.setRegClassOrRegBank(SrcReg, DstCB);
+ }
+
if (CanReuseInputDirectly)
replaceRegWith(MRI, DstReg, SrcReg);
else
@@ -2426,10 +2472,7 @@ bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
return true;
}
-bool CombinerHelper::eraseInst(MachineInstr &MI) {
- MI.eraseFromParent();
- return true;
-}
+void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
const MachineOperand &MOP2) {
@@ -2537,7 +2580,7 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
MaybeCst->getSExtValue() == C;
}
-bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
unsigned OpIdx) {
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
Register OldReg = MI.getOperand(0).getReg();
@@ -2545,17 +2588,15 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
MI.eraseFromParent();
replaceRegWith(MRI, OldReg, Replacement);
- return true;
}
-bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
Register Replacement) {
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
Register OldReg = MI.getOperand(0).getReg();
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
MI.eraseFromParent();
replaceRegWith(MRI, OldReg, Replacement);
- return true;
}
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
@@ -2590,36 +2631,32 @@ bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
}
-bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildFConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
+void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildUndef(MI.getOperand(0));
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchSimplifyAddToSub(
@@ -2750,9 +2787,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
Register Y = RightHandInst->getOperand(1).getReg();
LLT XTy = MRI.getType(X);
LLT YTy = MRI.getType(Y);
- if (XTy != YTy)
- return false;
- if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ if (!XTy.isValid() || XTy != YTy)
return false;
// Optional extra source register.
@@ -2779,6 +2814,9 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
}
}
+ if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ return false;
+
// Record the steps to build the new instructions.
//
// Steps to build (logic x, y)
@@ -3227,7 +3265,7 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
/// \p SelectOperand is the operand in binary operator \p MI that is the select
/// to fold.
-bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
+void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
const unsigned &SelectOperand) {
Builder.setInstrAndDebugLoc(MI);
@@ -3263,8 +3301,6 @@ bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
MI.eraseFromParent();
-
- return true;
}
std::optional<SmallVector<Register, 8>>
@@ -3612,275 +3648,6 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
-/// Check if the store \p Store is a truncstore that can be merged. That is,
-/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
-/// Register then it does not need to match and SrcVal is set to the source
-/// value found.
-/// On match, returns the start byte offset of the \p SrcVal that is being
-/// stored.
-static std::optional<int64_t>
-getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
- MachineRegisterInfo &MRI) {
- Register TruncVal;
- if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
- return std::nullopt;
-
- // The shift amount must be a constant multiple of the narrow type.
- // It is translated to the offset address in the wide source value "y".
- //
- // x = G_LSHR y, ShiftAmtC
- // s8 z = G_TRUNC x
- // store z, ...
- Register FoundSrcVal;
- int64_t ShiftAmt;
- if (!mi_match(TruncVal, MRI,
- m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
- m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
- if (!SrcVal.isValid() || TruncVal == SrcVal) {
- if (!SrcVal.isValid())
- SrcVal = TruncVal;
- return 0; // If it's the lowest index store.
- }
- return std::nullopt;
- }
-
- unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
- if (ShiftAmt % NarrowBits != 0)
- return std::nullopt;
- const unsigned Offset = ShiftAmt / NarrowBits;
-
- if (SrcVal.isValid() && FoundSrcVal != SrcVal)
- return std::nullopt;
-
- if (!SrcVal.isValid())
- SrcVal = FoundSrcVal;
- else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
- return std::nullopt;
- return Offset;
-}
-
-/// Match a pattern where a wide type scalar value is stored by several narrow
-/// stores. Fold it into a single store or a BSWAP and a store if the targets
-/// supports it.
-///
-/// Assuming little endian target:
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 0) & 0xFF;
-/// p[1] = (val >> 8) & 0xFF;
-/// p[2] = (val >> 16) & 0xFF;
-/// p[3] = (val >> 24) & 0xFF;
-/// =>
-/// *((i32)p) = val;
-///
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 24) & 0xFF;
-/// p[1] = (val >> 16) & 0xFF;
-/// p[2] = (val >> 8) & 0xFF;
-/// p[3] = (val >> 0) & 0xFF;
-/// =>
-/// *((i32)p) = BSWAP(val);
-bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
- MergeTruncStoresInfo &MatchInfo) {
- auto &StoreMI = cast<GStore>(MI);
- LLT MemTy = StoreMI.getMMO().getMemoryType();
-
- // We only handle merging simple stores of 1-4 bytes.
- if (!MemTy.isScalar())
- return false;
- switch (MemTy.getSizeInBits()) {
- case 8:
- case 16:
- case 32:
- break;
- default:
- return false;
- }
- if (!StoreMI.isSimple())
- return false;
-
- // We do a simple search for mergeable stores prior to this one.
- // Any potential alias hazard along the way terminates the search.
- SmallVector<GStore *> FoundStores;
-
- // We're looking for:
- // 1) a (store(trunc(...)))
- // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
- // the partial value stored.
- // 3) where the offsets form either a little or big-endian sequence.
-
- auto &LastStore = StoreMI;
-
- // The single base pointer that all stores must use.
- Register BaseReg;
- int64_t LastOffset;
- if (!mi_match(LastStore.getPointerReg(), MRI,
- m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
- BaseReg = LastStore.getPointerReg();
- LastOffset = 0;
- }
-
- GStore *LowestIdxStore = &LastStore;
- int64_t LowestIdxOffset = LastOffset;
-
- Register WideSrcVal;
- auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
- if (!LowestShiftAmt)
- return false; // Didn't match a trunc.
- assert(WideSrcVal.isValid());
-
- LLT WideStoreTy = MRI.getType(WideSrcVal);
- // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
- if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
- return false;
- const unsigned NumStoresRequired =
- WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
-
- SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
- OffsetMap[*LowestShiftAmt] = LastOffset;
- FoundStores.emplace_back(&LastStore);
-
- // Search the block up for more stores.
- // We use a search threshold of 10 instructions here because the combiner
- // works top-down within a block, and we don't want to search an unbounded
- // number of predecessor instructions trying to find matching stores.
- // If we moved this optimization into a separate pass then we could probably
- // use a more efficient search without having a hard-coded threshold.
- const int MaxInstsToCheck = 10;
- int NumInstsChecked = 0;
- for (auto II = ++LastStore.getReverseIterator();
- II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
- ++II) {
- NumInstsChecked++;
- GStore *NewStore;
- if ((NewStore = dyn_cast<GStore>(&*II))) {
- if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
- break;
- } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
- break;
- } else {
- continue; // This is a safe instruction we can look past.
- }
-
- Register NewBaseReg;
- int64_t MemOffset;
- // Check we're storing to the same base + some offset.
- if (!mi_match(NewStore->getPointerReg(), MRI,
- m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
- NewBaseReg = NewStore->getPointerReg();
- MemOffset = 0;
- }
- if (BaseReg != NewBaseReg)
- break;
-
- auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
- if (!ShiftByteOffset)
- break;
- if (MemOffset < LowestIdxOffset) {
- LowestIdxOffset = MemOffset;
- LowestIdxStore = NewStore;
- }
-
- // Map the offset in the store and the offset in the combined value, and
- // early return if it has been set before.
- if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
- OffsetMap[*ShiftByteOffset] != INT64_MAX)
- break;
- OffsetMap[*ShiftByteOffset] = MemOffset;
-
- FoundStores.emplace_back(NewStore);
- // Reset counter since we've found a matching inst.
- NumInstsChecked = 0;
- if (FoundStores.size() == NumStoresRequired)
- break;
- }
-
- if (FoundStores.size() != NumStoresRequired) {
- return false;
- }
-
- const auto &DL = LastStore.getMF()->getDataLayout();
- auto &C = LastStore.getMF()->getFunction().getContext();
- // Check that a store of the wide type is both allowed and fast on the target
- unsigned Fast = 0;
- bool Allowed = getTargetLowering().allowsMemoryAccess(
- C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
- if (!Allowed || !Fast)
- return false;
-
- // Check if the pieces of the value are going to the expected places in memory
- // to merge the stores.
- unsigned NarrowBits = MemTy.getScalarSizeInBits();
- auto checkOffsets = [&](bool MatchLittleEndian) {
- if (MatchLittleEndian) {
- for (unsigned i = 0; i != NumStoresRequired; ++i)
- if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
- return false;
- } else { // MatchBigEndian by reversing loop counter.
- for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
- ++i, --j)
- if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
- return false;
- }
- return true;
- };
-
- // Check if the offsets line up for the native data layout of this target.
- bool NeedBswap = false;
- bool NeedRotate = false;
- if (!checkOffsets(DL.isLittleEndian())) {
- // Special-case: check if byte offsets line up for the opposite endian.
- if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
- NeedBswap = true;
- else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
- NeedRotate = true;
- else
- return false;
- }
-
- if (NeedBswap &&
- !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
- return false;
- if (NeedRotate &&
- !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
- return false;
-
- MatchInfo.NeedBSwap = NeedBswap;
- MatchInfo.NeedRotate = NeedRotate;
- MatchInfo.LowestIdxStore = LowestIdxStore;
- MatchInfo.WideSrcVal = WideSrcVal;
- MatchInfo.FoundStores = std::move(FoundStores);
- return true;
-}
-
-void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
- MergeTruncStoresInfo &MatchInfo) {
-
- Builder.setInstrAndDebugLoc(MI);
- Register WideSrcVal = MatchInfo.WideSrcVal;
- LLT WideStoreTy = MRI.getType(WideSrcVal);
-
- if (MatchInfo.NeedBSwap) {
- WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
- } else if (MatchInfo.NeedRotate) {
- assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
- "Unexpected type for rotate");
- auto RotAmt =
- Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
- WideSrcVal =
- Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
- }
-
- Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
- MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
- MatchInfo.LowestIdxStore->getMMO().getAlign());
-
- // Erase the old stores.
- for (auto *ST : MatchInfo.FoundStores)
- ST->eraseFromParent();
-}
-
bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
MachineInstr *&ExtMI) {
assert(MI.getOpcode() == TargetOpcode::G_PHI);
@@ -4395,7 +4162,7 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
if (static_cast<uint64_t>(LSBImm) >= Size)
return false;
- uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
+ uint64_t Width = APInt(Size, AndImm).countr_one();
MatchInfo = [=](MachineIRBuilder &B) {
auto WidthCst = B.buildConstant(ExtractTy, Width);
auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
@@ -4496,7 +4263,7 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
// Calculate start position and width of the extract.
const int64_t Pos = ShrAmt;
- const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
+ const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
// It's preferable to keep the shift, rather than form G_SBFX.
// TODO: remove the G_AND via demanded bits analysis.
@@ -4695,6 +4462,62 @@ bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
return false;
}
+bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
+ Register OpLHS, Register OpRHS,
+ BuildFnTy &MatchInfo) {
+ LLT OpRHSTy = MRI.getType(OpRHS);
+ MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
+
+ if (OpLHSDef->getOpcode() != Opc)
+ return false;
+
+ MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
+ Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
+ Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
+
+ // If the inner op is (X op C), pull the constant out so it can be folded with
+ // other constants in the expression tree. Folding is not guaranteed so we
+ // might have (C1 op C2). In that case do not pull a constant out because it
+ // won't help and can lead to infinite loops.
+ if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
+ !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
+ if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
+ // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
+ B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
+ };
+ return true;
+ }
+ if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
+ // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+ // iff (op x, c1) has one use
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
+ B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // We don't check if the reassociation will break a legal addressing mode
+ // here since pointer arithmetic is handled by G_PTR_ADD.
+ unsigned Opc = MI.getOpcode();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
+ return true;
+ if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
+ return true;
+ return false;
+}
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
Register Op1 = MI.getOperand(1).getReg();
@@ -4766,7 +4589,7 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
return false;
// No point in combining if there's nothing to truncate.
- unsigned NarrowWidth = Mask.countTrailingOnes();
+ unsigned NarrowWidth = Mask.countr_one();
if (NarrowWidth == WideTy.getSizeInBits())
return false;
LLT NarrowTy = LLT::scalar(NarrowWidth);
@@ -4956,7 +4779,7 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
// Magic algorithm doesn't work for division by 1. We need to emit a select
// at the end.
// TODO: Use undef values for divisor of 1.
- if (!Divisor.isOneValue()) {
+ if (!Divisor.isOne()) {
UnsignedDivisionByConstantInfo magics =
UnsignedDivisionByConstantInfo::get(Divisor);
@@ -5144,7 +4967,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
auto *CI = cast<ConstantInt>(C);
APInt Divisor = CI->getValue();
- unsigned Shift = Divisor.countTrailingZeros();
+ unsigned Shift = Divisor.countr_zero();
if (Shift) {
Divisor.ashrInPlace(Shift);
UseSRA = true;
@@ -6185,6 +6008,16 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
return CmpInst::isEquality(Pred) && Y.isValid();
}
+bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
+ Register ShiftReg = MI.getOperand(2).getReg();
+ LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
+ auto IsShiftTooBig = [&](const Constant *C) {
+ auto *CI = dyn_cast<ConstantInt>(C);
+ return CI && CI->uge(ResTy.getScalarSizeInBits());
+ };
+ return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
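The new matchCommuteShift combine rewrites (shl (add x, c1), c2) into (add (shl x, c2), c1 << c2), and likewise for or. Both rewrites are exact in fixed-width integer arithmetic: add distributes over shl modulo 2^n, and shl distributes over or bit by bit. A compile-time spot check:

    #include <cstdint>

    // (x + c1) << c2  ==  (x << c2) + (c1 << c2)   (mod 2^32)
    static_assert(((std::uint32_t(7) + 5u) << 3) ==
                  ((std::uint32_t(7) << 3) + (5u << 3)));
    // (x | c1) << c2  ==  (x << c2) | (c1 << c2)
    static_assert(((std::uint32_t(0x9) | 0x4u) << 2) ==
                  ((std::uint32_t(0x9) << 2) | (0x4u << 2)));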
diff --git a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
new file mode 100644
index 000000000000..d747cbf5aadc
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -0,0 +1,68 @@
+//===- llvm/CodeGen/GlobalISel/GIMatchTableExecutor.cpp -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements the GIMatchTableExecutor class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define DEBUG_TYPE "gi-match-table-executor"
+
+using namespace llvm;
+
+GIMatchTableExecutor::MatcherState::MatcherState(unsigned MaxRenderers)
+ : Renderers(MaxRenderers) {}
+
+GIMatchTableExecutor::GIMatchTableExecutor() = default;
+
+bool GIMatchTableExecutor::isOperandImmEqual(
+ const MachineOperand &MO, int64_t Value,
+ const MachineRegisterInfo &MRI) const {
+ if (MO.isReg() && MO.getReg())
+ if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
+ return VRegVal->Value.getSExtValue() == Value;
+ return false;
+}
+
+bool GIMatchTableExecutor::isBaseWithConstantOffset(
+ const MachineOperand &Root, const MachineRegisterInfo &MRI) const {
+ if (!Root.isReg())
+ return false;
+
+ MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
+ if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
+ return false;
+
+ MachineOperand &RHS = RootI->getOperand(2);
+ MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg());
+ if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT)
+ return false;
+
+ return true;
+}
+
+bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
+ MachineInstr &IntoMI) const {
+ // Immediate neighbours are already folded.
+ if (MI.getParent() == IntoMI.getParent() &&
+ std::next(MI.getIterator()) == IntoMI.getIterator())
+ return true;
+
+ // Convergent instructions cannot be moved in the CFG.
+ if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
+ return false;
+
+ return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
+ !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index bfbe7e1c3e55..363ffbfa90b5 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -115,7 +116,7 @@ void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
@@ -191,7 +192,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Depth + 1);
// Known bits are the values that are shared by every demanded element.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -235,10 +236,10 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
// For COPYs we don't do anything, don't increase the depth.
computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
Depth + (Opcode != TargetOpcode::COPY));
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
// If we reach a point where we don't know anything
// just stop looking through the operands.
- if (Known.One == 0 && Known.Zero == 0)
+ if (Known.isUnknown())
break;
} else {
// We know nothing.
@@ -750,7 +751,7 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
// Okay, we know that the sign bit in Mask is set. Use CLO to determine
// the number of identical bits in the top of the input value.
Mask <<= Mask.getBitWidth() - TyBits;
- return std::max(FirstAnswer, Mask.countLeadingOnes());
+ return std::max(FirstAnswer, Mask.countl_one());
}
unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
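KnownBits::intersectWith, which replaces the removed KnownBits::commonBits helper, keeps only the bits known with the same value on both operands (Zero = A.Zero & B.Zero, One = A.One & B.One). A hedged sketch of merging facts from two paths; the wrapper name is invented:

    #include "llvm/Support/KnownBits.h"

    // Bits known on every incoming path remain known after the merge.
    llvm::KnownBits mergePaths(const llvm::KnownBits &A,
                               const llvm::KnownBits &B) {
      return A.intersectWith(B);
    }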
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 7d811dc0ad8f..9a67a8d05a4d 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -43,6 +44,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -74,7 +76,6 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -300,7 +301,7 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Op1 = getOrCreateVReg(*U.getOperand(1));
Register Res = getOrCreateVReg(U);
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (isa<Instruction>(U)) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
@@ -314,7 +315,7 @@ bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Res = getOrCreateVReg(U);
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (isa<Instruction>(U)) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
@@ -345,7 +346,7 @@ bool IRTranslator::translateCompare(const User &U,
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
else {
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (CI)
Flags = MachineInstr::copyFlagsFromInstruction(*CI);
MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
@@ -844,8 +845,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
// For conditional branch lowering, we might try to do something silly like
// emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
// just re-use the existing condition vreg.
- if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
- CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+ if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && CI->isOne() &&
+ CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
Cond = CondLHS;
} else {
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
@@ -1018,7 +1019,7 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
LLT MaskTy = SwitchOpTy;
if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() ||
- !isPowerOf2_32(MaskTy.getSizeInBits()))
+ !llvm::has_single_bit<uint32_t>(MaskTy.getSizeInBits()))
MaskTy = LLT::scalar(PtrTy.getSizeInBits());
else {
// Ensure that the type will fit the mask value.
@@ -1074,14 +1075,14 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
auto MaskTrailingZeros =
- MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
+ MIB.buildConstant(SwitchTy, llvm::countr_zero(B.Mask));
Cmp =
MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
.getReg(0);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
auto MaskTrailingOnes =
- MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
+ MIB.buildConstant(SwitchTy, llvm::countr_one(B.Mask));
Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
.getReg(0);
} else {
@@ -1294,7 +1295,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
AAMDNodes AAInfo = LI.getAAMetadata();
const Value *Ptr = LI.getPointerOperand();
- Type *OffsetIRTy = DL->getIntPtrType(Ptr->getType());
+ Type *OffsetIRTy = DL->getIndexType(Ptr->getType());
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
if (CLI->supportSwiftError() && isSwiftError(Ptr)) {
@@ -1342,7 +1343,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
Register Base = getOrCreateVReg(*SI.getPointerOperand());
- Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
+ Type *OffsetIRTy = DL->getIndexType(SI.getPointerOperandType());
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
@@ -1438,7 +1439,7 @@ bool IRTranslator::translateSelect(const User &U,
ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
Flags = MachineInstr::copyFlagsFromInstruction(*SI);
@@ -1468,8 +1469,14 @@ bool IRTranslator::translateBitCast(const User &U,
MachineIRBuilder &MIRBuilder) {
// If we're bitcasting to the source type, we can reuse the source vreg.
if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
- getLLTForType(*U.getType(), *DL))
+ getLLTForType(*U.getType(), *DL)) {
+ // If the source is a ConstantInt then it was probably created by
+ // ConstantHoisting and we should leave it alone.
+ if (isa<ConstantInt>(U.getOperand(0)))
+ return translateCast(TargetOpcode::G_CONSTANT_FOLD_BARRIER, U,
+ MIRBuilder);
return translateCopy(U, *U.getOperand(0), MIRBuilder);
+ }
return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
}
@@ -1488,7 +1495,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Register BaseReg = getOrCreateVReg(Op0);
Type *PtrIRTy = Op0.getType();
LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
- Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
+ Type *OffsetIRTy = DL->getIndexType(PtrIRTy);
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
// Normalize Vector GEP - all scalar operands should be converted to the
@@ -1513,7 +1520,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
.getReg(0);
PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
PtrTy = getLLTForType(*PtrIRTy, *DL);
- OffsetIRTy = DL->getIntPtrType(PtrIRTy);
+ OffsetIRTy = DL->getIndexType(PtrIRTy);
OffsetTy = getLLTForType(*OffsetIRTy, *DL);
}
@@ -1759,6 +1766,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FLOG2;
case Intrinsic::log10:
return TargetOpcode::G_FLOG10;
+ case Intrinsic::ldexp:
+ return TargetOpcode::G_FLDEXP;
case Intrinsic::nearbyint:
return TargetOpcode::G_FNEARBYINT;
case Intrinsic::pow:
@@ -1851,6 +1860,8 @@ static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_STRICT_FMA;
case Intrinsic::experimental_constrained_sqrt:
return TargetOpcode::G_STRICT_FSQRT;
+ case Intrinsic::experimental_constrained_ldexp:
+ return TargetOpcode::G_STRICT_FLDEXP;
default:
return 0;
}
@@ -1864,7 +1875,7 @@ bool IRTranslator::translateConstrainedFPIntrinsic(
if (!Opcode)
return false;
- unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
+ uint32_t Flags = MachineInstr::copyFlagsFromInstruction(FPI);
if (EB == fp::ExceptionBehavior::ebIgnore)
Flags |= MachineInstr::NoFPExcept;
@@ -1879,6 +1890,60 @@ bool IRTranslator::translateConstrainedFPIntrinsic(
return true;
}
+std::optional<MCRegister> IRTranslator::getArgPhysReg(Argument &Arg) {
+ auto VRegs = getOrCreateVRegs(Arg);
+ if (VRegs.size() != 1)
+ return std::nullopt;
+
+ // Arguments are lowered as a copy of a livein physical register.
+ auto *VRegDef = MF->getRegInfo().getVRegDef(VRegs[0]);
+ if (!VRegDef || !VRegDef->isCopy())
+ return std::nullopt;
+ return VRegDef->getOperand(1).getReg().asMCReg();
+}
+
+bool IRTranslator::translateIfEntryValueArgument(const DbgValueInst &DebugInst,
+ MachineIRBuilder &MIRBuilder) {
+ auto *Arg = dyn_cast<Argument>(DebugInst.getValue());
+ if (!Arg)
+ return false;
+
+ const DIExpression *Expr = DebugInst.getExpression();
+ if (!Expr->isEntryValue())
+ return false;
+
+ std::optional<MCRegister> PhysReg = getArgPhysReg(*Arg);
+ if (!PhysReg) {
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n"
+ << DebugInst << "\n");
+ return true;
+ }
+
+ MIRBuilder.buildDirectDbgValue(*PhysReg, DebugInst.getVariable(),
+ DebugInst.getExpression());
+ return true;
+}
+
+bool IRTranslator::translateIfEntryValueArgument(
+ const DbgDeclareInst &DebugInst) {
+ auto *Arg = dyn_cast<Argument>(DebugInst.getAddress());
+ if (!Arg)
+ return false;
+
+ const DIExpression *Expr = DebugInst.getExpression();
+ if (!Expr->isEntryValue())
+ return false;
+
+ std::optional<MCRegister> PhysReg = getArgPhysReg(*Arg);
+ if (!PhysReg)
+ return false;
+
+ MF->setVariableDbgInfo(DebugInst.getVariable(), Expr, *PhysReg,
+ DebugInst.getDebugLoc());
+ return true;
+}
+
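The two helpers added above handle DIExpressions marked with DW_OP_LLVM_entry_value: such an expression describes a parameter in terms of the physical register it occupied on function entry, so the translator walks the argument's single vreg back to the COPY from the livein register. A hedged standalone sketch of that lookup (the explicit isPhysical check is an assumption added for illustration, not taken from the patch):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <optional>
using namespace llvm;

// Illustrative only: find the livein physical register an argument vreg was
// copied from; return nullopt if the defining instruction is not such a copy.
static std::optional<MCRegister>
liveInPhysRegFor(const MachineRegisterInfo &MRI, Register VReg) {
  const MachineInstr *Def = MRI.getVRegDef(VReg);
  if (!Def || !Def->isCopy())
    return std::nullopt;
  Register Src = Def->getOperand(1).getReg();
  if (!Src.isPhysical()) // assumption: reject vreg-to-vreg copies explicitly
    return std::nullopt;
  return Src.asMCReg();
}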
bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder) {
if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
@@ -1945,12 +2010,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// instructions (in fact, they get ignored if they *do* exist).
MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
getOrCreateFrameIndex(*AI), DI.getDebugLoc());
- } else {
- // A dbg.declare describes the address of a source variable, so lower it
- // into an indirect DBG_VALUE.
- MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
- DI.getVariable(), DI.getExpression());
+ return true;
}
+
+ if (translateIfEntryValueArgument(DI))
+ return true;
+
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
+ MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
+ DI.getVariable(), DI.getExpression());
return true;
}
case Intrinsic::dbg_label: {
@@ -1991,16 +2060,32 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// DI cannot produce a valid DBG_VALUE, so produce an undef DBG_VALUE to
// terminate any prior location.
MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
- } else if (const auto *CI = dyn_cast<Constant>(V)) {
+ return true;
+ }
+ if (const auto *CI = dyn_cast<Constant>(V)) {
MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
- } else {
- for (Register Reg : getOrCreateVRegs(*V)) {
- // FIXME: This does not handle register-indirect values at offset 0. The
- // direct/indirect thing shouldn't really be handled by something as
- // implicit as reg+noreg vs reg+imm in the first place, but it seems
- // pretty baked in right now.
- MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
- }
+ return true;
+ }
+ if (auto *AI = dyn_cast<AllocaInst>(V);
+ AI && AI->isStaticAlloca() && DI.getExpression()->startsWithDeref()) {
+ // If the value is an alloca and the expression starts with a
+ // dereference, track a stack slot instead of a register, as registers
+ // may be clobbered.
+ auto ExprOperands = DI.getExpression()->getElements();
+ auto *ExprDerefRemoved =
+ DIExpression::get(AI->getContext(), ExprOperands.drop_front());
+ MIRBuilder.buildFIDbgValue(getOrCreateFrameIndex(*AI), DI.getVariable(),
+ ExprDerefRemoved);
+ return true;
+ }
+ if (translateIfEntryValueArgument(DI, MIRBuilder))
+ return true;
+ for (Register Reg : getOrCreateVRegs(*V)) {
+ // FIXME: This does not handle register-indirect values at offset 0. The
+ // direct/indirect thing shouldn't really be handled by something as
+ // implicit as reg+noreg vs reg+imm in the first place, but it seems
+ // pretty baked in right now.
+ MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
}
return true;
}
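The new alloca case above rewrites a dbg.value of a static alloca whose expression starts with DW_OP_deref into a frame-index DBG_VALUE with the leading deref dropped, so the variable location survives register clobbers. A minimal sketch of the expression rewrite it performs (illustrative helper, not the upstream API):

#include "llvm/IR/DebugInfoMetadata.h"
#include <cassert>
using namespace llvm;

// Illustrative only: drop the leading DW_OP_deref from an expression known to
// start with one, mirroring the rewrite done in the dbg.value path above.
static const DIExpression *dropLeadingDeref(const DIExpression *Expr) {
  assert(Expr->startsWithDeref() && "caller guarantees a leading deref");
  return DIExpression::get(Expr->getContext(),
                           Expr->getElements().drop_front());
}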
@@ -2090,6 +2175,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getOrCreateVReg(*CI.getArgOperand(0)),
MachineInstr::copyFlagsFromInstruction(CI));
return true;
+ case Intrinsic::frexp: {
+ ArrayRef<Register> VRegs = getOrCreateVRegs(CI);
+ MIRBuilder.buildFFrexp(VRegs[0], VRegs[1],
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ }
case Intrinsic::memcpy_inline:
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
case Intrinsic::memcpy:
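llvm.frexp, mapped to G_FFREXP above, follows the C frexp contract: the first result is a fraction with magnitude in [0.5, 1) and the second is the integral exponent such that value = fraction * 2^exp. A runnable reminder of that contract (plain C++, not LLVM code):

#include <cmath>
#include <cstdio>

int main() {
  int Exp = 0;
  double Frac = std::frexp(24.0, &Exp); // 24.0 == 0.75 * 2^5
  std::printf("frac = %g, exp = %d\n", Frac, Exp); // prints 0.75 and 5
}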
@@ -2296,7 +2388,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return CLI->lowerCall(MIRBuilder, Info);
}
case Intrinsic::fptrunc_round: {
- unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+ uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
// Convert the metadata argument to a constant integer
Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index e0357c50e555..3925611f1485 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -391,10 +391,12 @@ bool InlineAsmLowering::lowerInlineAsm(
Inst.addReg(SourceRegs[0]);
} else {
// Otherwise, this outputs to a register (directly for C_Register /
- // C_RegisterClass. Find a register that we can use.
+      // C_RegisterClass / C_Other).
assert(OpInfo.ConstraintType == TargetLowering::C_Register ||
- OpInfo.ConstraintType == TargetLowering::C_RegisterClass);
+ OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Other);
+ // Find a register that we can use.
if (OpInfo.Regs.empty()) {
LLVM_DEBUG(dbgs()
<< "Couldn't allocate output register for constraint\n");
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index f780050ca3f1..9bbef11067ae 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/config.h"
@@ -104,7 +105,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
CodeGenCoverage CoverageInfo;
assert(ISel && "Cannot work without InstructionSelector");
- ISel->setupMF(MF, KB, CoverageInfo, PSI, BFI);
+ ISel->setupMF(MF, KB, &CoverageInfo, PSI, BFI);
// An optimization remark emitter. Used to report failures.
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
@@ -165,12 +166,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- // Eliminate hints.
- if (isPreISelGenericOptimizationHint(MI.getOpcode())) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ // Eliminate hints or G_CONSTANT_FOLD_BARRIER.
+ if (isPreISelGenericOptimizationHint(MI.getOpcode()) ||
+ MI.getOpcode() == TargetOpcode::G_CONSTANT_FOLD_BARRIER) {
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
- // At this point, the destination register class of the hint may have
+ // At this point, the destination register class of the op may have
// been decided.
//
// Propagate that through to the source register.
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 8959d215ecd1..c48591cc2f02 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -5,64 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This file implements the InstructionSelector class.
-//
-//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-#define DEBUG_TYPE "instructionselector"
-
-using namespace llvm;
-
-InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers)
- : Renderers(MaxRenderers) {}
-
-InstructionSelector::InstructionSelector() = default;
-
-bool InstructionSelector::isOperandImmEqual(
- const MachineOperand &MO, int64_t Value,
- const MachineRegisterInfo &MRI) const {
- if (MO.isReg() && MO.getReg())
- if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
- return VRegVal->Value.getSExtValue() == Value;
- return false;
-}
-
-bool InstructionSelector::isBaseWithConstantOffset(
- const MachineOperand &Root, const MachineRegisterInfo &MRI) const {
- if (!Root.isReg())
- return false;
-
- MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
- if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
- return false;
-
- MachineOperand &RHS = RootI->getOperand(2);
- MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg());
- if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT)
- return false;
-
- return true;
-}
-bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
- MachineInstr &IntoMI) const {
- // Immediate neighbours are already folded.
- if (MI.getParent() == IntoMI.getParent() &&
- std::next(MI.getIterator()) == IntoMI.getIterator())
- return true;
+namespace llvm {
- // Convergent instructions cannot be moved in the CFG.
- if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
- return false;
+// vtable anchor
+InstructionSelector::~InstructionSelector() = default;
- return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
- !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
-}
+} // namespace llvm
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 54a82cac95d5..2c77ed8b0600 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -164,7 +164,8 @@ LegalityPredicate LegalityPredicates::sizeNotMultipleOf(unsigned TypeIdx,
LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
- return QueryTy.isScalar() && !isPowerOf2_32(QueryTy.getSizeInBits());
+ return QueryTy.isScalar() &&
+ !llvm::has_single_bit<uint32_t>(QueryTy.getSizeInBits());
};
}
@@ -184,14 +185,16 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
- return !isPowerOf2_32(Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
+ return !llvm::has_single_bit<uint32_t>(
+ Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
};
}
LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy;
- return !MemTy.isByteSized() || !isPowerOf2_32(MemTy.getSizeInBytes());
+ return !MemTy.isByteSized() ||
+ !llvm::has_single_bit<uint32_t>(MemTy.getSizeInBytes());
};
}
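The replacements in this file (and the later PowerOf2Floor -> bit_floor changes) track the C++20 <bit> naming that LLVM's helpers now mirror; llvm::has_single_bit<uint32_t>(x) is equivalent to the old isPowerOf2_32(x). A standalone C++20 illustration (not LLVM code):

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  for (uint32_t Size : {8u, 24u, 64u})
    std::printf("%u: has_single_bit=%d bit_floor=%u\n", Size,
                std::has_single_bit(Size), std::bit_floor(Size));
  // 8 and 64 are powers of two; bit_floor(24) == 16.
}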
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 1a13f39c100c..aecbe0b7604c 100644
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -75,6 +76,7 @@ INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
false)
@@ -85,6 +87,8 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
AU.addPreserved<GISelCSEAnalysisWrapperPass>();
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -173,7 +177,8 @@ Legalizer::MFResult
Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
ArrayRef<GISelChangeObserver *> AuxObservers,
LostDebugLocObserver &LocObserver,
- MachineIRBuilder &MIRBuilder) {
+ MachineIRBuilder &MIRBuilder,
+ GISelKnownBits *KB) {
MIRBuilder.setMF(MF);
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -212,7 +217,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
// Now install the observer as the delegate to MF.
// This will keep all the observers notified about new insertions/deletions.
RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
- LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder);
+ LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder, KB);
LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
bool Changed = false;
SmallVector<MachineInstr *, 128> RetryList;
@@ -314,8 +319,6 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
- const size_t NumBlocks = MF.size();
-
std::unique_ptr<MachineIRBuilder> MIRBuilder;
GISelCSEInfo *CSEInfo = nullptr;
bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences()
@@ -338,25 +341,18 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
if (VerifyDebugLocs > DebugLocVerifyLevel::None)
AuxObservers.push_back(&LocObserver);
+ // This allows Known Bits Analysis in the legalizer.
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+
const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo();
- MFResult Result =
- legalizeMachineFunction(MF, LI, AuxObservers, LocObserver, *MIRBuilder);
+ MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, LocObserver,
+ *MIRBuilder, KB);
if (Result.FailedOn) {
reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
"unable to legalize instruction", *Result.FailedOn);
return false;
}
- // For now don't support if new blocks are inserted - we would need to fix the
- // outer loop for that.
- if (MF.size() != NumBlocks) {
- MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure",
- MF.getFunction().getSubprogram(),
- /*MBB=*/nullptr);
- R << "inserting blocks is not supported yet";
- reportGISelFailure(MF, TPC, MORE, R);
- return false;
- }
if (LocObserver.getNumLostDebugLocs()) {
MachineOptimizationRemarkMissed R("gisel-legalize", "LostDebugLoc",
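With the wiring above, the Legalizer now runs GISelKnownBitsAnalysis and hands the result to LegalizerHelper, so individual legalizations can consult known bits when choosing an expansion. A hedged sketch of the kind of query this enables (usage illustrative; only the getKnownBits API is assumed from GISelKnownBits):

#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
using namespace llvm;

// Illustrative only: a predicate a lowering could use, tolerating the
// KB == nullptr case produced by the LegalizerHelper constructor that takes
// no known-bits analysis.
static bool isKnownNonNegative(GISelKnownBits *KB, Register Reg) {
  return KB && KB->getKnownBits(Reg).isNonNegative();
}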
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 8a1fce2d3d65..f0da0d88140f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -15,12 +15,14 @@
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
@@ -102,13 +104,13 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF,
MachineIRBuilder &Builder)
: MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
LI(*MF.getSubtarget().getLegalizerInfo()),
- TLI(*MF.getSubtarget().getTargetLowering()) { }
+ TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
GISelChangeObserver &Observer,
- MachineIRBuilder &B)
- : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
- TLI(*MF.getSubtarget().getTargetLowering()) { }
+ MachineIRBuilder &B, GISelKnownBits *KB)
+ : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
+ TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
@@ -540,6 +542,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(LOG_F);
case TargetOpcode::G_FLOG2:
RTLIBCASE(LOG2_F);
+ case TargetOpcode::G_FLDEXP:
+ RTLIBCASE(LDEXP_F);
case TargetOpcode::G_FCEIL:
RTLIBCASE(CEIL_F);
case TargetOpcode::G_FFLOOR:
@@ -824,6 +828,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FLDEXP:
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FEXP2:
case TargetOpcode::G_FCEIL:
@@ -1411,6 +1416,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_FLDEXP:
+ case TargetOpcode::G_STRICT_FLDEXP:
+ return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
}
}
@@ -1504,13 +1512,11 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
if (DstTy.isVector())
return UnableToLegalize;
- Register Src1 = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(Src1);
+ LLT SrcTy = MRI.getType(Src1Reg);
const int DstSize = DstTy.getSizeInBits();
const int SrcSize = SrcTy.getSizeInBits();
const int WideSize = WideTy.getSizeInBits();
@@ -1522,7 +1528,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
if (WideSize >= DstSize) {
// Directly pack the bits in the target type.
- Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
+ Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
for (unsigned I = 2; I != NumOps; ++I) {
const unsigned Offset = (I - 1) * PartSize;
@@ -1753,11 +1759,7 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
-
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Offset = MI.getOperand(2).getImm();
if (TypeIdx == 0) {
@@ -1978,10 +1980,7 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
}
bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
- Register Result = MI.getOperand(0).getReg();
- Register OriginalOverflow = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
LLT SrcTy = MRI.getType(LHS);
LLT OverflowTy = MRI.getType(OriginalOverflow);
unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
@@ -2560,12 +2559,41 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_FPOWI: {
- if (TypeIdx != 0)
- return UnableToLegalize;
+ case TargetOpcode::G_FPOWI:
+ case TargetOpcode::G_FLDEXP:
+ case TargetOpcode::G_STRICT_FLDEXP: {
+ if (TypeIdx == 0) {
+ if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (TypeIdx == 1) {
+ // For some reason SelectionDAG tries to promote to a libcall without
+ // actually changing the integer type for promotion.
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+ }
+ case TargetOpcode::G_FFREXP: {
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+
+ if (TypeIdx == 0) {
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ } else {
+ widenScalarDst(MI, WideTy, 1);
+ }
+
Observer.changedInstr(MI);
return Legalized;
}
@@ -2631,12 +2659,34 @@ static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerBitcast(MachineInstr &MI) {
+LegalizerHelper::lowerFConstant(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ MachineFunction &MF = MIRBuilder.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+
+ unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
+ LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+ Align Alignment = Align(DL.getABITypeAlign(
+ getFloatTypeForLLT(MF.getFunction().getContext(), MRI.getType(Dst))));
+
+ auto Addr = MIRBuilder.buildConstantPool(
+ AddrPtrTy, MF.getConstantPool()->getConstantPoolIndex(
+ MI.getOperand(1).getFPImm(), Alignment));
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
+ MRI.getType(Dst), Alignment);
+
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Addr, *MMO);
+ MI.eraseFromParent();
+
+ return Legalized;
+}
+
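lowerFConstant above turns a G_FCONSTANT that the target cannot materialize into a constant-pool entry plus a G_LOAD through a pointer in the default globals address space. The underlying idea, reduced to plain C++ (purely illustrative):

#include <cstdio>

// "Constant pool": the FP immediate lives in read-only storage...
static const double Pool[] = {3.141592653589793};

double loadPi() { return Pool[0]; } // ...and each use becomes a load.

int main() { std::printf("%f\n", loadPi()); }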
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitcast(MachineInstr &MI) {
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
if (SrcTy.isVector()) {
LLT SrcEltTy = SrcTy.getElementType();
SmallVector<Register, 8> SrcRegs;
@@ -2732,11 +2782,7 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register Dst = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register Idx = MI.getOperand(2).getReg();
- LLT SrcVecTy = MRI.getType(SrcVec);
- LLT IdxTy = MRI.getType(Idx);
+ auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
LLT SrcEltTy = SrcVecTy.getElementType();
unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
@@ -2872,13 +2918,9 @@ LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 0)
return UnableToLegalize;
- Register Dst = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register Val = MI.getOperand(2).getReg();
- Register Idx = MI.getOperand(3).getReg();
-
- LLT VecTy = MRI.getType(Dst);
- LLT IdxTy = MRI.getType(Idx);
+ auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
+ MI.getFirst4RegLLTs();
+ LLT VecTy = DstTy;
LLT VecEltTy = VecTy.getElementType();
LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
@@ -3004,7 +3046,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
if (!isPowerOf2_32(MemSizeInBits)) {
// This load needs splitting into power of 2 sized loads.
- LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+ LargeSplitSize = llvm::bit_floor(MemSizeInBits);
SmallSplitSize = MemSizeInBits - LargeSplitSize;
} else {
// This is already a power of 2, but we still need to split this in half.
@@ -3122,7 +3164,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
uint64_t LargeSplitSize, SmallSplitSize;
if (!isPowerOf2_32(MemSizeInBits)) {
- LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
+ LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
} else {
auto &Ctx = MF.getFunction().getContext();
@@ -3250,6 +3292,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
switch(MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_FCONSTANT:
+ return lowerFConstant(MI);
case TargetOpcode::G_BITCAST:
return lowerBitcast(MI);
case TargetOpcode::G_SREM:
@@ -3274,10 +3318,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case TargetOpcode::G_UMULO: {
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
// result.
- Register Res = MI.getOperand(0).getReg();
- Register Overflow = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
LLT Ty = MRI.getType(Res);
unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
@@ -3308,7 +3349,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case TargetOpcode::G_FNEG: {
- Register Res = MI.getOperand(0).getReg();
+ auto [Res, SubByReg] = MI.getFirst2Regs();
LLT Ty = MRI.getType(Res);
// TODO: Handle vector types once we are able to
@@ -3317,23 +3358,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return UnableToLegalize;
auto SignMask =
MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
- Register SubByReg = MI.getOperand(1).getReg();
MIRBuilder.buildXor(Res, SubByReg, SignMask);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_FSUB:
case TargetOpcode::G_STRICT_FSUB: {
- Register Res = MI.getOperand(0).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
// Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
- // First, check if G_FNEG is marked as Lower. If so, we may
- // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
- if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
- return UnableToLegalize;
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
@@ -3357,11 +3391,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
- Register OldValRes = MI.getOperand(0).getReg();
- Register SuccessRes = MI.getOperand(1).getReg();
- Register Addr = MI.getOperand(2).getReg();
- Register CmpVal = MI.getOperand(3).getReg();
- Register NewVal = MI.getOperand(4).getReg();
+ auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
**MI.memoperands_begin());
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
@@ -3381,10 +3411,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case TargetOpcode::G_CTPOP:
return lowerBitCount(MI);
case G_UADDO: {
- Register Res = MI.getOperand(0).getReg();
- Register CarryOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
MIRBuilder.buildAdd(Res, LHS, RHS);
MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
@@ -3393,11 +3420,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case G_UADDE: {
- Register Res = MI.getOperand(0).getReg();
- Register CarryOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- Register CarryIn = MI.getOperand(4).getReg();
+ auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
LLT Ty = MRI.getType(Res);
auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
@@ -3409,10 +3432,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case G_USUBO: {
- Register Res = MI.getOperand(0).getReg();
- Register BorrowOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
MIRBuilder.buildSub(Res, LHS, RHS);
MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
@@ -3421,11 +3441,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case G_USUBE: {
- Register Res = MI.getOperand(0).getReg();
- Register BorrowOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- Register BorrowIn = MI.getOperand(4).getReg();
+ auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
const LLT CondTy = MRI.getType(BorrowOut);
const LLT Ty = MRI.getType(Res);
@@ -3470,8 +3486,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
assert(MI.getOperand(2).isImm() && "Expected immediate");
int64_t SizeInBits = MI.getOperand(2).getImm();
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
LLT DstTy = MRI.getType(DstReg);
Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
@@ -3869,9 +3884,7 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
// Requires compatible types. Otherwise user of DstReg did not perform unmerge
// that should have been artifact combined. Most likely instruction that uses
// DstReg has to do more/fewer elements legalization compatible with NarrowTy.
@@ -3958,8 +3971,7 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowVecTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
+ auto [DstReg, SrcVec] = MI.getFirst2Regs();
Register InsertVal;
bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
@@ -4159,6 +4171,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FLOG:
case G_FLOG2:
case G_FLOG10:
+ case G_FLDEXP:
case G_FNEARBYINT:
case G_FCEIL:
case G_FFLOOR:
@@ -4234,6 +4247,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_STRICT_FSUB:
case G_STRICT_FMUL:
case G_STRICT_FMA:
+ case G_STRICT_FLDEXP:
+ case G_FFREXP:
return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
@@ -4278,13 +4293,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
if (TypeIdx != 0)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
+ auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
+ MI.getFirst3RegLLTs();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- LLT DstTy = MRI.getType(DstReg);
- LLT Src1Ty = MRI.getType(Src1Reg);
- LLT Src2Ty = MRI.getType(Src2Reg);
// The shuffle should be canonicalized by now.
if (DstTy != Src1Ty)
return UnableToLegalize;
@@ -4474,10 +4485,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
// The semantics of the normal non-sequential reductions allow us to freely
// re-associate the operation.
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
if (NarrowTy.isVector() &&
(SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
@@ -4865,6 +4873,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
case TargetOpcode::G_EXTRACT:
if (TypeIdx != 1)
return UnableToLegalize;
@@ -4873,6 +4882,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_INSERT:
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
case TargetOpcode::G_FREEZE:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
@@ -4887,10 +4897,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SELECT: {
- Register DstReg = MI.getOperand(0).getReg();
- Register CondReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT CondTy = MRI.getType(CondReg);
+ auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
if (TypeIdx == 1) {
if (!CondTy.isScalar() ||
DstTy.getElementCount() != MoreTy.getElementCount())
@@ -4943,28 +4950,50 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FPEXT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ LLT SrcTy = LLT::fixed_vector(
+ MoreTy.getNumElements(),
+ MRI.getType(MI.getOperand(1).getReg()).getElementType());
+ moreElementsVectorSrc(MI, SrcTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
default:
return UnableToLegalize;
}
}
-/// Expand source vectors to the size of destination vector.
-static LegalizerHelper::LegalizeResult
-equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+LegalizerHelper::LegalizeResult
+LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
unsigned MaskNumElts = Mask.size();
unsigned SrcNumElts = SrcTy.getNumElements();
- Register DstReg = MI.getOperand(0).getReg();
LLT DestEltTy = DstTy.getElementType();
- // TODO: Normalize the shuffle vector since mask and vector length don't
- // match.
- if (MaskNumElts <= SrcNumElts) {
- return LegalizerHelper::LegalizeResult::UnableToLegalize;
+ if (MaskNumElts == SrcNumElts)
+ return Legalized;
+
+ if (MaskNumElts < SrcNumElts) {
+ // Extend mask to match new destination vector size with
+ // undef values.
+ SmallVector<int, 16> NewMask(Mask);
+ for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
+ NewMask.push_back(-1);
+
+ moreElementsVectorDst(MI, SrcTy, 0);
+ MIRBuilder.setInstrAndDebugLoc(MI);
+ MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg(),
+ MI.getOperand(2).getReg(), NewMask);
+ MI.eraseFromParent();
+
+ return Legalized;
}
unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
@@ -5014,19 +5043,14 @@ equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
unsigned int TypeIdx, LLT MoreTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
+ auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- LLT DstTy = MRI.getType(DstReg);
- LLT Src1Ty = MRI.getType(Src1Reg);
- LLT Src2Ty = MRI.getType(Src2Reg);
unsigned NumElts = DstTy.getNumElements();
unsigned WidenNumElts = MoreTy.getNumElements();
if (DstTy.isVector() && Src1Ty.isVector() &&
- DstTy.getNumElements() > Src1Ty.getNumElements()) {
- return equalizeVectorShuffleLengths(MI, MIRBuilder);
+ DstTy.getNumElements() != Src1Ty.getNumElements()) {
+ return equalizeVectorShuffleLengths(MI);
}
if (TypeIdx != 0)
@@ -5218,9 +5242,7 @@ LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1 = MI.getOperand(1).getReg();
- Register Src2 = MI.getOperand(2).getReg();
+ auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
LLT Ty = MRI.getType(DstReg);
if (Ty.isVector())
@@ -5471,8 +5493,7 @@ LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 0)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isVector())
@@ -5539,10 +5560,7 @@ LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned NarrowSize = NarrowTy.getSizeInBits();
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
@@ -5575,10 +5593,7 @@ LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned NarrowSize = NarrowTy.getSizeInBits();
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
@@ -5611,9 +5626,7 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned NarrowSize = NarrowTy.getSizeInBits();
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
@@ -5631,6 +5644,31 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ MachineIRBuilder &B = MIRBuilder;
+ Register ExpReg = MI.getOperand(2).getReg();
+ LLT ExpTy = MRI.getType(ExpReg);
+
+ unsigned ClampSize = NarrowTy.getScalarSizeInBits();
+
+ // Clamp the exponent to the range of the target type.
+ auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
+ auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
+ auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
+ auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
+
+ auto Trunc = B.buildTrunc(NarrowTy, Clamp);
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(Trunc.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
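narrowScalarFLDEXP above clamps the exponent operand into the narrow type's signed range before truncating, so a wildly out-of-range exponent saturates instead of wrapping and still yields an overflowing or underflowing ldexp result. The same clamp-then-truncate step in plain C++ (standalone illustration, not LLVM code):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  int32_t WideExp = 70000; // far outside the 16-bit range
  auto NarrowExp = static_cast<int16_t>(
      std::clamp<int32_t>(WideExp, INT16_MIN, INT16_MAX));
  std::printf("%d -> %d, ldexp(1.0, exp) = %g\n", int(WideExp), int(NarrowExp),
              std::ldexp(1.0, NarrowExp)); // 70000 -> 32767 -> inf
}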
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
const auto &TII = MIRBuilder.getTII();
@@ -5649,10 +5687,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
return Legalized;
}
case TargetOpcode::G_CTLZ: {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Len = SrcTy.getSizeInBits();
if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
@@ -5699,10 +5734,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
return Legalized;
}
case TargetOpcode::G_CTTZ: {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Len = SrcTy.getSizeInBits();
if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
@@ -5808,10 +5840,7 @@ static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
- Register Y = MI.getOperand(2).getReg();
- Register Z = MI.getOperand(3).getReg();
+ auto [Dst, X, Y, Z] = MI.getFirst4Regs();
LLT Ty = MRI.getType(Dst);
LLT ShTy = MRI.getType(Z);
@@ -5850,10 +5879,7 @@ LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
- Register Y = MI.getOperand(2).getReg();
- Register Z = MI.getOperand(3).getReg();
+ auto [Dst, X, Y, Z] = MI.getFirst4Regs();
LLT Ty = MRI.getType(Dst);
LLT ShTy = MRI.getType(Z);
@@ -5932,10 +5958,7 @@ LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Amt = MI.getOperand(2).getReg();
- LLT AmtTy = MRI.getType(Amt);
+ auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
@@ -5946,12 +5969,7 @@ LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Amt = MI.getOperand(2).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
- LLT AmtTy = MRI.getType(Amt);
+ auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
@@ -6021,8 +6039,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ auto [Dst, Src] = MI.getFirst2Regs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
const LLT S1 = LLT::scalar(1);
@@ -6077,10 +6094,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
if (SrcTy == LLT::scalar(1)) {
auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
@@ -6105,10 +6119,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
@@ -6151,10 +6162,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
@@ -6194,10 +6202,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
@@ -6263,17 +6268,27 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ const LLT S1 = LLT::scalar(1);
+ const LLT S32 = LLT::scalar(32);
+
+ auto [Dst, Src] = MI.getFirst2Regs();
+ assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
+ MRI.getType(Src).getScalarType() == LLT::scalar(64));
if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
return UnableToLegalize;
+ if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
+ unsigned Flags = MI.getFlags();
+ auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
+ MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
const unsigned ExpMask = 0x7ff;
const unsigned ExpBiasf64 = 1023;
const unsigned ExpBiasf16 = 15;
- const LLT S32 = LLT::scalar(32);
- const LLT S1 = LLT::scalar(1);
auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
Register U = Unmerge.getReg(0);
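The manual f64 -> f16 path in this function works directly on the double's bit pattern, re-biasing the 11-bit, bias-1023 exponent (ExpMask/ExpBiasf64 above) to the half's 5-bit, bias-15 exponent; the new UnsafeFPMath shortcut instead truncates via f32 and accepts possible double rounding. Extracting those f64 fields in standalone C++20 (illustration only):

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  double D = 1.5;
  uint64_t Bits = std::bit_cast<uint64_t>(D);
  unsigned ExpField = (Bits >> 52) & 0x7ff; // 11-bit exponent field
  std::printf("exponent field = %u (unbiased %d)\n", ExpField,
              int(ExpField) - 1023); // 1023, i.e. unbiased 0 for 1.5
}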
@@ -6368,11 +6383,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
-
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [DstTy, SrcTy] = MI.getFirst2LLTs();
const LLT S64 = LLT::scalar(64);
const LLT S16 = LLT::scalar(16);
@@ -6385,9 +6396,7 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
// TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
// multiplication tree.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Dst);
auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
@@ -6412,9 +6421,7 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
LLT CmpType = MRI.getType(Dst).changeElementSize(1);
@@ -6428,13 +6435,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
-
- const LLT Src0Ty = MRI.getType(Src0);
- const LLT Src1Ty = MRI.getType(Src1);
-
+ auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
const int Src0Size = Src0Ty.getScalarSizeInBits();
const int Src1Size = Src1Ty.getScalarSizeInBits();
@@ -6475,9 +6476,7 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Dst);
if (!MI.getFlag(MachineInstr::FmNoNans)) {
@@ -6516,8 +6515,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
+ auto [DstReg, X] = MI.getFirst2Regs();
const unsigned Flags = MI.getFlags();
const LLT Ty = MRI.getType(DstReg);
const LLT CondTy = Ty.changeElementSize(1);
@@ -6547,10 +6545,8 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFFloor(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
unsigned Flags = MI.getFlags();
LLT Ty = MRI.getType(DstReg);
const LLT CondTy = Ty.changeElementSize(1);
@@ -6577,11 +6573,8 @@ LegalizerHelper::lowerFFloor(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
const unsigned NumOps = MI.getNumOperands();
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(Src0Reg);
- unsigned PartSize = SrcTy.getSizeInBits();
+ auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
+ unsigned PartSize = Src0Ty.getSizeInBits();
LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
@@ -6729,11 +6722,8 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(1).getReg();
- Register Src1Reg = MI.getOperand(2).getReg();
- LLT Src0Ty = MRI.getType(Src0Reg);
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
+ MI.getFirst3RegLLTs();
LLT IdxTy = LLT::scalar(32);
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
@@ -6822,13 +6812,9 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Offset = MI.getOperand(2).getImm();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
-
// Extract sub-vector or one element
if (SrcTy.isVector()) {
unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
@@ -6837,7 +6823,7 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
(Offset + DstSize <= SrcTy.getSizeInBits())) {
// Unmerge and allow access to each Src element for the artifact combiner.
- auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src);
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
// Take element(s) we need to extract and copy it (merge them).
SmallVector<Register, 8> SubVectorElts;
@@ -6846,9 +6832,9 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
SubVectorElts.push_back(Unmerge.getReg(Idx));
}
if (SubVectorElts.size() == 1)
- MIRBuilder.buildCopy(Dst, SubVectorElts[0]);
+ MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
else
- MIRBuilder.buildMergeLikeInstr(Dst, SubVectorElts);
+ MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
MI.eraseFromParent();
return Legalized;
@@ -6861,15 +6847,15 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
LLT SrcIntTy = SrcTy;
if (!SrcTy.isScalar()) {
SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
- Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
+ SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
}
if (Offset == 0)
- MIRBuilder.buildTrunc(Dst, Src);
+ MIRBuilder.buildTrunc(DstReg, SrcReg);
else {
auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
- auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
- MIRBuilder.buildTrunc(Dst, Shr);
+ auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
+ MIRBuilder.buildTrunc(DstReg, Shr);
}
MI.eraseFromParent();
@@ -6880,9 +6866,7 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register InsertSrc = MI.getOperand(2).getReg();
+ auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
uint64_t Offset = MI.getOperand(3).getImm();
LLT DstTy = MRI.getType(Src);
@@ -6972,14 +6956,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
- Register Dst0 = MI.getOperand(0).getReg();
- Register Dst1 = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
+ MI.getFirst4RegLLTs();
const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
- LLT Ty = MRI.getType(Dst0);
- LLT BoolTy = MRI.getType(Dst1);
+ LLT Ty = Dst0Ty;
+ LLT BoolTy = Dst1Ty;
if (IsAdd)
MIRBuilder.buildAdd(Dst0, LHS, RHS);
@@ -7008,9 +6990,7 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
bool IsSigned;
bool IsAdd;
@@ -7085,9 +7065,7 @@ LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
LLT BoolTy = Ty.changeElementSize(1);
bool IsSigned;
@@ -7157,9 +7135,7 @@ LegalizerHelper::lowerShlSat(MachineInstr &MI) {
MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
"Expected shlsat opcode!");
bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
LLT BoolTy = Ty.changeElementSize(1);
@@ -7185,10 +7161,8 @@ LegalizerHelper::lowerShlSat(MachineInstr &MI) {
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerBswap(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
const LLT Ty = MRI.getType(Src);
unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
@@ -7233,8 +7207,7 @@ static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ auto [Dst, Src] = MI.getFirst2Regs();
const LLT Ty = MRI.getType(Src);
unsigned Size = Ty.getSizeInBits();
@@ -7312,23 +7285,23 @@ LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- uint64_t Mask = MI.getOperand(2).getImm();
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
- if (Mask == 0) {
+ if (Mask == fcNone) {
MIRBuilder.buildConstant(DstReg, 0);
MI.eraseFromParent();
return Legalized;
}
- if ((Mask & fcAllFlags) == fcAllFlags) {
+ if (Mask == fcAllFlags) {
MIRBuilder.buildConstant(DstReg, 1);
MI.eraseFromParent();
return Legalized;
}
+ // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
+ // version
+
unsigned BitSize = SrcTy.getScalarSizeInBits();
const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
@@ -7345,7 +7318,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
APInt QNaNBitMask =
APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
- APInt InvertionMask = APInt::getAllOnesValue(DstTy.getScalarSizeInBits());
+ APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
@@ -7358,8 +7331,10 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
auto Res = MIRBuilder.buildConstant(DstTy, 0);
+ // Clang doesn't support capture of structured bindings:
+ LLT DstTyCopy = DstTy;
const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
- Res = MIRBuilder.buildOr(DstTy, Res, ToAppend);
+ Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
};
// Tests that involve more than one class should be processed first.
@@ -7382,8 +7357,20 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
Mask &= ~fcNegFinite;
}
+ if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
+ // fcZero | fcSubnormal => test all exponent bits are 0
+ // TODO: Handle sign bit specific cases
+ // TODO: Handle inverted case
+ if (PartialCheck == (fcZero | fcSubnormal)) {
+ auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ ExpBits, ZeroC));
+ Mask &= ~PartialCheck;
+ }
+ }
+
// Check for individual classes.
- if (unsigned PartialCheck = Mask & fcZero) {
+ if (FPClassTest PartialCheck = Mask & fcZero) {
if (PartialCheck == fcPosZero)
appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
AsInt, ZeroC));
@@ -7395,7 +7382,21 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
AsInt, SignBitC));
}
- if (unsigned PartialCheck = Mask & fcInf) {
+ if (FPClassTest PartialCheck = Mask & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
+ auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
+ auto OneC = MIRBuilder.buildConstant(IntTy, 1);
+ auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
+ auto SubnormalRes =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
+ MIRBuilder.buildConstant(IntTy, AllOneMantissa));
+ if (PartialCheck == fcNegSubnormal)
+ SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
+ appendToRes(SubnormalRes);
+ }
+
+ if (FPClassTest PartialCheck = Mask & fcInf) {
if (PartialCheck == fcPosInf)
appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
AsInt, InfC));
@@ -7410,7 +7411,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
}
}
- if (unsigned PartialCheck = Mask & fcNan) {
+ if (FPClassTest PartialCheck = Mask & fcNan) {
auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
if (PartialCheck == fcNan) {
// isnan(V) ==> abs(V) u> int(inf)
@@ -7431,21 +7432,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
}
}
- if (unsigned PartialCheck = Mask & fcSubnormal) {
- // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
- // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
- auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
- auto OneC = MIRBuilder.buildConstant(IntTy, 1);
- auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
- auto SubnormalRes =
- MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
- MIRBuilder.buildConstant(IntTy, AllOneMantissa));
- if (PartialCheck == fcNegSubnormal)
- SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
- appendToRes(SubnormalRes);
- }
-
- if (unsigned PartialCheck = Mask & fcNormal) {
+ if (FPClassTest PartialCheck = Mask & fcNormal) {
// isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
// (max_exp-1))
APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
@@ -7472,12 +7459,8 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
// Implement vector G_SELECT in terms of XOR, AND, OR.
- Register DstReg = MI.getOperand(0).getReg();
- Register MaskReg = MI.getOperand(1).getReg();
- Register Op1Reg = MI.getOperand(2).getReg();
- Register Op2Reg = MI.getOperand(3).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT MaskTy = MRI.getType(MaskReg);
+ auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
+ MI.getFirst4RegLLTs();
if (!DstTy.isVector())
return UnableToLegalize;
@@ -7591,7 +7574,7 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
Observer.changedInstr(MI);
return Legalized;
}
- return UnableToLegalize;;
+ return UnableToLegalize;
}
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
@@ -7638,7 +7621,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
// SDAGisms map cleanly to GISel concepts.
if (NewTy.isVector())
NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
- NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
+ NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
unsigned NewTySize = NewTy.getSizeInBytes();
assert(NewTySize > 0 && "Could not find appropriate type");
@@ -7826,9 +7809,7 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
+ auto [Dst, Src, Len] = MI.getFirst3Regs();
const auto *MMOIt = MI.memoperands_begin();
const MachineMemOperand *MemOp = *MMOIt;
@@ -8091,9 +8072,7 @@ LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
Align DstAlign = MemOp->getBaseAlign();
Align SrcAlign;
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
+ auto [Dst, Src, Len] = MI.getFirst3Regs();
if (Opc != TargetOpcode::G_MEMSET) {
assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 4b6c3a156709..1f2e481c63e0 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -21,7 +22,6 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include <algorithm>
using namespace llvm;
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 7c6eac8c8ce0..49f40495d6fc 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -10,6 +10,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -18,7 +20,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -305,7 +307,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
const auto &DL = MF->getFunction().getParent()->getDataLayout();
bool AnyMerged = false;
do {
- unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
+ unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size());
unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
// Compute the biggest store we can generate to handle the number of stores.
unsigned MergeSizeBits;
@@ -400,7 +402,9 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
auto NewStore =
Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
(void) NewStore;
- LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore);
+ LLVM_DEBUG(dbgs() << "Merged " << Stores.size()
+ << " stores into merged store: " << *NewStore);
+ LLVM_DEBUG(for (auto *MI : Stores) dbgs() << " " << *MI;);
NumStoresMerged += Stores.size();
MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
@@ -445,20 +449,19 @@ bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
for (auto AliasInfo : reverse(C.PotentialAliases)) {
MachineInstr *PotentialAliasOp = AliasInfo.first;
unsigned PreCheckedIdx = AliasInfo.second;
- if (static_cast<unsigned>(Idx) > PreCheckedIdx) {
- // Need to check this alias.
- if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
- AA)) {
- LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
- << " detected\n");
- return true;
- }
- } else {
+ if (static_cast<unsigned>(Idx) < PreCheckedIdx) {
// Once our store index is lower than the index associated with the
// potential alias, we know that we've already checked for this alias
// and all of the earlier potential aliases too.
return false;
}
+ // Need to check this alias.
+ if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
+ AA)) {
+ LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
+ << " detected\n");
+ return true;
+ }
}
return false;
};
@@ -616,11 +619,304 @@ bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
return Changed;
}
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static std::optional<int64_t>
+getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+ MachineRegisterInfo &MRI) {
+ Register TruncVal;
+ if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+ return std::nullopt;
+
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
+ //
+ // x = G_LSHR y, ShiftAmtC
+ // s8 z = G_TRUNC x
+ // store z, ...
+ Register FoundSrcVal;
+ int64_t ShiftAmt;
+ if (!mi_match(TruncVal, MRI,
+ m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+ m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+ if (!SrcVal.isValid() || TruncVal == SrcVal) {
+ if (!SrcVal.isValid())
+ SrcVal = TruncVal;
+ return 0; // If it's the lowest index store.
+ }
+ return std::nullopt;
+ }
+
+ unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+ if (ShiftAmt % NarrowBits != 0)
+ return std::nullopt;
+ const unsigned Offset = ShiftAmt / NarrowBits;
+
+ if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+ return std::nullopt;
+
+ if (!SrcVal.isValid())
+ SrcVal = FoundSrcVal;
+ else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+ return std::nullopt;
+ return Offset;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the targets
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI,
+ SmallPtrSetImpl<GStore *> &DeletedStores) {
+ LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ if (!MemTy.isScalar())
+ return false;
+ switch (MemTy.getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ break;
+ default:
+ return false;
+ }
+ if (!StoreMI.isSimple())
+ return false;
+
+ // We do a simple search for mergeable stores prior to this one.
+ // Any potential alias hazard along the way terminates the search.
+ SmallVector<GStore *> FoundStores;
+
+ // We're looking for:
+ // 1) a (store(trunc(...)))
+ // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
+ // the partial value stored.
+ // 3) where the offsets form either a little or big-endian sequence.
+
+ auto &LastStore = StoreMI;
+
+ // The single base pointer that all stores must use.
+ Register BaseReg;
+ int64_t LastOffset;
+ if (!mi_match(LastStore.getPointerReg(), *MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+ BaseReg = LastStore.getPointerReg();
+ LastOffset = 0;
+ }
+
+ GStore *LowestIdxStore = &LastStore;
+ int64_t LowestIdxOffset = LastOffset;
+
+ Register WideSrcVal;
+ auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, *MRI);
+ if (!LowestShiftAmt)
+ return false; // Didn't match a trunc.
+ assert(WideSrcVal.isValid());
+
+ LLT WideStoreTy = MRI->getType(WideSrcVal);
+ // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
+ if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
+ return false;
+ const unsigned NumStoresRequired =
+ WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+ SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+ OffsetMap[*LowestShiftAmt] = LastOffset;
+ FoundStores.emplace_back(&LastStore);
+
+ const int MaxInstsToCheck = 10;
+ int NumInstsChecked = 0;
+ for (auto II = ++LastStore.getReverseIterator();
+ II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+ ++II) {
+ NumInstsChecked++;
+ GStore *NewStore;
+ if ((NewStore = dyn_cast<GStore>(&*II))) {
+ if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
+ break;
+ } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
+ break;
+ } else {
+ continue; // This is a safe instruction we can look past.
+ }
+
+ Register NewBaseReg;
+ int64_t MemOffset;
+ // Check we're storing to the same base + some offset.
+ if (!mi_match(NewStore->getPointerReg(), *MRI,
+ m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+ NewBaseReg = NewStore->getPointerReg();
+ MemOffset = 0;
+ }
+ if (BaseReg != NewBaseReg)
+ break;
+
+ auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, *MRI);
+ if (!ShiftByteOffset)
+ break;
+ if (MemOffset < LowestIdxOffset) {
+ LowestIdxOffset = MemOffset;
+ LowestIdxStore = NewStore;
+ }
+
+ // Map the offset in the store and the offset in the combined value, and
+ // early return if it has been set before.
+ if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+ OffsetMap[*ShiftByteOffset] != INT64_MAX)
+ break;
+ OffsetMap[*ShiftByteOffset] = MemOffset;
+
+ FoundStores.emplace_back(NewStore);
+ // Reset counter since we've found a matching inst.
+ NumInstsChecked = 0;
+ if (FoundStores.size() == NumStoresRequired)
+ break;
+ }
+
+ if (FoundStores.size() != NumStoresRequired) {
+ if (FoundStores.size() == 1)
+ return false;
+ // We didn't find enough stores to merge into the size of the original
+ // source value, but we may be able to generate a smaller store if we
+ // truncate the source value.
+ WideStoreTy = LLT::scalar(FoundStores.size() * MemTy.getScalarSizeInBits());
+ }
+
+ unsigned NumStoresFound = FoundStores.size();
+
+ const auto &DL = LastStore.getMF()->getDataLayout();
+ auto &C = LastStore.getMF()->getFunction().getContext();
+ // Check that a store of the wide type is both allowed and fast on the target
+ unsigned Fast = 0;
+ bool Allowed = TLI->allowsMemoryAccess(
+ C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+ if (!Allowed || !Fast)
+ return false;
+
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ unsigned NarrowBits = MemTy.getScalarSizeInBits();
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStoresFound; ++i)
+ if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStoresFound - 1; i != NumStoresFound;
+ ++i, --j)
+ if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(DL.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStoresFound == 2 && checkOffsets(DL.isBigEndian()))
+ NeedRotate = true;
+ else
+ return false;
+ }
+
+ if (NeedBswap &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}, *MF))
+ return false;
+ if (NeedRotate &&
+ !isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, *MF))
+ return false;
+
+ Builder.setInstrAndDebugLoc(StoreMI);
+
+ if (WideStoreTy != MRI->getType(WideSrcVal))
+ WideSrcVal = Builder.buildTrunc(WideStoreTy, WideSrcVal).getReg(0);
+
+ if (NeedBswap) {
+ WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
+ } else if (NeedRotate) {
+ assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
+ "Unexpected type for rotate");
+ auto RotAmt =
+ Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
+ WideSrcVal =
+ Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
+ }
+
+ Builder.buildStore(WideSrcVal, LowestIdxStore->getPointerReg(),
+ LowestIdxStore->getMMO().getPointerInfo(),
+ LowestIdxStore->getMMO().getAlign());
+
+ // Erase the old stores.
+ for (auto *ST : FoundStores) {
+ ST->eraseFromParent();
+ DeletedStores.insert(ST);
+ }
+ return true;
+}
+
+bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) {
+ bool Changed = false;
+ SmallVector<GStore *, 16> Stores;
+ SmallPtrSet<GStore *, 8> DeletedStores;
+ // Walk up the block so we can see the most eligible stores.
+ for (MachineInstr &MI : llvm::reverse(BB))
+ if (auto *StoreMI = dyn_cast<GStore>(&MI))
+ Stores.emplace_back(StoreMI);
+
+ for (auto *StoreMI : Stores) {
+ if (DeletedStores.count(StoreMI))
+ continue;
+ if (mergeTruncStore(*StoreMI, DeletedStores))
+ Changed = true;
+ }
+ return Changed;
+}
+
bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
bool Changed = false;
- for (auto &BB : MF) {
+ for (auto &BB : MF){
Changed |= mergeBlockStores(BB);
+ Changed |= mergeTruncStoresBlock(BB);
+ }
+
+ // Erase all dead instructions left over by the merging.
+ if (Changed) {
+ for (auto &BB : MF) {
+ for (auto &I : make_early_inc_range(make_range(BB.rbegin(), BB.rend()))) {
+ if (isTriviallyDead(I, *MRI))
+ I.eraseFromParent();
+ }
+ }
}
+
return Changed;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index bf4dcc2c2459..55984423e5bc 100644
--- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -54,7 +54,7 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
MachineInstr &MIUse = *MOUse.getParent();
InsertMBB = MIUse.getParent();
if (MIUse.isPHI())
- InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB();
+ InsertMBB = MIUse.getOperand(MOUse.getOperandNo() + 1).getMBB();
return InsertMBB == Def.getParent();
}
@@ -99,7 +99,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
MachineBasicBlock *InsertMBB;
LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
dbgs() << "Checking use: " << MIUse
- << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+ << " #Opd: " << MOUse.getOperandNo() << '\n');
if (isLocalUse(MOUse, MI, InsertMBB)) {
// Even if we're in the same block, if the block is very large we could
// still have many long live ranges. Try to do intra-block localization
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 9100e064f30f..962b54ec5d6b 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -80,11 +80,11 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
assert(
cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
"Expected inlined-at fields to agree");
- return buildInstr(TargetOpcode::DBG_VALUE)
- .addFrameIndex(FI)
- .addImm(0)
- .addMetadata(Variable)
- .addMetadata(Expr);
+ return insertInstr(buildInstrNoInsert(TargetOpcode::DBG_VALUE)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMetadata(Variable)
+ .addMetadata(Expr));
}
MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
@@ -164,6 +164,15 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(const DstOp &Res,
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildConstantPool(const DstOp &Res,
+ unsigned Idx) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+ auto MIB = buildInstr(TargetOpcode::G_CONSTANT_POOL);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addConstantPoolIndex(Idx);
+ return MIB;
+}
+
MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy,
unsigned JTI) {
return buildInstr(TargetOpcode::G_JUMP_TABLE, {PtrTy}, {})
@@ -229,17 +238,25 @@ MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res,
LLT ResTy = Res.getLLTTy(*getMRI());
LLT Op0Ty = Op0.getLLTTy(*getMRI());
- assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
- assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
- "Different vector element types");
- assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
- "Op0 has more elements");
+ assert(ResTy.isVector() && "Res non vector type");
- auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
SmallVector<Register, 8> Regs;
- for (auto Op : Unmerge.getInstr()->defs())
- Regs.push_back(Op.getReg());
- Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0);
+ if (Op0Ty.isVector()) {
+ assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ "Different vector element types");
+ assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
+ "Op0 has more elements");
+ auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+
+ for (auto Op : Unmerge.getInstr()->defs())
+ Regs.push_back(Op.getReg());
+ } else {
+ assert((ResTy.getSizeInBits() > Op0Ty.getSizeInBits()) &&
+ "Op0 has more size");
+ Regs.push_back(Op0.getReg());
+ }
+ Register Undef =
+ buildUndef(Op0Ty.isVector() ? Op0Ty.getElementType() : Op0Ty).getReg(0);
unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size();
for (unsigned i = 0; i < NumberOfPadElts; ++i)
Regs.push_back(Undef);
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 080f3ca540f2..885a1056b2ea 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -69,8 +69,8 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers", false,
false)
-RegBankSelect::RegBankSelect(Mode RunningMode)
- : MachineFunctionPass(ID), OptMode(RunningMode) {
+RegBankSelect::RegBankSelect(char &PassID, Mode RunningMode)
+ : MachineFunctionPass(PassID), OptMode(RunningMode) {
if (RegBankSelectMode.getNumOccurrences() != 0) {
OptMode = RegBankSelectMode;
if (RegBankSelectMode != RunningMode)
@@ -162,8 +162,10 @@ bool RegBankSelect::repairReg(
MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY)
.addDef(Dst)
.addUse(Src);
- LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
- << '\n');
+ LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << ':'
+ << printRegClassOrBank(Src, *MRI, TRI)
+ << " to: " << printReg(Dst) << ':'
+ << printRegClassOrBank(Dst, *MRI, TRI) << '\n');
} else {
// TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT
// sequence.
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 07448548c295..080600d3cc98 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -230,10 +230,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
return false;
// Instructions without side-effects are dead iff they only define dead vregs.
- for (const auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
-
+ for (const auto &MO : MI.all_defs()) {
Register Reg = MO.getReg();
if (Reg.isPhysical() || !MRI.use_nodbg_empty(Reg))
return false;
@@ -711,14 +708,14 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
const MachinePointerInfo &MPO) {
- auto PSV = MPO.V.dyn_cast<const PseudoSourceValue *>();
+ auto PSV = dyn_cast_if_present<const PseudoSourceValue *>(MPO.V);
if (auto FSPV = dyn_cast_or_null<FixedStackPseudoSourceValue>(PSV)) {
MachineFrameInfo &MFI = MF.getFrameInfo();
return commonAlignment(MFI.getObjectAlign(FSPV->getFrameIndex()),
MPO.Offset);
}
- if (const Value *V = MPO.V.dyn_cast<const Value *>()) {
+ if (const Value *V = dyn_cast_if_present<const Value *>(MPO.V)) {
const Module *M = MF.getFunction().getParent();
return V->getPointerAlignment(M->getDataLayout());
}
@@ -797,7 +794,7 @@ llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
auto MaybeCst = getIConstantVRegVal(R, MRI);
if (!MaybeCst)
return std::nullopt;
- return MaybeCst->countLeadingZeros();
+ return MaybeCst->countl_zero();
};
if (Ty.isVector()) {
// Try to constant fold each element.
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index 2ccf2def48f8..f259cbc1d788 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -67,7 +67,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/BasicBlock.h"
@@ -92,6 +91,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -156,7 +156,7 @@ namespace {
/// Whether we should merge global variables that have external linkage.
bool MergeExternalGlobals = false;
- bool IsMachO;
+ bool IsMachO = false;
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
@@ -652,6 +652,14 @@ bool GlobalMerge::doInitialization(Module &M) {
if (isMustKeepGlobalVariable(&GV))
continue;
+ // Don't merge tagged globals, as each global should have its own unique
+ // memory tag at runtime. TODO(hctim): This can be relaxed: constant globals
+ // with compatible alignment and the same contents may be merged as long as
+ // the globals occupy the same number of tag granules (i.e. `size_a / 16 ==
+ // size_b / 16`).
+ if (GV.isTagged())
+ continue;
+
Type *Ty = GV.getValueType();
if (DL.getTypeAllocSize(Ty) < MaxOffset) {
if (TM &&
diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
index 258ad1931b12..e7b14d700a44 100644
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -15,8 +15,10 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -115,12 +117,12 @@ namespace {
using TTI = TargetTransformInfo;
- class HardwareLoops : public FunctionPass {
+ class HardwareLoopsLegacy : public FunctionPass {
public:
static char ID;
- HardwareLoops() : FunctionPass(ID) {
- initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
+ HardwareLoopsLegacy() : FunctionPass(ID) {
+ initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -131,29 +133,44 @@ namespace {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<BranchProbabilityInfoWrapperPass>();
}
+ };
+
+ class HardwareLoopsImpl {
+ public:
+ HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
+ DominatorTree &DT, const DataLayout &DL,
+ const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
+ AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
+ HardwareLoopOptions &Opts)
+ : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
+ TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
+ bool run(Function &F);
+
+ private:
// Try to convert the given Loop into a hardware loop.
- bool TryConvertLoop(Loop *L);
+ bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
// Given that the target believes the loop to be profitable, try to
// convert it.
bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
- private:
- ScalarEvolution *SE = nullptr;
- LoopInfo *LI = nullptr;
- const DataLayout *DL = nullptr;
- OptimizationRemarkEmitter *ORE = nullptr;
- const TargetTransformInfo *TTI = nullptr;
- DominatorTree *DT = nullptr;
- bool PreserveLCSSA = false;
- AssumptionCache *AC = nullptr;
- TargetLibraryInfo *LibInfo = nullptr;
- Module *M = nullptr;
+ ScalarEvolution &SE;
+ LoopInfo &LI;
+ bool PreserveLCSSA;
+ DominatorTree &DT;
+ const DataLayout &DL;
+ const TargetTransformInfo &TTI;
+ TargetLibraryInfo *TLI = nullptr;
+ AssumptionCache &AC;
+ OptimizationRemarkEmitter *ORE;
+ HardwareLoopOptions &Opts;
bool MadeChange = false;
};
@@ -182,8 +199,9 @@ namespace {
public:
HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
const DataLayout &DL,
- OptimizationRemarkEmitter *ORE) :
- SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
+ OptimizationRemarkEmitter *ORE,
+ HardwareLoopOptions &Opts) :
+ SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
ExitCount(Info.ExitCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
@@ -197,6 +215,7 @@ namespace {
ScalarEvolution &SE;
const DataLayout &DL;
OptimizationRemarkEmitter *ORE = nullptr;
+ HardwareLoopOptions &Opts;
Loop *L = nullptr;
Module *M = nullptr;
const SCEV *ExitCount = nullptr;
@@ -209,40 +228,83 @@ namespace {
};
}
-char HardwareLoops::ID = 0;
+char HardwareLoopsLegacy::ID = 0;
-bool HardwareLoops::runOnFunction(Function &F) {
+bool HardwareLoopsLegacy::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DL = &F.getParent()->getDataLayout();
- ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &DL = F.getParent()->getDataLayout();
+ auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
- PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- M = F.getParent();
+ auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ HardwareLoopOptions Opts;
+ if (ForceHardwareLoops.getNumOccurrences())
+ Opts.setForce(ForceHardwareLoops);
+ if (ForceHardwareLoopPHI.getNumOccurrences())
+ Opts.setForcePhi(ForceHardwareLoopPHI);
+ if (ForceNestedLoop.getNumOccurrences())
+ Opts.setForceNested(ForceNestedLoop);
+ if (ForceGuardLoopEntry.getNumOccurrences())
+ Opts.setForceGuard(ForceGuardLoopEntry);
+ if (LoopDecrement.getNumOccurrences())
+ Opts.setDecrement(LoopDecrement);
+ if (CounterBitWidth.getNumOccurrences())
+ Opts.setCounterBitwidth(CounterBitWidth);
- for (Loop *L : *LI)
- if (L->isOutermost())
- TryConvertLoop(L);
+ HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
+ Opts);
+ return Impl.run(F);
+}
+
+PreservedAnalyses HardwareLoopsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ auto &DL = F.getParent()->getDataLayout();
+
+ HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
+ bool Changed = Impl.run(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<BranchProbabilityAnalysis>();
+ return PA;
+}
+bool HardwareLoopsImpl::run(Function &F) {
+ LLVMContext &Ctx = F.getParent()->getContext();
+ for (Loop *L : LI)
+ if (L->isOutermost())
+ TryConvertLoop(L, Ctx);
return MadeChange;
}
// Return true if the search should stop, which will be when an inner loop is
// converted and the parent loop doesn't support containing a hardware loop.
-bool HardwareLoops::TryConvertLoop(Loop *L) {
+bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
// Process nested loops first.
bool AnyChanged = false;
for (Loop *SL : *L)
- AnyChanged |= TryConvertLoop(SL);
+ AnyChanged |= TryConvertLoop(SL, Ctx);
if (AnyChanged) {
reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
ORE, L);
@@ -252,39 +314,39 @@ bool HardwareLoops::TryConvertLoop(Loop *L) {
LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
HardwareLoopInfo HWLoopInfo(L);
- if (!HWLoopInfo.canAnalyze(*LI)) {
+ if (!HWLoopInfo.canAnalyze(LI)) {
reportHWLoopFailure("cannot analyze loop, irreducible control flow",
"HWLoopCannotAnalyze", ORE, L);
return false;
}
- if (!ForceHardwareLoops &&
- !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
+ if (!Opts.Force &&
+ !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
reportHWLoopFailure("it's not profitable to create a hardware-loop",
"HWLoopNotProfitable", ORE, L);
return false;
}
// Allow overriding of the counter width and loop decrement value.
- if (CounterBitWidth.getNumOccurrences())
- HWLoopInfo.CountType =
- IntegerType::get(M->getContext(), CounterBitWidth);
+ if (Opts.Bitwidth.has_value()) {
+ HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
+ }
- if (LoopDecrement.getNumOccurrences())
+ if (Opts.Decrement.has_value())
HWLoopInfo.LoopDecrement =
- ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
+ ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
MadeChange |= TryConvertLoop(HWLoopInfo);
- return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
+ return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
}
-bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
+bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
Loop *L = HWLoopInfo.L;
LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
- if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
- ForceHardwareLoopPHI)) {
+ if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
+ Opts.getForcePhi())) {
// TODO: there can be many reasons a loop is not considered a
// candidate, so we should let isHardwareLoopCandidate fill in the
// reason and then report a better message here.
@@ -300,11 +362,11 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
// If we don't have a preheader, then insert one.
if (!Preheader)
- Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
+ Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
if (!Preheader)
return false;
- HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
+ HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
HWLoop.Create();
++NumHWLoops;
return true;
@@ -322,7 +384,7 @@ void HardwareLoop::Create() {
Value *Setup = InsertIterationSetup(LoopCountInit);
- if (UsePHICounter || ForceHardwareLoopPHI) {
+ if (UsePHICounter || Opts.ForcePhi) {
Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
Value *EltsRem = InsertPHICounter(Setup, LoopDec);
LoopDec->setOperand(0, EltsRem);
@@ -397,7 +459,8 @@ Value *HardwareLoop::InitLoopCount() {
if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
SE.getZero(ExitCount->getType()))) {
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
- UseLoopGuard |= ForceGuardLoopEntry;
+ if (Opts.ForceGuard)
+ UseLoopGuard = true;
} else
UseLoopGuard = false;
@@ -441,7 +504,7 @@ Value *HardwareLoop::InitLoopCount() {
Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
IRBuilder<> Builder(BeginBB->getTerminator());
Type *Ty = LoopCountInit->getType();
- bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
+ bool UsePhi = UsePHICounter || Opts.ForcePhi;
Intrinsic::ID ID = UseLoopGuard
? (UsePhi ? Intrinsic::test_start_loop_iterations
: Intrinsic::test_set_loop_iterations)
@@ -533,11 +596,11 @@ void HardwareLoop::UpdateBranch(Value *EltsRem) {
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
}
-INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
-FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
+FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index 105ab908d3fa..2ad5820bd9fb 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -71,8 +71,6 @@ static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
cl::init(false), cl::Hidden);
static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
- cl::init(false), cl::Hidden);
static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
cl::init(false), cl::Hidden);
static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond",
@@ -189,16 +187,16 @@ namespace {
std::vector<BBInfo> BBAnalysis;
TargetSchedModel SchedModel;
- const TargetLoweringBase *TLI;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const MachineBranchProbabilityInfo *MBPI;
- MachineRegisterInfo *MRI;
+ const TargetLoweringBase *TLI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
LivePhysRegs Redefs;
- bool PreRegAlloc;
- bool MadeChange;
+ bool PreRegAlloc = true;
+ bool MadeChange = false;
int FnNum = -1;
std::function<bool(const MachineFunction &)> PredicateFtor;
@@ -532,7 +530,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (DisableTriangle && !isFalse && !isRev) break;
if (DisableTriangleR && !isFalse && isRev) break;
if (DisableTriangleF && isFalse && !isRev) break;
- if (DisableTriangleFR && isFalse && isRev) break;
LLVM_DEBUG(dbgs() << "Ifcvt (Triangle");
if (isFalse)
LLVM_DEBUG(dbgs() << " false");
@@ -1512,19 +1509,9 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
MIB.addReg(Reg, RegState::Implicit | RegState::Define);
continue;
}
- if (LiveBeforeMI.count(Reg))
+ if (any_of(TRI->subregs_inclusive(Reg),
+ [&](MCPhysReg S) { return LiveBeforeMI.count(S); }))
MIB.addReg(Reg, RegState::Implicit);
- else {
- bool HasLiveSubReg = false;
- for (MCSubRegIterator S(Reg, TRI); S.isValid(); ++S) {
- if (!LiveBeforeMI.count(*S))
- continue;
- HasLiveSubReg = true;
- break;
- }
- if (HasLiveSubReg)
- MIB.addReg(Reg, RegState::Implicit);
- }
}
}
@@ -1958,17 +1945,15 @@ bool IfConverter::IfConvertDiamondCommon(
} else if (!RedefsByFalse.count(Reg)) {
// These are defined before ctrl flow reach the 'false' instructions.
// They cannot be modified by the 'true' instructions.
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- ExtUses.insert(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ ExtUses.insert(SubReg);
}
}
for (MCPhysReg Reg : Defs) {
if (!ExtUses.count(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- RedefsByFalse.insert(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ RedefsByFalse.insert(SubReg);
}
}
}
@@ -2244,6 +2229,15 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
assert(!FromMBB.hasAddressTaken() &&
"Removing a BB whose address is taken!");
+ // If we're about to splice an INLINEASM_BR from FromBBI, we need to update
+ // ToBBI's successor list accordingly.
+ if (FromMBB.mayHaveInlineAsmBr())
+ for (MachineInstr &MI : FromMBB)
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+ for (MachineOperand &MO : MI.operands())
+ if (MO.isMBB() && !ToBBI.BB->isSuccessor(MO.getMBB()))
+ ToBBI.BB->addSuccessor(MO.getMBB(), BranchProbability::getZero());
+
// In case FromMBB contains terminators (e.g. return instruction),
// first move the non-terminator instructions, then the terminators.
MachineBasicBlock::iterator FromTI = FromMBB.getFirstTerminator();
diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index fa493af0eea7..b2a7aad73411 100644
--- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -94,7 +94,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
/// computeDependence).
bool CanReorder;
- /// If non-None, then an instruction in \p Insts that also must be
+ /// If non-std::nullopt, then an instruction in \p Insts that also must be
/// hoisted.
std::optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence;
@@ -778,9 +778,7 @@ void ImplicitNullChecks::rewriteNullChecks(
// The original operation may define implicit-defs alongside
// the value.
MachineBasicBlock *MBB = NC.getMemOperation()->getParent();
- for (const MachineOperand &MO : FaultingInstr->operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
+ for (const MachineOperand &MO : FaultingInstr->all_defs()) {
Register Reg = MO.getReg();
if (!Reg || MBB->isLiveIn(Reg))
continue;
@@ -788,8 +786,8 @@ void ImplicitNullChecks::rewriteNullChecks(
}
if (auto *DepMI = NC.getOnlyDependency()) {
- for (auto &MO : DepMI->operands()) {
- if (!MO.isReg() || !MO.getReg() || !MO.isDef() || MO.isDead())
+ for (auto &MO : DepMI->all_defs()) {
+ if (!MO.getReg() || MO.isDead())
continue;
if (!NC.getNotNullSucc()->isLiveIn(MO.getReg()))
NC.getNotNullSucc()->addLiveIn(MO.getReg());
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index cf4fff878ad1..c62f3db9d321 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -165,8 +165,8 @@ class InlineSpiller : public Spiller {
const MachineBlockFrequencyInfo &MBFI;
// Variables that are valid during spill(), but used by multiple methods.
- LiveRangeEdit *Edit;
- LiveInterval *StackInt;
+ LiveRangeEdit *Edit = nullptr;
+ LiveInterval *StackInt = nullptr;
int StackSlot;
Register Original;
@@ -175,6 +175,7 @@ class InlineSpiller : public Spiller {
// All COPY instructions to/from snippets.
// They are ignored since both operands refer to the same stack slot.
+ // For bundled copies, this will only include the first header copy.
SmallPtrSet<MachineInstr*, 8> SnippetCopies;
// Values that failed to remat at some point.
@@ -257,19 +258,64 @@ Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass,
/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
/// otherwise return 0.
-static Register isFullCopyOf(const MachineInstr &MI, Register Reg) {
- if (!MI.isFullCopy())
+static Register isCopyOf(const MachineInstr &MI, Register Reg,
+ const TargetInstrInfo &TII) {
+ if (!TII.isCopyInstr(MI))
return Register();
- if (MI.getOperand(0).getReg() == Reg)
- return MI.getOperand(1).getReg();
- if (MI.getOperand(1).getReg() == Reg)
- return MI.getOperand(0).getReg();
+
+ const MachineOperand &DstOp = MI.getOperand(0);
+ const MachineOperand &SrcOp = MI.getOperand(1);
+
+ // TODO: Probably only worth allowing subreg copies with undef dests.
+ if (DstOp.getSubReg() != SrcOp.getSubReg())
+ return Register();
+ if (DstOp.getReg() == Reg)
+ return SrcOp.getReg();
+ if (SrcOp.getReg() == Reg)
+ return DstOp.getReg();
+ return Register();
+}
+
+/// Check for a copy bundle as formed by SplitKit.
+static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg,
+ const TargetInstrInfo &TII) {
+ if (!FirstMI.isBundled())
+ return isCopyOf(FirstMI, Reg, TII);
+
+ assert(!FirstMI.isBundledWithPred() && FirstMI.isBundledWithSucc() &&
+ "expected to see first instruction in bundle");
+
+ Register SnipReg;
+ MachineBasicBlock::const_instr_iterator I = FirstMI.getIterator();
+ while (I->isBundledWithSucc()) {
+ const MachineInstr &MI = *I;
+ auto CopyInst = TII.isCopyInstr(MI);
+ if (!CopyInst)
+ return Register();
+
+ const MachineOperand &DstOp = *CopyInst->Destination;
+ const MachineOperand &SrcOp = *CopyInst->Source;
+ if (DstOp.getReg() == Reg) {
+ if (!SnipReg)
+ SnipReg = SrcOp.getReg();
+ else if (SnipReg != SrcOp.getReg())
+ return Register();
+ } else if (SrcOp.getReg() == Reg) {
+ if (!SnipReg)
+ SnipReg = DstOp.getReg();
+ else if (SnipReg != DstOp.getReg())
+ return Register();
+ }
+
+ ++I;
+ }
+
return Register();
}
static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) {
- for (const MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isDef() && MO.getReg().isVirtual())
+ for (const MachineOperand &MO : MI.all_defs())
+ if (MO.getReg().isVirtual())
LIS.getInterval(MO.getReg());
}
@@ -307,14 +353,14 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
MachineInstr *UseMI = nullptr;
// Check that all uses satisfy our criteria.
- for (MachineRegisterInfo::reg_instr_nodbg_iterator
- RI = MRI.reg_instr_nodbg_begin(SnipLI.reg()),
- E = MRI.reg_instr_nodbg_end();
+ for (MachineRegisterInfo::reg_bundle_nodbg_iterator
+ RI = MRI.reg_bundle_nodbg_begin(SnipLI.reg()),
+ E = MRI.reg_bundle_nodbg_end();
RI != E;) {
MachineInstr &MI = *RI++;
// Allow copies to/from Reg.
- if (isFullCopyOf(MI, Reg))
+ if (isCopyOfBundle(MI, Reg, TII))
continue;
// Allow stack slot loads.
@@ -351,9 +397,8 @@ void InlineSpiller::collectRegsToSpill() {
if (Original == Reg)
return;
- for (MachineInstr &MI :
- llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
- Register SnipReg = isFullCopyOf(MI, Reg);
+ for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
+ Register SnipReg = isCopyOfBundle(MI, Reg, TII);
if (!isSibling(SnipReg))
continue;
LiveInterval &SnipLI = LIS.getInterval(SnipReg);
@@ -475,21 +520,22 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// Find all spills and copies of VNI.
for (MachineInstr &MI :
- llvm::make_early_inc_range(MRI.use_nodbg_instructions(Reg))) {
- if (!MI.isCopy() && !MI.mayStore())
+ llvm::make_early_inc_range(MRI.use_nodbg_bundles(Reg))) {
+ if (!MI.mayStore() && !TII.isCopyInstr(MI))
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (LI->getVNInfoAt(Idx) != VNI)
continue;
// Follow sibling copies down the dominator tree.
- if (Register DstReg = isFullCopyOf(MI, Reg)) {
+ if (Register DstReg = isCopyOfBundle(MI, Reg, TII)) {
if (isSibling(DstReg)) {
- LiveInterval &DstLI = LIS.getInterval(DstReg);
- VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
- assert(DstVNI && "Missing defined value");
- assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
- WorkList.push_back(std::make_pair(&DstLI, DstVNI));
+ LiveInterval &DstLI = LIS.getInterval(DstReg);
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && "Missing defined value");
+ assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
+
+ WorkList.push_back(std::make_pair(&DstLI, DstVNI));
}
continue;
}
@@ -593,8 +639,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
if (!ParentVNI) {
LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");
- for (MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg())
+ for (MachineOperand &MO : MI.all_uses())
+ if (MO.getReg() == VirtReg.reg())
MO.setIsUndef();
LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI);
return true;
@@ -826,7 +872,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
if (Ops.back().first != MI || MI->isBundled())
return false;
- bool WasCopy = MI->isCopy();
+ bool WasCopy = TII.isCopyInstr(*MI).has_value();
Register ImpReg;
// TII::foldMemoryOperand will do what we need here for statepoint
@@ -1111,7 +1157,7 @@ void InlineSpiller::spillAroundUses(Register Reg) {
Idx = VNI->def;
// Check for a sibling copy.
- Register SibReg = isFullCopyOf(MI, Reg);
+ Register SibReg = isCopyOfBundle(MI, Reg, TII);
if (SibReg && isSibling(SibReg)) {
// This may actually be a copy between snippets.
if (isRegToSpill(SibReg)) {
@@ -1202,8 +1248,8 @@ void InlineSpiller::spillAll() {
llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
// FIXME: Do this with a LiveRangeEdit callback.
- LIS.RemoveMachineInstrFromMaps(MI);
- MI.eraseFromParent();
+ LIS.getSlotIndexes()->removeSingleMachineInstrFromMaps(MI);
+ MI.eraseFromBundle();
}
}
@@ -1250,7 +1296,7 @@ void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
LiveInterval &OrigLI = LIS.getInterval(Original);
// save a copy of LiveInterval in StackSlotToOrigLI because the original
// LiveInterval may be cleared after all its references are spilled.
- if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) {
+ if (!StackSlotToOrigLI.contains(StackSlot)) {
auto LI = std::make_unique<LiveInterval>(OrigLI.reg(), OrigLI.weight());
LI->assign(OrigLI, Allocator);
StackSlotToOrigLI[StackSlot] = std::move(LI);
@@ -1459,7 +1505,7 @@ void HoistSpillHelper::runHoistSpills(
MachineBasicBlock *Block = (*RIt)->getBlock();
// If Block contains an original spill, simply continue.
- if (SpillsToKeep.find(*RIt) != SpillsToKeep.end() && !SpillsToKeep[*RIt]) {
+ if (SpillsToKeep.contains(*RIt) && !SpillsToKeep[*RIt]) {
SpillsInSubTreeMap[*RIt].first.insert(*RIt);
// SpillsInSubTreeMap[*RIt].second contains the cost of spill.
SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
@@ -1469,7 +1515,7 @@ void HoistSpillHelper::runHoistSpills(
// Collect spills in subtree of current node (*RIt) to
// SpillsInSubTreeMap[*RIt].first.
for (MachineDomTreeNode *Child : (*RIt)->children()) {
- if (SpillsInSubTreeMap.find(Child) == SpillsInSubTreeMap.end())
+ if (!SpillsInSubTreeMap.contains(Child))
continue;
// The stmt "SpillsInSubTree = SpillsInSubTreeMap[*RIt].first" below
// should be placed before getting the begin and end iterators of
@@ -1508,8 +1554,7 @@ void HoistSpillHelper::runHoistSpills(
for (auto *const SpillBB : SpillsInSubTree) {
// When SpillBB is a BB contains original spill, insert the spill
// to SpillsToRm.
- if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
- !SpillsToKeep[SpillBB]) {
+ if (SpillsToKeep.contains(SpillBB) && !SpillsToKeep[SpillBB]) {
MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
SpillsToRm.push_back(SpillToRm);
}
diff --git a/llvm/lib/CodeGen/InterferenceCache.cpp b/llvm/lib/CodeGen/InterferenceCache.cpp
index 3cab9e5734ee..ae197ee5553a 100644
--- a/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -93,8 +93,8 @@ void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray,
// Invalidate all iterators.
PrevPos = SlotIndex();
unsigned i = 0;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i)
- RegUnits[i].VirtTag = LIUArray[*Units].getTag();
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ RegUnits[i++].VirtTag = LIUArray[Unit].getTag();
}
void InterferenceCache::Entry::reset(MCRegister physReg,
@@ -110,20 +110,21 @@ void InterferenceCache::Entry::reset(MCRegister physReg,
// Reset iterators.
PrevPos = SlotIndex();
RegUnits.clear();
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- RegUnits.push_back(LIUArray[*Units]);
- RegUnits.back().Fixed = &LIS->getRegUnit(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ RegUnits.push_back(LIUArray[Unit]);
+ RegUnits.back().Fixed = &LIS->getRegUnit(Unit);
}
}
bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
const TargetRegisterInfo *TRI) {
unsigned i = 0, e = RegUnits.size();
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
if (i == e)
return false;
- if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag))
+ if (LIUArray[Unit].changedSince(RegUnits[i].VirtTag))
return false;
+ ++i;
}
return i == e;
}
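The loops above are part of a wider migration in this import from MCRegUnitIterator to the range-based TargetRegisterInfo::regunits() accessor. A rough sketch of the two forms, where visit() stands in for the loop body and TRI/PhysReg are assumed to be valid:

    #include "llvm/CodeGen/TargetRegisterInfo.h"

    static void visit(llvm::MCRegUnit) {}

    void walkRegUnits(const llvm::TargetRegisterInfo *TRI,
                      llvm::MCRegister PhysReg) {
      // Old style: explicit iterator object with manual validity checks.
      //   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
      //     visit(*Units);
      // New style: range-based loop over the same register units.
      for (llvm::MCRegUnit Unit : TRI->regunits(PhysReg))
        visit(Unit);
    }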
diff --git a/llvm/lib/CodeGen/InterferenceCache.h b/llvm/lib/CodeGen/InterferenceCache.h
index 97464da9f17b..2a176b4f2cf7 100644
--- a/llvm/lib/CodeGen/InterferenceCache.h
+++ b/llvm/lib/CodeGen/InterferenceCache.h
@@ -54,7 +54,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
unsigned RefCount = 0;
/// MF - The current function.
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
/// Indexes - Mapping block numbers to SlotIndex ranges.
SlotIndexes *Indexes = nullptr;
@@ -156,7 +156,8 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
public:
InterferenceCache() = default;
-
+ InterferenceCache &operator=(const InterferenceCache &other) = delete;
+ InterferenceCache(const InterferenceCache &other) = delete;
~InterferenceCache() {
free(PhysRegEntries);
}
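Deleting the copy constructor and copy assignment here is the usual guard for a class whose destructor frees a raw buffer: an accidental copy would duplicate the pointer and free it twice. The pattern in isolation (the class name and member are illustrative):

    #include <cstdlib>

    class RawBufferOwner {
      unsigned *Entries = nullptr;

    public:
      RawBufferOwner() = default;
      // Copies would share Entries and double-free it in ~RawBufferOwner.
      RawBufferOwner(const RawBufferOwner &) = delete;
      RawBufferOwner &operator=(const RawBufferOwner &) = delete;
      ~RawBufferOwner() { std::free(Entries); }
    };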
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 0582378be4cd..6b3848531569 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -58,6 +58,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -103,7 +104,7 @@ private:
const TargetLowering *TLI = nullptr;
/// The maximum supported interleave factor.
- unsigned MaxFactor;
+ unsigned MaxFactor = 0u;
/// Transform an interleaved load into target specific intrinsics.
bool lowerInterleavedLoad(LoadInst *LI,
@@ -113,6 +114,16 @@ private:
bool lowerInterleavedStore(StoreInst *SI,
SmallVector<Instruction *, 32> &DeadInsts);
+ /// Transform a load and a deinterleave intrinsic into target specific
+ /// instructions.
+ bool lowerDeinterleaveIntrinsic(IntrinsicInst *II,
+ SmallVector<Instruction *, 32> &DeadInsts);
+
+ /// Transform an interleave intrinsic and a store into target specific
+ /// instructions.
+ bool lowerInterleaveIntrinsic(IntrinsicInst *II,
+ SmallVector<Instruction *, 32> &DeadInsts);
+
/// Returns true if the uses of an interleaved load by the
/// extractelement instructions in \p Extracts can be replaced by uses of the
/// shufflevector instructions in \p Shuffles instead. If so, the necessary
@@ -202,86 +213,15 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
/// The particular case of an RE-interleave mask is:
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
-static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
- unsigned MaxFactor, unsigned OpNumElts) {
- unsigned NumElts = Mask.size();
+static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
+ unsigned MaxFactor) {
+ unsigned NumElts = SVI->getShuffleMask().size();
if (NumElts < 4)
return false;
// Check potential Factors.
for (Factor = 2; Factor <= MaxFactor; Factor++) {
- if (NumElts % Factor)
- continue;
-
- unsigned LaneLen = NumElts / Factor;
- if (!isPowerOf2_32(LaneLen))
- continue;
-
- // Check whether each element matches the general interleaved rule.
- // Ignore undef elements, as long as the defined elements match the rule.
- // Outer loop processes all factors (x, y, z in the above example)
- unsigned I = 0, J;
- for (; I < Factor; I++) {
- unsigned SavedLaneValue;
- unsigned SavedNoUndefs = 0;
-
- // Inner loop processes consecutive accesses (x, x+1... in the example)
- for (J = 0; J < LaneLen - 1; J++) {
- // Lane computes x's position in the Mask
- unsigned Lane = J * Factor + I;
- unsigned NextLane = Lane + Factor;
- int LaneValue = Mask[Lane];
- int NextLaneValue = Mask[NextLane];
-
- // If both are defined, values must be sequential
- if (LaneValue >= 0 && NextLaneValue >= 0 &&
- LaneValue + 1 != NextLaneValue)
- break;
-
- // If the next value is undef, save the current one as reference
- if (LaneValue >= 0 && NextLaneValue < 0) {
- SavedLaneValue = LaneValue;
- SavedNoUndefs = 1;
- }
-
- // Undefs are allowed, but defined elements must still be consecutive:
- // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
- // Verify this by storing the last non-undef followed by an undef
- // Check that following non-undef masks are incremented with the
- // corresponding distance.
- if (SavedNoUndefs > 0 && LaneValue < 0) {
- SavedNoUndefs++;
- if (NextLaneValue >= 0 &&
- SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
- break;
- }
- }
-
- if (J < LaneLen - 1)
- break;
-
- int StartMask = 0;
- if (Mask[I] >= 0) {
- // Check that the start of the I range (J=0) is greater than 0
- StartMask = Mask[I];
- } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
- // StartMask defined by the last value in lane
- StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
- } else if (SavedNoUndefs > 0) {
- // StartMask defined by some non-zero value in the j loop
- StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
- }
- // else StartMask remains set to 0, i.e. all elements are undefs
-
- if (StartMask < 0)
- break;
- // We must stay within the vectors; This case can happen with undefs.
- if (StartMask + LaneLen > OpNumElts*2)
- break;
- }
-
- // Found an interleaved mask of current factor.
- if (I == Factor)
+ if (SVI->isInterleave(Factor))
return true;
}
@@ -311,8 +251,10 @@ bool InterleavedAccess::lowerInterleavedLoad(
continue;
}
if (auto *BI = dyn_cast<BinaryOperator>(User)) {
- if (all_of(BI->users(),
- [](auto *U) { return isa<ShuffleVectorInst>(U); })) {
+ if (all_of(BI->users(), [](auto *U) {
+ auto *SVI = dyn_cast<ShuffleVectorInst>(U);
+ return SVI && isa<UndefValue>(SVI->getOperand(1));
+ })) {
for (auto *SVI : BI->users())
BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
continue;
@@ -500,9 +442,7 @@ bool InterleavedAccess::lowerInterleavedStore(
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
- unsigned OpNumElts =
- cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
- if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
+ if (!isReInterleaveMask(SVI, Factor, MaxFactor))
return false;
LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
@@ -517,6 +457,47 @@ bool InterleavedAccess::lowerInterleavedStore(
return true;
}
+bool InterleavedAccess::lowerDeinterleaveIntrinsic(
+ IntrinsicInst *DI, SmallVector<Instruction *, 32> &DeadInsts) {
+ LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));
+
+ if (!LI || !LI->hasOneUse() || !LI->isSimple())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
+
+  // Try to match this with target-specific intrinsics.
+ if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
+ return false;
+
+ // We now have a target-specific load, so delete the old one.
+ DeadInsts.push_back(DI);
+ DeadInsts.push_back(LI);
+ return true;
+}
+
+bool InterleavedAccess::lowerInterleaveIntrinsic(
+ IntrinsicInst *II, SmallVector<Instruction *, 32> &DeadInsts) {
+ if (!II->hasOneUse())
+ return false;
+
+ StoreInst *SI = dyn_cast<StoreInst>(*(II->users().begin()));
+
+ if (!SI || !SI->isSimple())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
+
+  // Try to match this with target-specific intrinsics.
+ if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
+ return false;
+
+ // We now have a target-specific store, so delete the old one.
+ DeadInsts.push_back(SI);
+ DeadInsts.push_back(II);
+ return true;
+}
+
bool InterleavedAccess::runOnFunction(Function &F) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC || !LowerInterleavedAccesses)
@@ -539,6 +520,15 @@ bool InterleavedAccess::runOnFunction(Function &F) {
if (auto *SI = dyn_cast<StoreInst>(&I))
Changed |= lowerInterleavedStore(SI, DeadInsts);
+
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ // At present, we only have intrinsics to represent (de)interleaving
+ // with a factor of 2.
+ if (II->getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2)
+ Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
+ if (II->getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
+ Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
+ }
}
for (auto *I : DeadInsts)
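The new lowerDeinterleaveIntrinsic and lowerInterleaveIntrinsic hooks pair the experimental factor-2 (de)interleave intrinsics with an adjacent simple load or store and hand them to the target. As a reference for what factor-2 (de)interleaving computes, here is a small scalar model in plain C++; it is only an illustration of the lane permutation, not the IR or the target lowering:

    #include <array>
    #include <cstddef>
    #include <utility>

    // Deinterleave2: split even and odd lanes into two half-width vectors.
    template <typename T, std::size_t N>
    std::pair<std::array<T, N / 2>, std::array<T, N / 2>>
    deinterleave2(const std::array<T, N> &In) {
      std::array<T, N / 2> Even{}, Odd{};
      for (std::size_t I = 0; I < N / 2; ++I) {
        Even[I] = In[2 * I];    // lanes 0, 2, 4, ...
        Odd[I] = In[2 * I + 1]; // lanes 1, 3, 5, ...
      }
      return {Even, Odd};
    }

    // Interleave2 is the inverse: zip the two halves back together.
    template <typename T, std::size_t N>
    std::array<T, 2 * N> interleave2(const std::array<T, N> &A,
                                     const std::array<T, N> &B) {
      std::array<T, 2 * N> Out{};
      for (std::size_t I = 0; I < N; ++I) {
        Out[2 * I] = A[I];
        Out[2 * I + 1] = B[I];
      }
      return Out;
    }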
diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 0d36badfa10f..d0ad6e45b4d3 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -318,7 +318,7 @@ public:
// See Proof(2): Trailing zero bits indicate a left shift. This removes
// leading bits from the result even if they are undefined.
- decErrorMSBs(C.countTrailingZeros());
+ decErrorMSBs(C.countr_zero());
A *= C;
pushBOperation(Mul, C);
@@ -475,7 +475,7 @@ public:
//
// If this can be proven add shiftAmt to the error counter
// `ErrorMSBs`. Otherwise set all bits as undefined.
- if (A.countTrailingZeros() < shiftAmt)
+ if (A.countr_zero() < shiftAmt)
ErrorMSBs = A.getBitWidth();
else
incErrorMSBs(shiftAmt);
@@ -678,6 +678,8 @@ public:
EI = new ElementInfo[VTy->getNumElements()];
}
+ VectorInfo &operator=(const VectorInfo &other) = delete;
+
virtual ~VectorInfo() { delete[] EI; }
unsigned getDimension() const { return VTy->getNumElements(); }
diff --git a/llvm/lib/CodeGen/KCFI.cpp b/llvm/lib/CodeGen/KCFI.cpp
new file mode 100644
index 000000000000..bffa02ca8afd
--- /dev/null
+++ b/llvm/lib/CodeGen/KCFI.cpp
@@ -0,0 +1,111 @@
+//===---- KCFI.cpp - Implements Kernel Control-Flow Integrity (KCFI) ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements Kernel Control-Flow Integrity (KCFI) indirect call
+// check lowering. For each call instruction with a cfi-type attribute, it
+// emits an arch-specific check before the call, and bundles the check and
+// the call to prevent unintentional modifications.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "kcfi"
+#define KCFI_PASS_NAME "Insert KCFI indirect call checks"
+
+STATISTIC(NumKCFIChecksAdded, "Number of indirect call checks added");
+
+namespace {
+class KCFI : public MachineFunctionPass {
+public:
+ static char ID;
+
+ KCFI() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return KCFI_PASS_NAME; }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ /// Machine instruction info used throughout the class.
+ const TargetInstrInfo *TII = nullptr;
+
+ /// Target lowering for arch-specific parts.
+ const TargetLowering *TLI = nullptr;
+
+ /// Emits a KCFI check before an indirect call.
+ /// \returns true if the check was added and false otherwise.
+ bool emitCheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator I) const;
+};
+
+char KCFI::ID = 0;
+} // end anonymous namespace
+
+INITIALIZE_PASS(KCFI, DEBUG_TYPE, KCFI_PASS_NAME, false, false)
+
+FunctionPass *llvm::createKCFIPass() { return new KCFI(); }
+
+bool KCFI::emitCheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator MBBI) const {
+ assert(TII && "Target instruction info was not initialized");
+ assert(TLI && "Target lowering was not initialized");
+
+ // If the call instruction is bundled, we can only emit a check safely if
+ // it's the first instruction in the bundle.
+ if (MBBI->isBundled() && !std::prev(MBBI)->isBundle())
+ report_fatal_error("Cannot emit a KCFI check for a bundled call");
+
+ // Emit a KCFI check for the call instruction at MBBI. The implementation
+ // must unfold memory operands if applicable.
+ MachineInstr *Check = TLI->EmitKCFICheck(MBB, MBBI, TII);
+
+ // Clear the original call's CFI type.
+ assert(MBBI->isCall() && "Unexpected instruction type");
+ MBBI->setCFIType(*MBB.getParent(), 0);
+
+ // If not already bundled, bundle the check and the call to prevent
+ // further changes.
+ if (!MBBI->isBundled())
+ finalizeBundle(MBB, Check->getIterator(), std::next(MBBI->getIterator()));
+
+ ++NumKCFIChecksAdded;
+ return true;
+}
+
+bool KCFI::runOnMachineFunction(MachineFunction &MF) {
+ const Module *M = MF.getMMI().getModule();
+ if (!M->getModuleFlag("kcfi"))
+ return false;
+
+ const auto &SubTarget = MF.getSubtarget();
+ TII = SubTarget.getInstrInfo();
+ TLI = SubTarget.getTargetLowering();
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ // Use instr_iterator because we don't want to skip bundles.
+ for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
+ MIE = MBB.instr_end();
+ MII != MIE; ++MII) {
+ if (MII->isCall() && MII->getCFIType())
+ Changed |= emitCheck(MBB, MII);
+ }
+ }
+
+ return Changed;
+}
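The pass itself only orchestrates check insertion; the comparison sequence comes from the target's EmitKCFICheck hook. Conceptually, KCFI places a 32-bit type hash immediately before each address-taken function, and the call site compares it with the hash expected for the call's prototype. A rough, architecture-neutral model of that check (purely illustrative; no backend emits this C++):

    #include <cstdint>
    #include <cstdlib>

    // Model only: assumes the instrumentation stored the callee's type hash
    // in the 32-bit word just before its entry point.
    void kcfi_checked_call(void (*Callee)(), uint32_t ExpectedTypeHash) {
      const uint32_t *Entry = reinterpret_cast<const uint32_t *>(Callee);
      uint32_t StoredTypeHash = Entry[-1];
      if (StoredTypeHash != ExpectedTypeHash)
        std::abort(); // real targets emit a trap carrying diagnostic info
      Callee();
    }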
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 3192dcadb5f5..d02ec1db1165 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -274,16 +274,17 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
// emission fails.
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
const MCRegisterInfo &MRI = *getMCRegisterInfo();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx);
- MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
+ std::unique_ptr<MCCodeEmitter> MCE(
+ getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx));
+ std::unique_ptr<MCAsmBackend> MAB(
+ getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
if (!MCE || !MAB)
return true;
const Triple &T = getTargetTriple();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
- T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(Out),
- std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
+ T, *Ctx, std::move(MAB), MAB->createObjectWriter(Out), std::move(MCE),
+ STI, Options.MCOptions.MCRelaxAll,
Options.MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
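The addPassesToEmitMC change wraps the raw factory results in std::unique_ptr before the null check, so if only one of the two creations succeeds, the other is still released on the early return instead of leaking. The same shape in isolation, with placeholder factories standing in for the MC ones:

    #include <memory>

    struct Emitter {};
    struct Backend {};

    Emitter *createEmitter() { return new Emitter(); }
    Backend *createBackend() { return nullptr; } // simulate a failed creation

    bool setUpStreamer() {
      std::unique_ptr<Emitter> MCE(createEmitter());
      std::unique_ptr<Backend> MAB(createBackend());
      if (!MCE || !MAB)
        return true; // whichever object was created is freed automatically
      // ... hand both off with std::move() once both are known to exist ...
      return false;
    }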
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index ba417322d4f6..57df9b67fd02 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -601,7 +601,7 @@ public:
if (Var.getInlinedAt())
return false;
- if (Expr->getNumElements() > 0)
+ if (Expr->getNumElements() > 0 && !Expr->isDeref())
return false;
return true;
@@ -1544,12 +1544,12 @@ std::optional<ValueIDNum> InstrRefBasedLDV::getValueForInstrRef(
if (Size != MainRegSize || Offset) {
// Enumerate all subregisters, searching.
Register NewReg = 0;
- for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
- unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
+ for (MCPhysReg SR : TRI->subregs(Reg)) {
+ unsigned Subreg = TRI->getSubRegIndex(Reg, SR);
unsigned SubregSize = TRI->getSubRegIdxSize(Subreg);
unsigned SubregOffset = TRI->getSubRegIdxOffset(Subreg);
if (SubregSize == Size && SubregOffset == Offset) {
- NewReg = *SRI;
+ NewReg = SR;
break;
}
}
@@ -2066,12 +2066,12 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
};
// Then, transfer subreg bits.
- for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+ for (MCPhysReg SR : TRI->subregs(Reg)) {
// Ensure this reg is tracked,
- (void)MTracker->lookupOrTrackRegister(*SRI);
- unsigned SubregIdx = TRI->getSubRegIndex(Reg, *SRI);
+ (void)MTracker->lookupOrTrackRegister(SR);
+ unsigned SubregIdx = TRI->getSubRegIndex(Reg, SR);
unsigned SpillID = MTracker->getLocID(Loc, SubregIdx);
- DoTransfer(*SRI, SpillID);
+ DoTransfer(SR, SpillID);
}
// Directly lookup size of main source reg, and transfer.
@@ -2101,10 +2101,10 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
MTracker->setReg(DestReg, ReadValue);
};
- for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
- unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
+ for (MCPhysReg SR : TRI->subregs(Reg)) {
+ unsigned Subreg = TRI->getSubRegIndex(Reg, SR);
unsigned SpillID = MTracker->getLocID(*Loc, Subreg);
- DoTransfer(*SRI, SpillID);
+ DoTransfer(SR, SpillID);
}
// Directly look up this registers slot idx by size, and transfer.
@@ -2513,8 +2513,8 @@ void InstrRefBasedLDV::placeMLocPHIs(
Register R = MTracker->LocIdxToLocID[L];
SmallSet<Register, 8> FoundRegUnits;
bool AnyIllegal = false;
- for (MCRegUnitIterator RUI(R.asMCReg(), TRI); RUI.isValid(); ++RUI) {
- for (MCRegUnitRootIterator URoot(*RUI, TRI); URoot.isValid(); ++URoot){
+ for (MCRegUnit Unit : TRI->regunits(R.asMCReg())) {
+ for (MCRegUnitRootIterator URoot(Unit, TRI); URoot.isValid(); ++URoot) {
if (!MTracker->isRegisterTracked(*URoot)) {
// Not all roots were loaded into the tracking map: this register
// isn't actually def'd anywhere, we only read from it. Generate PHIs
@@ -3179,7 +3179,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
for (const MachineBasicBlock *ExpMBB : BlocksToExplore) {
auto &TransferFunc = AllTheVLocs[ExpMBB->getNumber()].Vars;
- if (TransferFunc.find(Var) != TransferFunc.end())
+ if (TransferFunc.contains(Var))
DefBlocks.insert(const_cast<MachineBasicBlock *>(ExpMBB));
}
@@ -3295,7 +3295,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
// to be visited next time around.
for (auto *s : MBB->successors()) {
// Ignore out of scope / not-to-be-explored successors.
- if (LiveInIdx.find(s) == LiveInIdx.end())
+ if (!LiveInIdx.contains(s))
continue;
if (BBToOrder[s] > BBToOrder[MBB]) {
@@ -3411,7 +3411,7 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
for (MachineBasicBlock *MBB : RPOT)
processMBB(MBB);
for (MachineBasicBlock &MBB : MF)
- if (BBToOrder.find(&MBB) == BBToOrder.end())
+ if (!BBToOrder.contains(&MBB))
processMBB(&MBB);
// Order value substitutions by their "source" operand pair, for quick lookup.
@@ -3716,7 +3716,12 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
unsigned BlockNo = Num.getBlock();
LocIdx LocNo = Num.getLoc();
- Num = MInLocs[BlockNo][LocNo.asU64()];
+ ValueIDNum ResolvedValue = MInLocs[BlockNo][LocNo.asU64()];
+ // If there is no resolved value for this live-in then it is not directly
+ // reachable from the entry block -- model it as a PHI on entry to this
+ // block, which means we leave the ValueIDNum unchanged.
+ if (ResolvedValue != ValueIDNum::EmptyValue)
+ Num = ResolvedValue;
}
// Later, we'll be looking up ranges of instruction numbers.
llvm::sort(DebugPHINumToValue);
@@ -4050,10 +4055,7 @@ public:
/// ValueIsPHI - Check if the instruction that defines the specified value
/// is a PHI instruction.
static LDVSSAPhi *ValueIsPHI(BlockValueNum Val, LDVSSAUpdater *Updater) {
- auto PHIIt = Updater->PHIs.find(Val);
- if (PHIIt == Updater->PHIs.end())
- return nullptr;
- return PHIIt->second;
+ return Updater->PHIs.lookup(Val);
}
/// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
@@ -4195,7 +4197,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
// Are all these things actually defined?
for (auto &PHIIt : PHI->IncomingValues) {
// Any undef input means DBG_PHIs didn't dominate the use point.
- if (Updater.UndefMap.find(&PHIIt.first->BB) != Updater.UndefMap.end())
+ if (Updater.UndefMap.contains(&PHIIt.first->BB))
return std::nullopt;
ValueIDNum ValueToCheck;
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index 2fdc37c6dda2..30de18e53c4f 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -656,7 +656,7 @@ public:
// If we discover a new machine location, assign it an mphi with this
// block number.
- unsigned CurBB;
+ unsigned CurBB = -1;
/// Cached local copy of the number of registers the target has.
unsigned NumRegs;
@@ -740,7 +740,7 @@ public:
unsigned getLocID(SpillLocationNo Spill, StackSlotPos Idx) {
unsigned SlotNo = Spill.id() - 1;
SlotNo *= NumSlotIdxes;
- assert(StackSlotIdxes.find(Idx) != StackSlotIdxes.end());
+ assert(StackSlotIdxes.contains(Idx));
SlotNo += StackSlotIdxes[Idx];
SlotNo += NumRegs;
return SlotNo;
@@ -1094,7 +1094,7 @@ private:
MLocTracker *MTracker = nullptr;
/// Number of the current block LiveDebugValues is stepping through.
- unsigned CurBB;
+ unsigned CurBB = -1;
/// Number of the current instruction LiveDebugValues is evaluating.
unsigned CurInst;
@@ -1197,7 +1197,7 @@ private:
/// For an instruction reference given by \p InstNo and \p OpNo in instruction
/// \p MI returns the Value pointed to by that instruction reference if any
- /// exists, otherwise returns None.
+ /// exists, otherwise returns std::nullopt.
std::optional<ValueIDNum> getValueForInstrRef(unsigned InstNo, unsigned OpNo,
MachineInstr &MI,
const ValueTable *MLiveOuts,
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 9dba9a88f703..0c0a4e13c7c9 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -8,7 +8,6 @@
#include "LiveDebugValues.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -19,6 +18,7 @@
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
/// \file LiveDebugValues.cpp
///
@@ -81,7 +81,7 @@ public:
private:
std::unique_ptr<LDVImpl> InstrRefImpl;
std::unique_ptr<LDVImpl> VarLocImpl;
- TargetPassConfig *TPC;
+ TargetPassConfig *TPC = nullptr;
MachineDominatorTree MDT;
};
} // namespace
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index b78757b855f4..116c6b7e2d19 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -1116,7 +1116,7 @@ VarLocBasedLDV::~VarLocBasedLDV() = default;
/// location, erase the variable from the Vars set.
void VarLocBasedLDV::OpenRangesSet::erase(const VarLoc &VL) {
// Erasure helper.
- auto DoErase = [VL, this](DebugVariable VarToErase) {
+ auto DoErase = [&VL, this](DebugVariable VarToErase) {
auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
auto It = EraseFrom->find(VarToErase);
if (It != EraseFrom->end()) {
@@ -1312,7 +1312,7 @@ void VarLocBasedLDV::cleanupEntryValueTransfers(
return;
auto TransRange = EntryValTransfers.equal_range(TRInst);
- for (auto TDPair : llvm::make_range(TransRange.first, TransRange.second)) {
+ for (auto &TDPair : llvm::make_range(TransRange.first, TransRange.second)) {
const VarLoc &EmittedEV = VarLocIDs[TDPair.second];
if (std::tie(EntryVL.Var, EntryVL.Locs[0].Value.RegNo, EntryVL.Expr) ==
std::tie(EmittedEV.Var, EmittedEV.Locs[0].Value.RegNo,
@@ -1347,7 +1347,7 @@ void VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
// Try to get non-debug instruction responsible for the DBG_VALUE.
const MachineInstr *TransferInst = nullptr;
Register Reg = MI.getDebugOperand(0).getReg();
- if (Reg.isValid() && RegSetInstrs.find(Reg) != RegSetInstrs.end())
+ if (Reg.isValid() && RegSetInstrs.contains(Reg))
TransferInst = RegSetInstrs.find(Reg)->second;
// Case of the parameter's DBG_VALUE at the start of entry MBB.
@@ -2151,7 +2151,9 @@ bool VarLocBasedLDV::isEntryValueCandidate(
// TODO: Add support for parameters that have a pre-existing debug expressions
// (e.g. fragments).
- if (MI.getDebugExpression()->getNumElements() > 0)
+ // A simple deref expression is equivalent to an indirect debug value.
+ const DIExpression *Expr = MI.getDebugExpression();
+ if (Expr->getNumElements() > 0 && !Expr->isDeref())
return false;
return true;
@@ -2160,8 +2162,8 @@ bool VarLocBasedLDV::isEntryValueCandidate(
/// Collect all register defines (including aliases) for the given instruction.
static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs,
const TargetRegisterInfo *TRI) {
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg() && MO.getReg().isPhysical()) {
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (MO.getReg() && MO.getReg().isPhysical()) {
Regs.insert(MO.getReg());
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
Regs.insert(*AI);
diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp
index 7cd3d26cf5b3..1cf354349c56 100644
--- a/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/llvm/lib/CodeGen/LiveInterval.cpp
@@ -445,7 +445,7 @@ bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP,
while (true) {
// J has just been advanced to satisfy:
- assert(J->end >= I->start);
+ assert(J->end > I->start);
// Check for an overlap.
if (J->start < I->end) {
// I and J are overlapping. Find the later start.
@@ -460,11 +460,11 @@ bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP,
std::swap(I, J);
std::swap(IE, JE);
}
- // Advance J until J->end >= I->start.
+ // Advance J until J->end > I->start.
do
if (++J == JE)
return false;
- while (J->end < I->start);
+ while (J->end <= I->start);
}
}
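The LiveRange::overlaps change tightens the advance condition to match half-open segments [start, end): a segment that ends exactly where another begins does not overlap it, so J is advanced until J->end is strictly greater than I->start. A small worked check of that rule on plain integer ranges:

    #include <cassert>

    struct Seg { int Start, End; }; // half-open: [Start, End)

    bool overlaps(Seg A, Seg B) {
      return A.Start < B.End && B.Start < A.End;
    }

    int main() {
      assert(!overlaps({0, 4}, {4, 8})); // touching endpoints: no overlap
      assert(overlaps({0, 5}, {4, 8}));  // genuine overlap
      return 0;
    }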
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index a49f6b0604c5..da55e7f7284b 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -280,9 +280,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
bool IsReserved = false;
for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
bool IsRootReserved = true;
- for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
- Super.isValid(); ++Super) {
- MCRegister Reg = *Super;
+ for (MCPhysReg Reg : TRI->superregs_inclusive(*Root)) {
if (!MRI->reg_empty(Reg))
LICalc->createDeadDefs(LR, Reg);
// A register unit is considered reserved if all its roots and all their
@@ -299,9 +297,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
// Ignore uses of reserved registers. We only track defs of those.
if (!IsReserved) {
for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
- for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
- Super.isValid(); ++Super) {
- MCRegister Reg = *Super;
+ for (MCPhysReg Reg : TRI->superregs_inclusive(*Root)) {
if (!MRI->reg_empty(Reg))
LICalc->extendToUses(LR, Reg);
}
@@ -333,8 +329,7 @@ void LiveIntervals::computeLiveInRegUnits() {
SlotIndex Begin = Indexes->getMBBStartIdx(&MBB);
LLVM_DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB));
for (const auto &LI : MBB.liveins()) {
- for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {
- unsigned Unit = *Units;
+ for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) {
LiveRange *LR = RegUnitRanges[Unit];
if (!LR) {
// Use segment set to speed-up initial computation of the live range.
@@ -708,9 +703,8 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Find the regunit intervals for the assigned register. They may overlap
// the virtual register live range, cancelling any kills.
RU.clear();
- for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid();
- ++Unit) {
- const LiveRange &RURange = getRegUnit(*Unit);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ const LiveRange &RURange = getRegUnit(Unit);
if (RURange.empty())
continue;
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
@@ -1056,10 +1050,9 @@ public:
// For physregs, only update the regunits that actually have a
// precomputed live range.
- for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
- ++Units)
- if (LiveRange *LR = getRegUnitLI(*Units))
- updateRange(*LR, *Units, LaneBitmask::getNone());
+ for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg()))
+ if (LiveRange *LR = getRegUnitLI(Unit))
+ updateRange(*LR, Unit, LaneBitmask::getNone());
}
if (hasRegMask)
updateRegMaskSlots();
@@ -1707,8 +1700,8 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
}
void LiveIntervals::removePhysRegDefAt(MCRegister Reg, SlotIndex Pos) {
- for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
- if (LiveRange *LR = getCachedRegUnit(*Unit))
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ if (LiveRange *LR = getCachedRegUnit(Unit))
if (VNInfo *VNI = LR->getVNInfoAt(Pos))
LR->removeValNo(VNI);
}
diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp
index d4848f16dcf2..96380d408482 100644
--- a/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -265,14 +265,9 @@ void llvm::addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs) {
if (MRI.isReserved(Reg))
continue;
// Skip the register if we are about to add one of its super registers.
- bool ContainsSuperReg = false;
- for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) {
- if (LiveRegs.contains(*SReg) && !MRI.isReserved(*SReg)) {
- ContainsSuperReg = true;
- break;
- }
- }
- if (ContainsSuperReg)
+ if (any_of(TRI.superregs(Reg), [&](MCPhysReg SReg) {
+ return LiveRegs.contains(SReg) && !MRI.isReserved(SReg);
+ }))
continue;
MBB.addLiveIn(Reg);
}
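addLiveIns above folds the ContainsSuperReg flag-and-break loop into a single llvm::any_of over TRI.superregs(Reg). The same restructuring on a generic container, with the predicates passed in rather than taken from register state:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    template <typename LivePred, typename ReservedPred>
    bool hasLiveUnreservedSuperReg(const std::vector<unsigned> &SuperRegs,
                                   LivePred IsLive, ReservedPred IsReserved) {
      // Replaces: a bool flag set inside a for loop and tested afterwards.
      return llvm::any_of(SuperRegs, [&](unsigned S) {
        return IsLive(S) && !IsReserved(S);
      });
    }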
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index d8b024fbdfea..ff49e080090c 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -82,7 +82,7 @@ void LiveRangeEdit::scanRemattable() {
for (VNInfo *VNI : getParent().valnos) {
if (VNI->isUnused())
continue;
- unsigned Original = VRM->getOriginal(getReg());
+ Register Original = VRM->getOriginal(getReg());
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
if (!OrigVNI)
@@ -181,11 +181,9 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg,
- const Remat &RM,
+ Register DestReg, const Remat &RM,
const TargetRegisterInfo &tri,
- bool Late,
- unsigned SubIdx,
+ bool Late, unsigned SubIdx,
MachineInstr *ReplaceIndexMI) {
assert(RM.OrigMI && "Invalid remat");
TII.reMaterialize(MBB, MI, DestReg, SubIdx, *RM.OrigMI, tri);
@@ -288,8 +286,12 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Never delete a bundled instruction.
if (MI->isBundled()) {
+ // TODO: Handle deleting copy bundles
+ LLVM_DEBUG(dbgs() << "Won't delete dead bundled inst: " << Idx << '\t'
+ << *MI);
return;
}
+
// Never delete inline asm.
if (MI->isInlineAsm()) {
LLVM_DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
@@ -306,7 +308,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
LLVM_DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
// Collect virtual registers to be erased after MI is gone.
- SmallVector<unsigned, 8> RegsToErase;
+ SmallVector<Register, 8> RegsToErase;
bool ReadsPhysRegs = false;
bool isOrigDef = false;
Register Dest;
@@ -350,7 +352,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// unlikely to change anything. We typically don't want to shrink the
// PIC base register that has lots of uses everywhere.
// Always shrink COPY uses that probably come from live range splitting.
- if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MO.isDef())) ||
+ if ((MI->readsVirtualRegister(Reg) &&
+ (MO.isDef() || TII.isCopyInstr(*MI))) ||
(MO.readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, MO))))
ToShrink.insert(&LI);
else if (MO.readsReg())
diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 93f5314539cd..af7d6c4403b8 100644
--- a/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -109,6 +110,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
return false;
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
@@ -197,7 +199,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
// is because it needs more accurate model to handle register
// pressure correctly.
MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
- if (!DefInstr.isCopy())
+ if (!TII.isCopyInstr(DefInstr))
NumEligibleUse++;
Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
} else {
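LiveRangeEdit and LiveRangeShrink now ask TargetInstrInfo::isCopyInstr instead of MachineInstr::isCopy, so target-specific copy-like instructions are treated the same way as generic COPYs. A sketch of how the query is typically consumed; it assumes the DestSourcePair layout from TargetInstrInfo.h:

    #include "llvm/CodeGen/TargetInstrInfo.h"
    #include <optional>

    using namespace llvm;

    bool isRegToRegCopy(const TargetInstrInfo &TII, const MachineInstr &MI) {
      // Matches generic COPYs and target pseudos that behave like copies.
      if (std::optional<DestSourcePair> DstSrc = TII.isCopyInstr(MI))
        return DstSrc->Destination->isReg() && DstSrc->Source->isReg();
      return false;
    }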
diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 6ca7f00a7885..6df7e5c10862 100644
--- a/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -93,8 +93,8 @@ static bool foreachUnit(const TargetRegisterInfo *TRI,
}
}
} else {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- if (Func(*Units, VRegInterval))
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (Func(Unit, VRegInterval))
return true;
}
}
@@ -136,8 +136,8 @@ void LiveRegMatrix::unassign(const LiveInterval &VirtReg) {
}
bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const {
- for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
- if (!Matrix[*Unit].empty())
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (!Matrix[Unit].empty())
return true;
}
return false;
@@ -216,7 +216,7 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
LR.addSegment(Seg);
// Check for interference with that segment
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
// LR is stack-allocated. LiveRegMatrix caches queries by a key that
// includes the address of the live range. If (for the same reg unit) this
// checkInterference overload is called twice, without any other query()
@@ -230,7 +230,7 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
// subtle bugs due to query identity. Avoiding caching, for example, would
// greatly simplify things.
LiveIntervalUnion::Query Q;
- Q.reset(UserTag, LR, Matrix[*Units]);
+ Q.reset(UserTag, LR, Matrix[Unit]);
if (Q.checkInterference())
return true;
}
@@ -239,8 +239,8 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
const LiveInterval *VRegInterval = nullptr;
- for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
- if ((VRegInterval = Matrix[*Unit].getOneVReg()))
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if ((VRegInterval = Matrix[Unit].getOneVReg()))
return VRegInterval->reg();
}
diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index 34c81c92707e..9cd74689ba10 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -191,8 +191,7 @@ LiveVariables::FindLastPartialDef(Register Reg,
unsigned LastDefReg = 0;
unsigned LastDefDist = 0;
MachineInstr *LastDef = nullptr;
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
MachineInstr *Def = PhysRegDef[SubReg];
if (!Def)
continue;
@@ -208,15 +207,13 @@ LiveVariables::FindLastPartialDef(Register Reg,
return nullptr;
PartDefRegs.insert(LastDefReg);
- for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = LastDef->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ for (MachineOperand &MO : LastDef->all_defs()) {
+ if (MO.getReg() == 0)
continue;
Register DefReg = MO.getReg();
if (TRI->isSubRegister(Reg, DefReg)) {
- for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- PartDefRegs.insert(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(DefReg))
+ PartDefRegs.insert(SubReg);
}
}
return LastDef;
@@ -245,8 +242,7 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
true/*IsImp*/));
PhysRegDef[Reg] = LastPartialDef;
SmallSet<unsigned, 8> Processed;
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
if (Processed.count(SubReg))
continue;
if (PartDefRegs.count(SubReg))
@@ -257,8 +253,8 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
false/*IsDef*/,
true/*IsImp*/));
PhysRegDef[SubReg] = LastPartialDef;
- for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
- Processed.insert(*SS);
+ for (MCPhysReg SS : TRI->subregs(SubReg))
+ Processed.insert(SS);
}
}
} else if (LastDef && !PhysRegUse[Reg] &&
@@ -268,9 +264,8 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
true/*IsImp*/));
// Remember this use.
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- PhysRegUse[*SubRegs] = &MI;
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ PhysRegUse[SubReg] = &MI;
}
/// FindLastRefOrPartRef - Return the last reference or partial reference of
@@ -284,8 +279,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(Register Reg) {
MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
unsigned LastPartDefDist = 0;
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
// There was a def of this sub-register in between. This is a partial
@@ -333,8 +327,7 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
MachineInstr *LastPartDef = nullptr;
unsigned LastPartDefDist = 0;
SmallSet<unsigned, 8> PartUses;
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
// There was a def of this sub-register in between. This is a partial
@@ -347,9 +340,8 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
continue;
}
if (MachineInstr *Use = PhysRegUse[SubReg]) {
- for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); SS.isValid();
- ++SS)
- PartUses.insert(*SS);
+ for (MCPhysReg SS : TRI->subregs_inclusive(SubReg))
+ PartUses.insert(SS);
unsigned Dist = DistanceMap[Use];
if (Dist > LastRefOrPartRefDist) {
LastRefOrPartRefDist = Dist;
@@ -364,8 +356,7 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
// dead EAX = op implicit-def AL
// That is, EAX def is dead but AL def extends pass it.
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
if (!PartUses.count(SubReg))
continue;
bool NeedDef = true;
@@ -384,12 +375,11 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
LastSubRef->addRegisterKilled(SubReg, TRI, true);
else {
LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
- for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true);
- SS.isValid(); ++SS)
- PhysRegUse[*SS] = LastRefOrPartRef;
+ for (MCPhysReg SS : TRI->subregs_inclusive(SubReg))
+ PhysRegUse[SS] = LastRefOrPartRef;
}
- for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
- PartUses.erase(*SS);
+ for (MCPhysReg SS : TRI->subregs(SubReg))
+ PartUses.erase(SS);
}
} else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
if (LastPartDef)
@@ -430,9 +420,9 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) {
// Kill the largest clobbered super-register.
// This avoids needless implicit operands.
unsigned Super = Reg;
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
- if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
- Super = *SR;
+ for (MCPhysReg SR : TRI->superregs(Reg))
+ if ((PhysRegDef[SR] || PhysRegUse[SR]) && MO.clobbersPhysReg(SR))
+ Super = SR;
HandlePhysRegKill(Super, nullptr);
}
}
@@ -442,12 +432,10 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
// What parts of the register are previously defined?
SmallSet<unsigned, 32> Live;
if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- Live.insert(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ Live.insert(SubReg);
} else {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
// If a register isn't itself defined, but all parts that make up of it
// are defined, then consider it also defined.
// e.g.
@@ -457,9 +445,8 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
if (Live.count(SubReg))
continue;
if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
- for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true);
- SS.isValid(); ++SS)
- Live.insert(*SS);
+ for (MCPhysReg SS : TRI->subregs_inclusive(SubReg))
+ Live.insert(SS);
}
}
}
@@ -468,8 +455,7 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
// is referenced.
HandlePhysRegKill(Reg, MI);
// Only some of the sub-registers are used.
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
if (!Live.count(SubReg))
// Skip if this sub-register isn't defined.
continue;
@@ -484,9 +470,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
SmallVectorImpl<unsigned> &Defs) {
while (!Defs.empty()) {
Register Reg = Defs.pop_back_val();
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) {
PhysRegDef[SubReg] = &MI;
PhysRegUse[SubReg] = nullptr;
}
@@ -699,7 +683,7 @@ void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) {
if (UseMI.isPHI()) {
// If Reg is used in a phi then it is live-to-end of the corresponding
// predecessor.
- unsigned Idx = UseMI.getOperandNo(&UseMO);
+ unsigned Idx = UseMO.getOperandNo();
LiveToEndBlocks.push_back(UseMI.getOperand(Idx + 1).getMBB());
} else if (&UseBB == &DefBB) {
// A non-phi use in the same BB as the single def must come after the def.
diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp
index b47c96e50831..24c30b756737 100644
--- a/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/llvm/lib/CodeGen/LowLevelType.cpp
@@ -12,74 +12,55 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LowLevelType.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
- if (auto VTy = dyn_cast<VectorType>(&Ty)) {
- auto EC = VTy->getElementCount();
- LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL);
- if (EC.isScalar())
- return ScalarTy;
- return LLT::vector(EC, ScalarTy);
- }
-
- if (auto PTy = dyn_cast<PointerType>(&Ty)) {
- unsigned AddrSpace = PTy->getAddressSpace();
- return LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
- }
-
- if (Ty.isSized()) {
+LLT::LLT(MVT VT) {
+ if (VT.isVector()) {
+ bool asVector = VT.getVectorMinNumElements() > 1;
+ init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector,
+ VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(),
+ /*AddressSpace=*/0);
+ } else if (VT.isValid() && !VT.isScalableTargetExtVT()) {
// Aggregates are no different from real scalars as far as GlobalISel is
// concerned.
- auto SizeInBits = DL.getTypeSizeInBits(&Ty);
- assert(SizeInBits != 0 && "invalid zero-sized type");
- return LLT::scalar(SizeInBits);
+ init(/*IsPointer=*/false, /*IsVector=*/false, /*IsScalar=*/true,
+ ElementCount::getFixed(0), VT.getSizeInBits(), /*AddressSpace=*/0);
+ } else {
+ IsScalar = false;
+ IsPointer = false;
+ IsVector = false;
+ RawData = 0;
}
-
- return LLT();
}
-MVT llvm::getMVTForLLT(LLT Ty) {
- if (!Ty.isVector())
- return MVT::getIntegerVT(Ty.getSizeInBits());
-
- return MVT::getVectorVT(
- MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
- Ty.getNumElements());
+void LLT::print(raw_ostream &OS) const {
+ if (isVector()) {
+ OS << "<";
+ OS << getElementCount() << " x " << getElementType() << ">";
+ } else if (isPointer())
+ OS << "p" << getAddressSpace();
+ else if (isValid()) {
+ assert(isScalar() && "unexpected type");
+ OS << "s" << getScalarSizeInBits();
+ } else
+ OS << "LLT_invalid";
}
-EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL,
- LLVMContext &Ctx) {
- if (Ty.isVector()) {
- EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), DL, Ctx);
- return EVT::getVectorVT(Ctx, EltVT, Ty.getElementCount());
- }
-
- return EVT::getIntegerVT(Ctx, Ty.getSizeInBits());
-}
-
-LLT llvm::getLLTForMVT(MVT Ty) {
- if (!Ty.isVector())
- return LLT::scalar(Ty.getSizeInBits());
-
- return LLT::scalarOrVector(Ty.getVectorElementCount(),
- Ty.getVectorElementType().getSizeInBits());
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LLT::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
}
+#endif
-const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
- assert(Ty.isScalar() && "Expected a scalar type.");
- switch (Ty.getSizeInBits()) {
- case 16:
- return APFloat::IEEEhalf();
- case 32:
- return APFloat::IEEEsingle();
- case 64:
- return APFloat::IEEEdouble();
- case 128:
- return APFloat::IEEEquad();
- }
- llvm_unreachable("Invalid FP type size.");
-}
+const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::VectorScalableFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorScalableFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo;
diff --git a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
new file mode 100644
index 000000000000..bc2ea3f05b6d
--- /dev/null
+++ b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
@@ -0,0 +1,85 @@
+//===-- llvm/CodeGen/LowLevelTypeUtils.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the more header-heavy bits of the LLT class to
+/// avoid polluting users' namespaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+using namespace llvm;
+
+LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
+ if (auto VTy = dyn_cast<VectorType>(&Ty)) {
+ auto EC = VTy->getElementCount();
+ LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL);
+ if (EC.isScalar())
+ return ScalarTy;
+ return LLT::vector(EC, ScalarTy);
+ }
+
+ if (auto PTy = dyn_cast<PointerType>(&Ty)) {
+ unsigned AddrSpace = PTy->getAddressSpace();
+ return LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+ }
+
+ if (Ty.isSized() && !Ty.isScalableTargetExtTy()) {
+ // Aggregates are no different from real scalars as far as GlobalISel is
+ // concerned.
+ auto SizeInBits = DL.getTypeSizeInBits(&Ty);
+ assert(SizeInBits != 0 && "invalid zero-sized type");
+ return LLT::scalar(SizeInBits);
+ }
+
+ return LLT();
+}
+
+MVT llvm::getMVTForLLT(LLT Ty) {
+ if (!Ty.isVector())
+ return MVT::getIntegerVT(Ty.getSizeInBits());
+
+ return MVT::getVectorVT(
+ MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
+ Ty.getNumElements());
+}
+
+EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL,
+ LLVMContext &Ctx) {
+ if (Ty.isVector()) {
+ EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), DL, Ctx);
+ return EVT::getVectorVT(Ctx, EltVT, Ty.getElementCount());
+ }
+
+ return EVT::getIntegerVT(Ctx, Ty.getSizeInBits());
+}
+
+LLT llvm::getLLTForMVT(MVT Ty) {
+ if (!Ty.isVector())
+ return LLT::scalar(Ty.getSizeInBits());
+
+ return LLT::scalarOrVector(Ty.getVectorElementCount(),
+ Ty.getVectorElementType().getSizeInBits());
+}
+
+const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
+ assert(Ty.isScalar() && "Expected a scalar type.");
+ switch (Ty.getSizeInBits()) {
+ case 16:
+ return APFloat::IEEEhalf();
+ case 32:
+ return APFloat::IEEEsingle();
+ case 64:
+ return APFloat::IEEEdouble();
+ case 128:
+ return APFloat::IEEEquad();
+ }
+ llvm_unreachable("Invalid FP type size.");
+}
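getLLTForMVT and getMVTForLLT, now hosted here, map between the two type systems by size alone, so an LLT pointer's address space does not survive a round trip through MVT. A small illustration of the shapes involved, assuming the usual LLT factory functions:

    #include "llvm/CodeGen/LowLevelTypeUtils.h"

    using namespace llvm;

    void lltShapes() {
      LLT S32 = LLT::scalar(32);            // s32
      LLT V4S32 = LLT::fixed_vector(4, 32); // <4 x s32>
      LLT P0 = LLT::pointer(0, 64);         // p0, 64 bits wide

      MVT FromScalar = getMVTForLLT(S32);   // MVT::i32
      MVT FromVector = getMVTForLLT(V4S32); // MVT::v4i32
      MVT FromPtr = getMVTForLLT(P0);       // MVT::i64; pointer-ness is dropped
      (void)FromScalar; (void)FromVector; (void)FromPtr;
    }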
diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index ad8a17f25ec5..8d17cceeb3cd 100644
--- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -18,10 +18,13 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/xxhash.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
using namespace llvm;
@@ -30,6 +33,13 @@ using namespace sampleprofutil;
#define DEBUG_TYPE "mirfs-discriminators"
+// TODO(xur): Remove this option and related code once the new encoding
+// becomes the default.
+cl::opt<bool> ImprovedFSDiscriminator(
+ "improved-fs-discriminator", cl::Hidden, cl::init(false),
+ cl::desc("New FS discriminators encoding (incompatible with the original "
+ "encoding)"));
+
char MIRAddFSDiscriminators::ID = 0;
INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE,
@@ -42,11 +52,12 @@ FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(FSDiscriminatorPass P) {
return new MIRAddFSDiscriminators(P);
}
+// TODO(xur): Remove this once we switch to ImprovedFSDiscriminator.
// Compute a hash value using debug line number, and the line numbers from the
// inline stack.
-static uint64_t getCallStackHash(const MachineBasicBlock &BB,
- const MachineInstr &MI,
- const DILocation *DIL) {
+static uint64_t getCallStackHashV0(const MachineBasicBlock &BB,
+ const MachineInstr &MI,
+ const DILocation *DIL) {
auto updateHash = [](const StringRef &Str) -> uint64_t {
if (Str.empty())
return 0;
@@ -62,6 +73,19 @@ static uint64_t getCallStackHash(const MachineBasicBlock &BB,
return Ret;
}
+static uint64_t getCallStackHash(const DILocation *DIL) {
+ auto hashCombine = [](const uint64_t Seed, const uint64_t Val) {
+ std::hash<uint64_t> Hasher;
+ return Seed ^ (Hasher(Val) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2));
+ };
+ uint64_t Ret = 0;
+ for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
+ Ret = hashCombine(Ret, xxh3_64bits(ArrayRef<uint8_t>(DIL->getLine())));
+ Ret = hashCombine(Ret, xxh3_64bits(DIL->getSubprogramLinkageName()));
+ }
+ return Ret;
+}
+
// Traverse the CFG and assign FD discriminators. If two instructions
// have the same lineno and discriminator, but residing in different BBs,
// the latter instruction will get a new discriminator value. The new
@@ -70,11 +94,16 @@ static uint64_t getCallStackHash(const MachineBasicBlock &BB,
bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
if (!EnableFSDiscriminator)
return false;
- if (!MF.getFunction().shouldEmitDebugInfoForProfiling())
+
+ bool HasPseudoProbe = MF.getFunction().getParent()->getNamedMetadata(
+ PseudoProbeDescMetadataName);
+
+ if (!HasPseudoProbe && !MF.getFunction().shouldEmitDebugInfoForProfiling())
return false;
bool Changed = false;
- using LocationDiscriminator = std::tuple<StringRef, unsigned, unsigned>;
+ using LocationDiscriminator =
+ std::tuple<StringRef, unsigned, unsigned, uint64_t>;
using BBSet = DenseSet<const MachineBasicBlock *>;
using LocationDiscriminatorBBMap = DenseMap<LocationDiscriminator, BBSet>;
using LocationDiscriminatorCurrPassMap =
@@ -84,7 +113,12 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
LocationDiscriminatorCurrPassMap LDCM;
// Mask of discriminators before this pass.
- unsigned BitMaskBefore = getN1Bits(LowBit);
+ // TODO(xur): simplify this once we switch to ImprovedFSDiscriminator.
+ unsigned LowBitTemp = LowBit;
+ assert(LowBit > 0 && "LowBit in FSDiscriminator cannot be 0");
+ if (ImprovedFSDiscriminator)
+ LowBitTemp -= 1;
+ unsigned BitMaskBefore = getN1Bits(LowBitTemp);
// Mask of discriminators including this pass.
unsigned BitMaskNow = getN1Bits(HighBit);
// Mask of discriminators for bits specific to this pass.
@@ -92,17 +126,42 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
unsigned NumNewD = 0;
LLVM_DEBUG(dbgs() << "MIRAddFSDiscriminators working on Func: "
- << MF.getFunction().getName() << "\n");
+ << MF.getFunction().getName() << " Highbit=" << HighBit
+ << "\n");
+
for (MachineBasicBlock &BB : MF) {
for (MachineInstr &I : BB) {
+ if (HasPseudoProbe) {
+ // Only assign discriminators to pseudo probe instructions. Call
+      // instructions are excluded since their DWARF discriminators are used
+      // for other purposes, i.e., storing probe ids.
+ if (!I.isPseudoProbe())
+ continue;
+ } else if (ImprovedFSDiscriminator && I.isMetaInstruction()) {
+ continue;
+ }
const DILocation *DIL = I.getDebugLoc().get();
if (!DIL)
continue;
- unsigned LineNo = DIL->getLine();
+
+ // Use the id of pseudo probe to compute the discriminator.
+ unsigned LineNo =
+ I.isPseudoProbe() ? I.getOperand(1).getImm() : DIL->getLine();
if (LineNo == 0)
continue;
unsigned Discriminator = DIL->getDiscriminator();
- LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator};
+ // Clean up discriminators for pseudo probes at the first FS discriminator
+ // pass as their discriminators should not ever be used.
+ if ((Pass == FSDiscriminatorPass::Pass1) && I.isPseudoProbe()) {
+ Discriminator = 0;
+ I.setDebugLoc(DIL->cloneWithDiscriminator(0));
+ }
+ uint64_t CallStackHashVal = 0;
+ if (ImprovedFSDiscriminator)
+ CallStackHashVal = getCallStackHash(DIL);
+
+ LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator,
+ CallStackHashVal};
auto &BBMap = LDBM[LD];
auto R = BBMap.insert(&BB);
if (BBMap.size() == 1)
@@ -111,7 +170,8 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
unsigned DiscriminatorCurrPass;
DiscriminatorCurrPass = R.second ? ++LDCM[LD] : LDCM[LD];
DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit;
- DiscriminatorCurrPass += getCallStackHash(BB, I, DIL);
+ if (!ImprovedFSDiscriminator)
+ DiscriminatorCurrPass += getCallStackHashV0(BB, I, DIL);
DiscriminatorCurrPass &= BitMaskThisPass;
unsigned NewD = Discriminator | DiscriminatorCurrPass;
const auto *const NewDIL = DIL->cloneWithDiscriminator(NewD);
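The new getCallStackHash folds the inline stack into one 64-bit value by hash-combining xxh3 hashes of each frame's line number and linkage name. The combiner step, isolated with arbitrary example inputs:

    #include <cstdint>
    #include <functional>

    uint64_t hashCombine(uint64_t Seed, uint64_t Val) {
      std::hash<uint64_t> Hasher;
      return Seed ^ (Hasher(Val) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2));
    }

    uint64_t exampleStackHash() {
      uint64_t H = 0;
      H = hashCombine(H, 42);   // stand-in for a hashed line number
      H = hashCombine(H, 1234); // stand-in for a hashed linkage name
      return H;
    }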
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index c136b08223b8..a4c1ba340e46 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -214,6 +214,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("nsw", MIToken::kw_nsw)
.Case("exact", MIToken::kw_exact)
.Case("nofpexcept", MIToken::kw_nofpexcept)
+ .Case("unpredictable", MIToken::kw_unpredictable)
.Case("debug-location", MIToken::kw_debug_location)
.Case("debug-instr-number", MIToken::kw_debug_instr_number)
.Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref)
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index ac484cdfd6c8..7149c29d6ba7 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -73,6 +73,7 @@ struct MIToken {
kw_nsw,
kw_exact,
kw_nofpexcept,
+ kw_unpredictable,
kw_debug_location,
kw_debug_instr_number,
kw_dbg_instr_ref,
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 525f49347fc4..bfd9286ff59c 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -62,7 +63,6 @@
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
@@ -470,7 +470,7 @@ public:
bool parseJumpTableIndexOperand(MachineOperand &Dest);
bool parseExternalSymbolOperand(MachineOperand &Dest);
bool parseMCSymbolOperand(MachineOperand &Dest);
- bool parseMDNode(MDNode *&Node);
+ [[nodiscard]] bool parseMDNode(MDNode *&Node);
bool parseDIExpression(MDNode *&Expr);
bool parseDILocation(MDNode *&Expr);
bool parseMetadataOperand(MachineOperand &Dest);
@@ -1451,7 +1451,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_nuw) ||
Token.is(MIToken::kw_nsw) ||
Token.is(MIToken::kw_exact) ||
- Token.is(MIToken::kw_nofpexcept)) {
+ Token.is(MIToken::kw_nofpexcept) ||
+ Token.is(MIToken::kw_unpredictable)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
@@ -1479,6 +1480,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::IsExact;
if (Token.is(MIToken::kw_nofpexcept))
Flags |= MachineInstr::NoFPExcept;
+ if (Token.is(MIToken::kw_unpredictable))
+ Flags |= MachineInstr::Unpredictable;
lex();
}
@@ -2414,7 +2417,7 @@ bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
bool MIParser::parseCFIOffset(int &Offset) {
if (Token.isNot(MIToken::IntegerLiteral))
return error("expected a cfi offset");
- if (Token.integerValue().getMinSignedBits() > 32)
+ if (Token.integerValue().getSignificantBits() > 32)
return error("expected a 32 bit integer (the cfi offset is too large)");
Offset = (int)Token.integerValue().getExtValue();
lex();
@@ -2520,7 +2523,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
parseCFIAddressSpace(AddressSpace))
return true;
CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMDefAspaceCfa(
- nullptr, Reg, Offset, AddressSpace));
+ nullptr, Reg, Offset, AddressSpace, SMLoc()));
break;
case MIToken::kw_cfi_remember_state:
CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
@@ -3001,7 +3004,7 @@ bool MIParser::parseOffset(int64_t &Offset) {
lex();
if (Token.isNot(MIToken::IntegerLiteral))
return error("expected an integer literal after '" + Sign + "'");
- if (Token.integerValue().getMinSignedBits() > 64)
+ if (Token.integerValue().getSignificantBits() > 64)
return error("expected 64-bit integer (too large)");
Offset = Token.integerValue().getExtValue();
if (IsNegative)
@@ -3471,7 +3474,8 @@ bool MIParser::parseHeapAllocMarker(MDNode *&Node) {
assert(Token.is(MIToken::kw_heap_alloc_marker) &&
"Invalid token for a heap alloc marker!");
lex();
- parseMDNode(Node);
+ if (parseMDNode(Node))
+ return true;
if (!Node)
return error("expected a MDNode after 'heap-alloc-marker'");
if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
@@ -3487,7 +3491,8 @@ bool MIParser::parsePCSections(MDNode *&Node) {
assert(Token.is(MIToken::kw_pcsections) &&
"Invalid token for a PC sections!");
lex();
- parseMDNode(Node);
+ if (parseMDNode(Node))
+ return true;
if (!Node)
return error("expected a MDNode after 'pcsections'");
if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
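
The lexer and parser changes here (together with the printer change further down in MIRPrinter.cpp) only thread an existing MachineInstr flag through the MIR text format, so `unpredictable` round-trips like the other instruction flags. A small hedged illustration of the flag the new token maps to; markUnpredictable is a made-up helper:

#include "llvm/CodeGen/MachineInstr.h"

// Mark an instruction whose control-flow outcome should be treated as
// unpredictable, so prediction-based transforms leave it alone.
static void markUnpredictable(llvm::MachineInstr &MI) {
  MI.setFlag(llvm::MachineInstr::Unpredictable);
}
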
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index a20c2bfe6c0f..b2e570c5e67e 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -130,6 +130,16 @@ public:
const yaml::StringValue &RegisterSource,
bool IsRestored, int FrameIdx);
+ struct VarExprLoc {
+ DILocalVariable *DIVar = nullptr;
+ DIExpression *DIExpr = nullptr;
+ DILocation *DILoc = nullptr;
+ };
+
+ std::optional<VarExprLoc> parseVarExprLoc(PerFunctionMIParsingState &PFS,
+ const yaml::StringValue &VarStr,
+ const yaml::StringValue &ExprStr,
+ const yaml::StringValue &LocStr);
template <typename T>
bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
const T &Object,
@@ -392,7 +402,7 @@ bool MIRParserImpl::initializeCallSiteInfo(
MachineFunction &MF = PFS.MF;
SMDiagnostic Error;
const LLVMTargetMachine &TM = MF.getTarget();
- for (auto YamlCSInfo : YamlMF.CallSitesInfo) {
+ for (auto &YamlCSInfo : YamlMF.CallSitesInfo) {
yaml::CallSiteInfo::MachineInstrLoc MILoc = YamlCSInfo.CallLocation;
if (MILoc.BlockNum >= MF.size())
return error(Twine(MF.getName()) +
@@ -468,6 +478,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MF.setHasEHCatchret(YamlMF.HasEHCatchret);
MF.setHasEHScopes(YamlMF.HasEHScopes);
MF.setHasEHFunclets(YamlMF.HasEHFunclets);
+ MF.setIsOutlined(YamlMF.IsOutlined);
if (YamlMF.Legalized)
MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
@@ -792,6 +803,24 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
return true;
}
+ for (const auto &Object : YamlMF.EntryValueObjects) {
+ SMDiagnostic Error;
+ Register Reg;
+ if (parseNamedRegisterReference(PFS, Reg, Object.EntryValueRegister.Value,
+ Error))
+ return error(Error, Object.EntryValueRegister.SourceRange);
+ if (!Reg.isPhysical())
+ return error(Object.EntryValueRegister.SourceRange.Start,
+ "Expected physical register for entry value field");
+ std::optional<VarExprLoc> MaybeInfo = parseVarExprLoc(
+ PFS, Object.DebugVar, Object.DebugExpr, Object.DebugLoc);
+ if (!MaybeInfo)
+ return true;
+ if (MaybeInfo->DIVar || MaybeInfo->DIExpr || MaybeInfo->DILoc)
+ PFS.MF.setVariableDbgInfo(MaybeInfo->DIVar, MaybeInfo->DIExpr,
+ Reg.asMCReg(), MaybeInfo->DILoc);
+ }
+
// Initialize the ordinary frame objects.
for (const auto &Object : YamlMF.StackObjects) {
int ObjectIdx;
@@ -887,26 +916,37 @@ static bool typecheckMDNode(T *&Result, MDNode *Node,
return false;
}
-template <typename T>
-bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
- const T &Object, int FrameIdx) {
- // Debug information can only be attached to stack objects; Fixed stack
- // objects aren't supported.
- MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr;
- if (parseMDNode(PFS, Var, Object.DebugVar) ||
- parseMDNode(PFS, Expr, Object.DebugExpr) ||
- parseMDNode(PFS, Loc, Object.DebugLoc))
- return true;
- if (!Var && !Expr && !Loc)
- return false;
+std::optional<MIRParserImpl::VarExprLoc> MIRParserImpl::parseVarExprLoc(
+ PerFunctionMIParsingState &PFS, const yaml::StringValue &VarStr,
+ const yaml::StringValue &ExprStr, const yaml::StringValue &LocStr) {
+ MDNode *Var = nullptr;
+ MDNode *Expr = nullptr;
+ MDNode *Loc = nullptr;
+ if (parseMDNode(PFS, Var, VarStr) || parseMDNode(PFS, Expr, ExprStr) ||
+ parseMDNode(PFS, Loc, LocStr))
+ return std::nullopt;
DILocalVariable *DIVar = nullptr;
DIExpression *DIExpr = nullptr;
DILocation *DILoc = nullptr;
- if (typecheckMDNode(DIVar, Var, Object.DebugVar, "DILocalVariable", *this) ||
- typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) ||
- typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this))
+ if (typecheckMDNode(DIVar, Var, VarStr, "DILocalVariable", *this) ||
+ typecheckMDNode(DIExpr, Expr, ExprStr, "DIExpression", *this) ||
+ typecheckMDNode(DILoc, Loc, LocStr, "DILocation", *this))
+ return std::nullopt;
+ return VarExprLoc{DIVar, DIExpr, DILoc};
+}
+
+template <typename T>
+bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
+ const T &Object, int FrameIdx) {
+ std::optional<VarExprLoc> MaybeInfo =
+ parseVarExprLoc(PFS, Object.DebugVar, Object.DebugExpr, Object.DebugLoc);
+ if (!MaybeInfo)
return true;
- PFS.MF.setVariableDbgInfo(DIVar, DIExpr, FrameIdx, DILoc);
+ // Debug information can only be attached to stack objects; Fixed stack
+ // objects aren't supported.
+ if (MaybeInfo->DIVar || MaybeInfo->DIExpr || MaybeInfo->DILoc)
+ PFS.MF.setVariableDbgInfo(MaybeInfo->DIVar, MaybeInfo->DIExpr, FrameIdx,
+ MaybeInfo->DILoc);
return false;
}
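
The VarExprLoc refactoring above separates two outcomes that the old helper folded into one bool: a parse or typecheck failure (now std::nullopt, turned into an error by the caller) versus well-formed YAML with no debug metadata attached (a VarExprLoc whose members are all null, which is simply skipped). A stand-alone toy of that caller-side contract, with opaque pointers standing in for the DI* types:

#include <optional>

struct VarExprLoc {              // mirrors the struct added above
  const void *DIVar = nullptr;   // stand-in for DILocalVariable *
  const void *DIExpr = nullptr;  // stand-in for DIExpression *
  const void *DILoc = nullptr;   // stand-in for DILocation *
};

// Returns true on error, following the MIR parser's convention.
bool consumeParsedDbgInfo(const std::optional<VarExprLoc> &MaybeInfo) {
  if (!MaybeInfo)
    return true;  // malformed metadata: propagate the parse error
  if (MaybeInfo->DIVar || MaybeInfo->DIExpr || MaybeInfo->DILoc) {
    // record it, e.g. via MachineFunction::setVariableDbgInfo(...)
  }
  return false;   // success, possibly with nothing recorded
}
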
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 0a4b28ac79a7..b91d9c4727fc 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -47,7 +48,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -119,6 +119,9 @@ public:
const MachineJumpTableInfo &JTI);
void convertStackObjects(yaml::MachineFunction &YMF,
const MachineFunction &MF, ModuleSlotTracker &MST);
+ void convertEntryValueObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST);
void convertCallSiteObjects(yaml::MachineFunction &YMF,
const MachineFunction &MF,
ModuleSlotTracker &MST);
@@ -200,6 +203,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.HasEHCatchret = MF.hasEHCatchret();
YamlMF.HasEHScopes = MF.hasEHScopes();
YamlMF.HasEHFunclets = MF.hasEHFunclets();
+ YamlMF.IsOutlined = MF.isOutlined();
YamlMF.UseDebugInstrRef = MF.useDebugInstrRef();
YamlMF.Legalized = MF.getProperties().hasProperty(
@@ -220,6 +224,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
MST.incorporateFunction(MF.getFunction());
convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
convertStackObjects(YamlMF, MF, MST);
+ convertEntryValueObjects(YamlMF, MF, MST);
convertCallSiteObjects(YamlMF, MF, MST);
for (const auto &Sub : MF.DebugValueSubstitutions) {
const auto &SubSrc = Sub.Src;
@@ -372,6 +377,19 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
}
}
+void MIRPrinter::convertEntryValueObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (const MachineFunction::VariableDbgInfo &DebugVar :
+ MF.getEntryValueVariableDbgInfo()) {
+ yaml::EntryValueObject &Obj = YMF.EntryValueObjects.emplace_back();
+ printStackObjectDbgInfo(DebugVar, Obj, MST);
+ MCRegister EntryValReg = DebugVar.getEntryValueRegister();
+ printRegMIR(EntryValReg, Obj.EntryValueRegister, TRI);
+ }
+}
+
void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
const MachineFunction &MF,
ModuleSlotTracker &MST) {
@@ -490,17 +508,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
// Print the debug variable information.
for (const MachineFunction::VariableDbgInfo &DebugVar :
- MF.getVariableDbgInfo()) {
- assert(DebugVar.Slot >= MFI.getObjectIndexBegin() &&
- DebugVar.Slot < MFI.getObjectIndexEnd() &&
+ MF.getInStackSlotVariableDbgInfo()) {
+ int Idx = DebugVar.getStackSlot();
+ assert(Idx >= MFI.getObjectIndexBegin() && Idx < MFI.getObjectIndexEnd() &&
"Invalid stack object index");
- if (DebugVar.Slot < 0) { // Negative index means fixed objects.
+ if (Idx < 0) { // Negative index means fixed objects.
auto &Object =
- YMF.FixedStackObjects[FixedStackObjectsIdx[DebugVar.Slot +
+ YMF.FixedStackObjects[FixedStackObjectsIdx[Idx +
MFI.getNumFixedObjects()]];
printStackObjectDbgInfo(DebugVar, Object, MST);
} else {
- auto &Object = YMF.StackObjects[StackObjectsIdx[DebugVar.Slot]];
+ auto &Object = YMF.StackObjects[StackObjectsIdx[Idx]];
printStackObjectDbgInfo(DebugVar, Object, MST);
}
}
@@ -783,6 +801,8 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "nofpexcept ";
if (MI.getFlag(MachineInstr::NoMerge))
OS << "nomerge ";
+ if (MI.getFlag(MachineInstr::Unpredictable))
+ OS << "unpredictable ";
OS << TII->getName(MI.getOpcode());
if (I < E)
diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
index a8996a586909..96f8589e682d 100644
--- a/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -18,17 +18,21 @@
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+#include <optional>
using namespace llvm;
using namespace sampleprof;
@@ -57,6 +61,7 @@ static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
cl::init(false),
cl::desc("View BFI after MIR loader"));
+extern cl::opt<bool> ImprovedFSDiscriminator;
char MIRProfileLoaderPass::ID = 0;
INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
@@ -72,10 +77,11 @@ INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile",
char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID;
-FunctionPass *llvm::createMIRProfileLoaderPass(std::string File,
- std::string RemappingFile,
- FSDiscriminatorPass P) {
- return new MIRProfileLoaderPass(File, RemappingFile, P);
+FunctionPass *
+llvm::createMIRProfileLoaderPass(std::string File, std::string RemappingFile,
+ FSDiscriminatorPass P,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+ return new MIRProfileLoaderPass(File, RemappingFile, P, std::move(FS));
}
namespace llvm {
@@ -89,6 +95,22 @@ extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
+std::optional<PseudoProbe> extractProbe(const MachineInstr &MI) {
+ if (MI.isPseudoProbe()) {
+ PseudoProbe Probe;
+ Probe.Id = MI.getOperand(1).getImm();
+ Probe.Type = MI.getOperand(2).getImm();
+ Probe.Attr = MI.getOperand(3).getImm();
+ Probe.Factor = 1;
+ DILocation *DebugLoc = MI.getDebugLoc();
+ Probe.Discriminator = DebugLoc ? DebugLoc->getDiscriminator() : 0;
+ return Probe;
+ }
+
+ // Ignore callsite probes since they do not have FS discriminators.
+ return std::nullopt;
+}
+
namespace afdo_detail {
template <> struct IRTraits<MachineBasicBlock> {
using InstructionT = MachineInstr;
@@ -118,7 +140,7 @@ template <> struct IRTraits<MachineBasicBlock> {
} // namespace afdo_detail
class MIRProfileLoader final
- : public SampleProfileLoaderBaseImpl<MachineBasicBlock> {
+ : public SampleProfileLoaderBaseImpl<MachineFunction> {
public:
void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT,
MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI,
@@ -136,9 +158,10 @@ public:
assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
}
- MIRProfileLoader(StringRef Name, StringRef RemapName)
- : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) {
- }
+ MIRProfileLoader(StringRef Name, StringRef RemapName,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),
+ std::move(FS)) {}
void setBranchProbs(MachineFunction &F);
bool runOnFunction(MachineFunction &F);
@@ -162,11 +185,18 @@ protected:
unsigned HighBit;
bool ProfileIsValid = true;
+ ErrorOr<uint64_t> getInstWeight(const MachineInstr &MI) override {
+ if (FunctionSamples::ProfileIsProbeBased)
+ return getProbeWeight(MI);
+ if (ImprovedFSDiscriminator && MI.isMetaInstruction())
+ return std::error_code();
+ return getInstWeightImpl(MI);
+ }
};
template <>
-void SampleProfileLoaderBaseImpl<
- MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {}
+void SampleProfileLoaderBaseImpl<MachineFunction>::computeDominanceAndLoopInfo(
+ MachineFunction &F) {}
void MIRProfileLoader::setBranchProbs(MachineFunction &F) {
LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
@@ -254,8 +284,8 @@ void MIRProfileLoader::setBranchProbs(MachineFunction &F) {
bool MIRProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();
- auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P,
- RemappingFilename);
+ auto ReaderOrErr = sampleprof::SampleProfileReader::create(
+ Filename, Ctx, *FS, P, RemappingFilename);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();
Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
@@ -265,20 +295,41 @@ bool MIRProfileLoader::doInitialization(Module &M) {
Reader = std::move(ReaderOrErr.get());
Reader->setModule(&M);
ProfileIsValid = (Reader->read() == sampleprof_error::success);
- Reader->getSummary();
+
+ // Load pseudo probe descriptors for probe-based function samples.
+ if (Reader->profileIsProbeBased()) {
+ ProbeManager = std::make_unique<PseudoProbeManager>(M);
+ if (!ProbeManager->moduleIsProbed(M)) {
+ return false;
+ }
+ }
return true;
}
bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
+ // Do not load non-FS profiles. A line or probe can get a zero-valued
+ // discriminator at a certain pass, which could result in accidentally loading
+ // the corresponding base counter in the non-FS profile, while a non-zero
+ // discriminator would end up getting zero samples. This could in turn undo
+ // the sample distribution effort done by previous BFI maintenance and the
+ // probe distribution factor work for pseudo probes.
+ if (!Reader->profileIsFS())
+ return false;
+
Function &Func = MF.getFunction();
clearFunctionData(false);
Samples = Reader->getSamplesFor(Func);
if (!Samples || Samples->empty())
return false;
- if (getFunctionLoc(MF) == 0)
- return false;
+ if (FunctionSamples::ProfileIsProbeBased) {
+ if (!ProbeManager->profileIsValid(MF.getFunction(), *Samples))
+ return false;
+ } else {
+ if (getFunctionLoc(MF) == 0)
+ return false;
+ }
DenseSet<GlobalValue::GUID> InlinedGUIDs;
bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);
@@ -291,14 +342,16 @@ bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
} // namespace llvm
-MIRProfileLoaderPass::MIRProfileLoaderPass(std::string FileName,
- std::string RemappingFileName,
- FSDiscriminatorPass P)
- : MachineFunctionPass(ID), ProfileFileName(FileName), P(P),
- MIRSampleLoader(
- std::make_unique<MIRProfileLoader>(FileName, RemappingFileName)) {
+MIRProfileLoaderPass::MIRProfileLoaderPass(
+ std::string FileName, std::string RemappingFileName, FSDiscriminatorPass P,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : MachineFunctionPass(ID), ProfileFileName(FileName), P(P) {
LowBit = getFSPassBitBegin(P);
HighBit = getFSPassBitEnd(P);
+
+ auto VFS = FS ? std::move(FS) : vfs::getRealFileSystem();
+ MIRSampleLoader = std::make_unique<MIRProfileLoader>(
+ FileName, RemappingFileName, std::move(VFS));
assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
}
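
extractProbe above rebuilds a PseudoProbe from a PSEUDO_PROBE instruction's immediate operands (operand 1 = probe id, 2 = type, 3 = attributes) and reads the FS discriminator off the instruction's DILocation; callsite probes are deliberately skipped. A stand-alone sketch of the same unpacking over plain integers, only to make the operand layout explicit; names are illustrative:

#include <array>
#include <cstdint>
#include <optional>

struct ProbeFields {
  int64_t Id, Type, Attr;
  uint32_t Discriminator;
};

// Ops holds the instruction's immediates in operand order 0..3; Discr is the
// discriminator from its debug location (0 when there is none).
std::optional<ProbeFields> unpackProbe(bool IsPseudoProbe,
                                       const std::array<int64_t, 4> &Ops,
                                       uint32_t Discr) {
  if (!IsPseudoProbe)
    return std::nullopt;
  return ProbeFields{Ops[1], Ops[2], Ops[3], Discr};
}
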
diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index e634a2b284c3..812d57984e6c 100644
--- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -39,7 +39,7 @@ VRegRenamer::getVRegRenameMap(const std::vector<NamedVReg> &VRegs) {
StringMap<unsigned> VRegNameCollisionMap;
auto GetUniqueVRegName = [&VRegNameCollisionMap](const NamedVReg &Reg) {
- if (VRegNameCollisionMap.find(Reg.getName()) == VRegNameCollisionMap.end())
+ if (!VRegNameCollisionMap.contains(Reg.getName()))
VRegNameCollisionMap[Reg.getName()] = 0;
const unsigned Counter = ++VRegNameCollisionMap[Reg.getName()];
return Reg.getName() + "__" + std::to_string(Counter);
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index 5cc8ad3d609e..7b3746fde503 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -13,6 +13,7 @@
#include "AllocationOrder.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocGreedy.h"
+#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TFLITE)
@@ -38,6 +39,7 @@
#include "llvm/Support/ErrorHandling.h"
#include <array>
+#include <bitset>
#include <memory>
using namespace llvm;
@@ -52,6 +54,14 @@ using CompiledModelType = RegallocEvictModel;
using CompiledModelType = NoopSavedModelImpl;
#endif
+static cl::opt<std::string> InteractiveChannelBaseName(
+ "regalloc-evict-interactive-channel-base", cl::Hidden,
+ cl::desc(
+ "Base file path for the interactive mode. The incoming filename should "
+ "have the name <regalloc-evict-interactive-channel-base>.in, while the "
+ "outgoing name should be "
+ "<regalloc-evict-interactive-channel-base>.out"));
+
// Options that only make sense in development mode
#ifdef LLVM_HAVE_TFLITE
#include "RegAllocScore.h"
@@ -74,12 +84,12 @@ static cl::opt<bool> EnableDevelopmentFeatures(
static const bool EnableDevelopmentFeatures = false;
#endif // #ifdef LLVM_HAVE_TFLITE
-extern cl::opt<unsigned> EvictInterferenceCutoff;
-
/// The score injection pass.
/// This pass calculates the score for a function and inserts it in the log, but
/// this happens only in development mode. It's a no-op otherwise.
namespace llvm {
+extern cl::opt<unsigned> EvictInterferenceCutoff;
+
class RegAllocScoring : public MachineFunctionPass {
public:
static char ID;
@@ -213,6 +223,8 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
// will be guaranteed to be to a mask == 1 position. Using a macro here to
// avoid 'not used' warnings (and keep cond compilation to a minimum)
#define DecisionName "index_to_evict"
+static const TensorSpec DecisionSpec =
+ TensorSpec::createSpec<int64_t>(DecisionName, {1});
// Named features index.
enum FeatureIDs {
@@ -382,14 +394,21 @@ private:
std::unique_ptr<RegAllocEvictionAdvisor>
getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
- if (!Runner)
- Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
- MF.getFunction().getContext(), InputFeatures, DecisionName);
+ if (!Runner) {
+ if (InteractiveChannelBaseName.empty())
+ Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
+ MF.getFunction().getContext(), InputFeatures, DecisionName);
+ else
+ Runner = std::make_unique<InteractiveModelRunner>(
+ MF.getFunction().getContext(), InputFeatures, DecisionSpec,
+ InteractiveChannelBaseName + ".out",
+ InteractiveChannelBaseName + ".in");
+ }
return std::make_unique<MLEvictAdvisor>(
MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
getAnalysis<MachineLoopInfo>());
}
- std::unique_ptr<ReleaseModeModelRunner<CompiledModelType>> Runner;
+ std::unique_ptr<MLModelRunner> Runner;
};
// ===================================
@@ -398,8 +417,6 @@ private:
//
// Features we log
#ifdef LLVM_HAVE_TFLITE
-static const TensorSpec Output =
- TensorSpec::createSpec<int64_t>(DecisionName, {1});
static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
// Features we bind on the model. The tensor names have a prefix, and we also
@@ -458,7 +475,7 @@ public:
void logRewardIfNeeded(const MachineFunction &MF,
llvm::function_ref<float()> GetReward) override {
- if (!Log)
+ if (!Log || !Log->hasAnyObservationForContext(MF.getName()))
return;
// The function pass manager would run all the function passes for a
// function, so we assume the last context belongs to this function. If
@@ -512,7 +529,7 @@ private:
// We always log the output; in particular, if we're not evaluating, we
// don't have an output spec json file. That's why we handle the
// 'normal' output separately.
- LFS.push_back(Output);
+ LFS.push_back(DecisionSpec);
Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
/*IncludeReward*/ true);
@@ -557,6 +574,7 @@ MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
Runner(std::move(Runner)), MBFI(MBFI), Loops(Loops),
InitialQSize(MLEvictAdvisor::getInitialQueueSize(MF)) {
assert(this->Runner);
+ Runner->switchContext(MF.getName());
DoNotNormalize.set(FeatureIDs::mask);
DoNotNormalize.set(FeatureIDs::is_free);
DoNotNormalize.set(FeatureIDs::is_hint);
@@ -594,8 +612,8 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
SmallVector<const LiveInterval *, MaxInterferences> InterferingIntervals;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
// Different from the default heuristic, we don't make any assumptions
// about what having more than 10 results in the query may mean.
const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
@@ -1134,7 +1152,10 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
#endif // #ifdef LLVM_HAVE_TFLITE
RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
- return new ReleaseModeEvictionAdvisorAnalysis();
+ return llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() ||
+ !InteractiveChannelBaseName.empty()
+ ? new ReleaseModeEvictionAdvisorAnalysis()
+ : nullptr;
}
// In all cases except development mode, we don't need scoring.
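
Both ML advisors touched by this patch gain the same release-mode escape hatch: when the interactive channel base name is set, the compiler streams features to `<base>.out` and reads the advice back from `<base>.in` through an InteractiveModelRunner, instead of consulting an embedded compiled model. A condensed sketch of the selection logic the two getAdvisor overrides now share; makeRunner is a made-up name, and NoopSavedModelImpl is the no-op fallback used when no model is compiled in:

#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#include <memory>
#include <string>
#include <vector>

static std::unique_ptr<llvm::MLModelRunner>
makeRunner(llvm::LLVMContext &Ctx, const std::vector<llvm::TensorSpec> &Inputs,
           const llvm::TensorSpec &Decision, llvm::StringRef DecisionName,
           const std::string &ChannelBase) {
  if (ChannelBase.empty())
    return std::make_unique<
        llvm::ReleaseModeModelRunner<llvm::NoopSavedModelImpl>>(Ctx, Inputs,
                                                                DecisionName);
  // Outbound first (compiler -> model), inbound second (model -> compiler).
  return std::make_unique<llvm::InteractiveModelRunner>(
      Ctx, Inputs, Decision, ChannelBase + ".out", ChannelBase + ".in");
}
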
diff --git a/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp
index 320a184bdcc5..422781593a9c 100644
--- a/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp
@@ -14,6 +14,7 @@
#include "RegAllocGreedy.h"
#include "RegAllocPriorityAdvisor.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
@@ -40,6 +41,16 @@
using namespace llvm;
+static cl::opt<std::string> InteractiveChannelBaseName(
+ "regalloc-priority-interactive-channel-base", cl::Hidden,
+ cl::desc(
+ "Base file path for the interactive mode. The incoming filename should "
+ "have the name <regalloc-priority-interactive-channel-base>.in, while "
+ "the outgoing name should be "
+ "<regalloc-priority-interactive-channel-base>.out"));
+
+using CompiledModelType = NoopSavedModelImpl;
+
// Options that only make sense in development mode
#ifdef LLVM_HAVE_TFLITE
#include "RegAllocScore.h"
@@ -65,6 +76,9 @@ static const std::vector<int64_t> PerLiveRangeShape{1};
M(float, weight, PerLiveRangeShape, "weight")
#define DecisionName "priority"
+static const TensorSpec DecisionSpec =
+ TensorSpec::createSpec<float>(DecisionName, {1});
+
// Named features index.
enum FeatureIDs {
@@ -125,13 +139,20 @@ private:
std::unique_ptr<RegAllocPriorityAdvisor>
getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
- if (!Runner)
- Runner = std::make_unique<ReleaseModeModelRunner<NoopSavedModelImpl>>(
- MF.getFunction().getContext(), InputFeatures, DecisionName);
+ if (!Runner) {
+ if (InteractiveChannelBaseName.empty())
+ Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
+ MF.getFunction().getContext(), InputFeatures, DecisionName);
+ else
+ Runner = std::make_unique<InteractiveModelRunner>(
+ MF.getFunction().getContext(), InputFeatures, DecisionSpec,
+ InteractiveChannelBaseName + ".out",
+ InteractiveChannelBaseName + ".in");
+ }
return std::make_unique<MLPriorityAdvisor>(
MF, RA, &getAnalysis<SlotIndexes>(), Runner.get());
}
- std::unique_ptr<ReleaseModeModelRunner<NoopSavedModelImpl>> Runner;
+ std::unique_ptr<MLModelRunner> Runner;
};
// ===================================
@@ -140,9 +161,6 @@ private:
//
// Features we log
#ifdef LLVM_HAVE_TFLITE
-
-static const TensorSpec Output =
- TensorSpec::createSpec<float>(DecisionName, {1});
static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
#define _DECL_TRAIN_FEATURES(type, name, shape, _) \
@@ -179,7 +197,7 @@ public:
void logRewardIfNeeded(const MachineFunction &MF,
llvm::function_ref<float()> GetReward) override {
- if (!Log)
+ if (!Log || !Log->hasAnyObservationForContext(MF.getName()))
return;
// The function pass manager would run all the function passes for a
// function, so we assume the last context belongs to this function. If
@@ -231,7 +249,7 @@ private:
// We always log the output; in particular, if we're not evaluating, we
// don't have an output spec json file. That's why we handle the
// 'normal' output separately.
- LFS.push_back(Output);
+ LFS.push_back(DecisionSpec);
Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
/*IncludeReward*/ true);
@@ -258,7 +276,10 @@ private:
} // namespace llvm
RegAllocPriorityAdvisorAnalysis *llvm::createReleaseModePriorityAdvisor() {
- return new ReleaseModePriorityAdvisorAnalysis();
+ return llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() ||
+ !InteractiveChannelBaseName.empty()
+ ? new ReleaseModePriorityAdvisorAnalysis()
+ : nullptr;
}
MLPriorityAdvisor::MLPriorityAdvisor(const MachineFunction &MF,
@@ -268,6 +289,7 @@ MLPriorityAdvisor::MLPriorityAdvisor(const MachineFunction &MF,
: RegAllocPriorityAdvisor(MF, RA, Indexes), DefaultAdvisor(MF, RA, Indexes),
Runner(std::move(Runner)) {
assert(this->Runner);
+ Runner->switchContext(MF.getName());
}
float MLPriorityAdvisor::getPriorityImpl(const LiveInterval &LI) const {
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 5ef377f2a1c0..231544494c32 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -12,12 +12,14 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
@@ -664,6 +666,15 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
getParent()->splice(++NewBefore->getIterator(), getIterator());
}
+static int findJumpTableIndex(const MachineBasicBlock &MBB) {
+ MachineBasicBlock::const_iterator TerminatorI = MBB.getFirstTerminator();
+ if (TerminatorI == MBB.end())
+ return -1;
+ const MachineInstr &Terminator = *TerminatorI;
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+ return TII->getJumpTableIndex(Terminator);
+}
+
void MachineBasicBlock::updateTerminator(
MachineBasicBlock *PreviousLayoutSuccessor) {
LLVM_DEBUG(dbgs() << "Updating terminators on " << printMBBReference(*this)
@@ -975,8 +986,8 @@ MachineBasicBlock *MachineBasicBlock::getFallThrough(bool JumpToFallThrough) {
// If there is some explicit branch to the fallthrough block, it can obviously
// reach, even though the branch should get folded to fall through implicitly.
- if (!JumpToFallThrough && (MachineFunction::iterator(TBB) == Fallthrough ||
- MachineFunction::iterator(FBB) == Fallthrough))
+ if (JumpToFallThrough && (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough))
return &*Fallthrough;
// If it's an unconditional branch to some block not the fall through, it
@@ -1033,6 +1044,50 @@ MachineBasicBlock *MachineBasicBlock::splitAt(MachineInstr &MI,
return SplitBB;
}
+// Returns `true` if there are possibly other users of the jump table at
+// `JumpTableIndex` except for the ones in `IgnoreMBB`.
+static bool jumpTableHasOtherUses(const MachineFunction &MF,
+ const MachineBasicBlock &IgnoreMBB,
+ int JumpTableIndex) {
+ assert(JumpTableIndex >= 0 && "need valid index");
+ const MachineJumpTableInfo &MJTI = *MF.getJumpTableInfo();
+ const MachineJumpTableEntry &MJTE = MJTI.getJumpTables()[JumpTableIndex];
+ // Take any basic block from the table; every user of the jump table must
+ // show up in the predecessor list.
+ const MachineBasicBlock *MBB = nullptr;
+ for (MachineBasicBlock *B : MJTE.MBBs) {
+ if (B != nullptr) {
+ MBB = B;
+ break;
+ }
+ }
+ if (MBB == nullptr)
+ return true; // can't rule out other users if there isn't any block.
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ SmallVector<MachineOperand, 4> Cond;
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (Pred == &IgnoreMBB)
+ continue;
+ MachineBasicBlock *DummyT = nullptr;
+ MachineBasicBlock *DummyF = nullptr;
+ Cond.clear();
+ if (!TII.analyzeBranch(*Pred, DummyT, DummyF, Cond,
+ /*AllowModify=*/false)) {
+ // analyzable direct jump
+ continue;
+ }
+ int PredJTI = findJumpTableIndex(*Pred);
+ if (PredJTI >= 0) {
+ if (PredJTI == JumpTableIndex)
+ return true;
+ continue;
+ }
+ // Be conservative for unanalyzable jumps.
+ return true;
+ }
+ return false;
+}
+
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MachineBasicBlock *Succ, Pass &P,
std::vector<SparseBitVector<>> *LiveInSets) {
@@ -1044,6 +1099,16 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
DebugLoc DL; // FIXME: this is nowhere
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+
+ // Is there an indirect jump with a jump table?
+ bool ChangedIndirectJump = false;
+ int JTI = findJumpTableIndex(*this);
+ if (JTI >= 0) {
+ MachineJumpTableInfo &MJTI = *MF->getJumpTableInfo();
+ MJTI.ReplaceMBBInJumpTable(JTI, Succ, NMBB);
+ ChangedIndirectJump = true;
+ }
+
MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
LLVM_DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this)
<< " -- " << printMBBReference(*NMBB) << " -- "
@@ -1066,9 +1131,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
if (LV)
for (MachineInstr &MI :
llvm::make_range(getFirstInstrTerminator(), instr_end())) {
- for (MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse() || !MO.isKill() ||
- MO.isUndef())
+ for (MachineOperand &MO : MI.all_uses()) {
+ if (MO.getReg() == 0 || !MO.isKill() || MO.isUndef())
continue;
Register Reg = MO.getReg();
if (Reg.isPhysical() || LV->getVarInfo(Reg).removeKill(MI)) {
@@ -1109,7 +1173,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
// as the fallthrough successor
if (Succ == PrevFallthrough)
PrevFallthrough = NMBB;
- updateTerminator(PrevFallthrough);
+
+ if (!ChangedIndirectJump)
+ updateTerminator(PrevFallthrough);
if (Indexes) {
SmallVector<MachineInstr*, 4> NewTerminators;
@@ -1284,8 +1350,13 @@ bool MachineBasicBlock::canSplitCriticalEdge(
if (MF->getTarget().requiresStructuredCFG())
return false;
+ // Do we have an indirect jump with a jump table that we can rewrite?
+ int JTI = findJumpTableIndex(*this);
+ if (JTI >= 0 && !jumpTableHasOtherUses(*MF, *this, JTI))
+ return true;
+
// We may need to update this's terminator, but we can't do that if
- // analyzeBranch fails. If this uses a jump table, we won't touch it.
+ // analyzeBranch fails.
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
@@ -1391,7 +1462,7 @@ void MachineBasicBlock::replacePhiUsesWith(MachineBasicBlock *Old,
}
}
-/// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE
+/// Find the next valid DebugLoc starting at MBBI, skipping any debug
/// instructions. Return UnknownLoc if there is none.
DebugLoc
MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
@@ -1403,6 +1474,8 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
}
DebugLoc MachineBasicBlock::rfindDebugLoc(reverse_instr_iterator MBBI) {
+ if (MBBI == instr_rend())
+ return findDebugLoc(instr_begin());
// Skip debug declarations, we don't want a DebugLoc from them.
MBBI = skipDebugInstructionsBackward(MBBI, instr_rbegin());
if (!MBBI->isDebugInstr())
@@ -1410,13 +1483,15 @@ DebugLoc MachineBasicBlock::rfindDebugLoc(reverse_instr_iterator MBBI) {
return {};
}
-/// Find the previous valid DebugLoc preceding MBBI, skipping and DBG_VALUE
+/// Find the previous valid DebugLoc preceding MBBI, skipping any debug
/// instructions. Return UnknownLoc if there is none.
DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) {
- if (MBBI == instr_begin()) return {};
+ if (MBBI == instr_begin())
+ return {};
// Skip debug instructions, we don't want a DebugLoc from them.
MBBI = prev_nodbg(MBBI, instr_begin());
- if (!MBBI->isDebugInstr()) return MBBI->getDebugLoc();
+ if (!MBBI->isDebugInstr())
+ return MBBI->getDebugLoc();
return {};
}
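
The MachineBasicBlock changes above teach critical-edge splitting about blocks that end in an indirect jump through a jump table: canSplitCriticalEdge now accepts them when the table has no other users, and SplitCriticalEdge retargets the table entries to the new block instead of running the analyzeBranch-based updateTerminator path. The safety question is answered by jumpTableHasOtherUses; below is a stand-alone toy of that conservative check over plain data, with illustrative names:

#include <vector>

struct PredInfo {
  bool AnalyzableDirectBranch; // analyzeBranch succeeded on this predecessor
  int JumpTableIndex;          // table used by its terminator, -1 if none
};

// Mirrors jumpTableHasOtherUses: may anyone other than the block being split
// still be jumping through table ThisJTI?
bool mayHaveOtherUsers(const std::vector<PredInfo> &OtherPreds, int ThisJTI) {
  for (const PredInfo &P : OtherPreds) {
    if (P.AnalyzableDirectBranch)
      continue;                      // direct branches don't go through a table
    if (P.JumpTableIndex == ThisJTI)
      return true;                   // another user of the same table
    if (P.JumpTableIndex < 0)
      return true;                   // unanalyzable terminator: be conservative
  }
  return false;
}
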
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 7bbc347a8cf8..912e9ec993e3 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -213,10 +213,9 @@ static cl::opt<bool> RenumberBlocksBeforeView(
"into a dot graph. Only used when a function is being printed."),
cl::init(false), cl::Hidden);
+namespace llvm {
extern cl::opt<bool> EnableExtTspBlockPlacement;
extern cl::opt<bool> ApplyExtTspWithoutProfile;
-
-namespace llvm {
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
@@ -354,15 +353,15 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Pair struct containing basic block and taildup profitability
struct BlockAndTailDupResult {
- MachineBasicBlock *BB;
+ MachineBasicBlock *BB = nullptr;
bool ShouldTailDup;
};
/// Triple struct containing edge weight and the edge.
struct WeightedEdge {
BlockFrequency Weight;
- MachineBasicBlock *Src;
- MachineBasicBlock *Dest;
+ MachineBasicBlock *Src = nullptr;
+ MachineBasicBlock *Dest = nullptr;
};
/// work lists of blocks that are ready to be laid out
@@ -373,32 +372,32 @@ class MachineBlockPlacement : public MachineFunctionPass {
DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
/// Machine Function
- MachineFunction *F;
+ MachineFunction *F = nullptr;
/// A handle to the branch probability pass.
- const MachineBranchProbabilityInfo *MBPI;
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
/// A handle to the function-wide block frequency pass.
std::unique_ptr<MBFIWrapper> MBFI;
/// A handle to the loop info.
- MachineLoopInfo *MLI;
+ MachineLoopInfo *MLI = nullptr;
/// Preferred loop exit.
/// Member variable for convenience. It may be removed by duplication deep
/// in the call stack.
- MachineBasicBlock *PreferredLoopExit;
+ MachineBasicBlock *PreferredLoopExit = nullptr;
/// A handle to the target's instruction info.
- const TargetInstrInfo *TII;
+ const TargetInstrInfo *TII = nullptr;
/// A handle to the target's lowering info.
- const TargetLoweringBase *TLI;
+ const TargetLoweringBase *TLI = nullptr;
/// A handle to the post dominator tree.
- MachinePostDominatorTree *MPDT;
+ MachinePostDominatorTree *MPDT = nullptr;
- ProfileSummaryInfo *PSI;
+ ProfileSummaryInfo *PSI = nullptr;
/// Duplicator used to duplicate tails during placement.
///
@@ -412,7 +411,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// True: use block profile count to compute tail duplication cost.
/// False: use block frequency to compute tail duplication cost.
- bool UseProfileCount;
+ bool UseProfileCount = false;
/// Allocator and owner of BlockChain structures.
///
@@ -1160,7 +1159,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
// tail-duplicated into.
// Skip any blocks that are already placed or not in this loop.
if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
- || BlockToChain[Pred] == &Chain)
+ || (BlockToChain[Pred] == &Chain && !Succ->succ_empty()))
continue;
if (!TailDup.canTailDuplicate(Succ, Pred)) {
if (Successors.size() > 1 && hasSameSuccessors(*Pred, Successors))
@@ -2018,7 +2017,7 @@ MachineBlockPlacement::FallThroughGains(
for (MachineBasicBlock *Succ : BestPred->successors()) {
if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
continue;
- if (ComputedEdges.find(Succ) != ComputedEdges.end())
+ if (ComputedEdges.contains(Succ))
continue;
BlockChain *SuccChain = BlockToChain[Succ];
if ((SuccChain && (Succ != *SuccChain->begin())) ||
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index cd8644029530..f879c5fcf20c 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -68,12 +68,12 @@ static cl::opt<int>
namespace {
class MachineCSE : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- AliasAnalysis *AA;
- MachineDominatorTree *DT;
- MachineRegisterInfo *MRI;
- MachineBlockFrequencyInfo *MBFI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ AliasAnalysis *AA = nullptr;
+ MachineDominatorTree *DT = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
public:
static char ID; // Pass identification
@@ -175,9 +175,7 @@ INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
MachineBasicBlock *MBB) {
bool Changed = false;
- for (MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || !MO.isUse())
- continue;
+ for (MachineOperand &MO : MI->all_uses()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
@@ -291,9 +289,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
PhysDefVector &PhysDefs,
bool &PhysUseDef) const {
// First, add all uses to PhysRefs.
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || MO.isDef())
- continue;
+ for (const MachineOperand &MO : MI->all_uses()) {
Register Reg = MO.getReg();
if (!Reg)
continue;
@@ -483,8 +479,8 @@ bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
// Heuristics #2: If the expression doesn't use a vr and the only uses
// of the redundant computation are copies, do not cse.
bool HasVRegUse = false;
- for (const MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual()) {
+ for (const MachineOperand &MO : MI->all_uses()) {
+ if (MO.getReg().isVirtual()) {
HasVRegUse = true;
break;
}
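
Several hunks in this file (and in MachineCombiner below) swap hand-written operand filters for MachineInstr's filtered ranges, which visit only register use operands (all_uses) or register def operands (all_defs), so the explicit isReg()/isUse() guards disappear. A minimal sketch of the resulting shape:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

// Count virtual-register uses the way the rewritten loops do; all_uses()
// already filters to register use operands.
static unsigned countVirtualRegUses(const llvm::MachineInstr &MI) {
  unsigned N = 0;
  for (const llvm::MachineOperand &MO : MI.all_uses())
    if (MO.getReg().isVirtual())
      ++N;
  return N;
}
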
diff --git a/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/llvm/lib/CodeGen/MachineCheckDebugify.cpp
index 1e5b8dd0bbb0..874f726d2947 100644
--- a/llvm/lib/CodeGen/MachineCheckDebugify.cpp
+++ b/llvm/lib/CodeGen/MachineCheckDebugify.cpp
@@ -11,6 +11,7 @@
/// DILocalVariable which mir-debugifiy generated before.
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 974d570ece51..c65937935ed8 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -63,22 +63,22 @@ static cl::opt<bool> VerifyPatternOrder(
namespace {
class MachineCombiner : public MachineFunctionPass {
- const TargetSubtargetInfo *STI;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ const TargetSubtargetInfo *STI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
MCSchedModel SchedModel;
- MachineRegisterInfo *MRI;
- MachineLoopInfo *MLI; // Current MachineLoopInfo
- MachineTraceMetrics *Traces;
- MachineTraceMetrics::Ensemble *MinInstr;
- MachineBlockFrequencyInfo *MBFI;
- ProfileSummaryInfo *PSI;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo
+ MachineTraceMetrics *Traces = nullptr;
+ MachineTraceMetrics::Ensemble *TraceEnsemble = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+ ProfileSummaryInfo *PSI = nullptr;
RegisterClassInfo RegClassInfo;
TargetSchedModel TSchedModel;
/// True if optimizing for code size.
- bool OptSize;
+ bool OptSize = false;
public:
static char ID;
@@ -95,7 +95,8 @@ private:
bool isTransientMI(const MachineInstr *MI);
unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineTraceMetrics::Trace BlockTrace);
+ MachineTraceMetrics::Trace BlockTrace,
+ const MachineBasicBlock &MBB);
unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
MachineTraceMetrics::Trace BlockTrace);
bool
@@ -148,7 +149,8 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
+MachineInstr *
+MachineCombiner::getOperandDef(const MachineOperand &MO) {
MachineInstr *DefInstr = nullptr;
// We need a virtual register definition.
if (MO.isReg() && MO.getReg().isVirtual())
@@ -207,18 +209,17 @@ bool MachineCombiner::isTransientMI(const MachineInstr *MI) {
unsigned
MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineTraceMetrics::Trace BlockTrace) {
+ MachineTraceMetrics::Trace BlockTrace,
+ const MachineBasicBlock &MBB) {
SmallVector<unsigned, 16> InstrDepth;
// For each instruction in the new sequence compute the depth based on the
// operands. Use the trace information when possible. For new operands which
// are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
for (auto *InstrPtr : InsInstrs) { // for each Use
unsigned IDepth = 0;
- for (const MachineOperand &MO : InstrPtr->operands()) {
+ for (const MachineOperand &MO : InstrPtr->all_uses()) {
// Check for virtual register operand.
- if (!(MO.isReg() && MO.getReg().isVirtual()))
- continue;
- if (!MO.isUse())
+ if (!MO.getReg().isVirtual())
continue;
unsigned DepthOp = 0;
unsigned LatencyOp = 0;
@@ -237,7 +238,9 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
InstrPtr, UseIdx);
} else {
MachineInstr *DefInstr = getOperandDef(MO);
- if (DefInstr) {
+ if (DefInstr && (TII->getMachineCombinerTraceStrategy() !=
+ MachineTraceStrategy::TS_Local ||
+ DefInstr->getParent() == &MBB)) {
DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
if (!isTransientMI(DefInstr))
LatencyOp = TSchedModel.computeOperandLatency(
@@ -267,11 +270,9 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
// Check each definition in NewRoot and compute the latency
unsigned NewRootLatency = 0;
- for (const MachineOperand &MO : NewRoot->operands()) {
+ for (const MachineOperand &MO : NewRoot->all_defs()) {
// Check for virtual register operand.
- if (!(MO.isReg() && MO.getReg().isVirtual()))
- continue;
- if (!MO.isDef())
+ if (!MO.getReg().isVirtual())
continue;
// Get the first instruction that uses MO
MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
@@ -374,7 +375,8 @@ bool MachineCombiner::improvesCriticalPathLen(
MachineCombinerPattern Pattern,
bool SlackIsAccurate) {
// Get depth and latency of NewRoot and Root.
- unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
+ unsigned NewRootDepth =
+ getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *MBB);
unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: "
@@ -399,8 +401,13 @@ bool MachineCombiner::improvesCriticalPathLen(
// Account for the latency of the inserted and deleted instructions by
unsigned NewRootLatency, RootLatency;
- std::tie(NewRootLatency, RootLatency) =
- getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
+ if (TII->accumulateInstrSeqToRootLatency(*Root)) {
+ std::tie(NewRootLatency, RootLatency) =
+ getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
+ } else {
+ NewRootLatency = TSchedModel.computeInstrLatency(InsInstrs.back());
+ RootLatency = TSchedModel.computeInstrLatency(Root);
+ }
unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
unsigned NewCycleCount = NewRootDepth + NewRootLatency;
@@ -483,20 +490,19 @@ bool MachineCombiner::preservesResourceLen(
/// \param MI current machine instruction
/// \param InsInstrs new instructions to insert in \p MBB
/// \param DelInstrs instruction to delete from \p MBB
-/// \param MinInstr is a pointer to the machine trace information
+/// \param TraceEnsemble is a pointer to the machine trace information
/// \param RegUnits set of live registers, needed to compute instruction depths
/// \param TII is target instruction info, used to call target hook
/// \param Pattern is used to call target hook finalizeInsInstrs
/// \param IncrementalUpdate if true, compute instruction depths incrementally,
/// otherwise invalidate the trace
-static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
- SmallVector<MachineInstr *, 16> InsInstrs,
- SmallVector<MachineInstr *, 16> DelInstrs,
- MachineTraceMetrics::Ensemble *MinInstr,
- SparseSet<LiveRegUnit> &RegUnits,
- const TargetInstrInfo *TII,
- MachineCombinerPattern Pattern,
- bool IncrementalUpdate) {
+static void insertDeleteInstructions(
+ MachineBasicBlock *MBB, MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineTraceMetrics::Ensemble *TraceEnsemble,
+ SparseSet<LiveRegUnit> &RegUnits, const TargetInstrInfo *TII,
+ MachineCombinerPattern Pattern, bool IncrementalUpdate) {
// If we want to fix up some placeholder for some target, do it now.
// We need this because in genAlternativeCodeSequence, we have not decided the
// better pattern InsInstrs or DelInstrs, so we don't want generate some
@@ -522,9 +528,9 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
if (IncrementalUpdate)
for (auto *InstrPtr : InsInstrs)
- MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
+ TraceEnsemble->updateDepth(MBB, *InstrPtr, RegUnits);
else
- MinInstr->invalidate(MBB);
+ TraceEnsemble->invalidate(MBB);
NumInstCombined++;
}
@@ -550,7 +556,7 @@ void MachineCombiner::verifyPatternOrder(
unsigned NewRootLatency, RootLatency;
std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
- Root, InsInstrs, DelInstrs, MinInstr->getTrace(MBB));
+ Root, InsInstrs, DelInstrs, TraceEnsemble->getTrace(MBB));
long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
assert(CurrentLatencyDiff <= PrevLatencyDiff &&
"Current pattern is better than previous pattern.");
@@ -574,8 +580,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
decltype(BlockIter) LastUpdate;
// Check if the block is in a loop.
const MachineLoop *ML = MLI->getLoopFor(MBB);
- if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ if (!TraceEnsemble)
+ TraceEnsemble = Traces->getEnsemble(TII->getMachineCombinerTraceStrategy());
SparseSet<LiveRegUnit> RegUnits;
RegUnits.setUniverse(TRI->getNumRegUnits());
@@ -647,7 +653,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (IncrementalUpdate && LastUpdate != BlockIter) {
// Update depths since the last incremental update.
- MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
+ TraceEnsemble->updateDepths(LastUpdate, BlockIter, RegUnits);
LastUpdate = BlockIter;
}
@@ -661,7 +667,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) {
// Replace DelInstrs with InsInstrs.
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
RegUnits, TII, P, IncrementalUpdate);
Changed |= true;
@@ -674,7 +680,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (ML && TII->isThroughputPattern(P)) {
LLVM_DEBUG(dbgs() << "\t Replacing due to throughput pattern in loop\n");
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
RegUnits, TII, P, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
Changed = true;
@@ -683,7 +689,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "\t Replacing due to OptForSize ("
<< InsInstrs.size() << " < "
<< DelInstrs.size() << ")\n");
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
RegUnits, TII, P, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
Changed = true;
@@ -694,7 +700,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// instruction depths incrementally.
// NOTE: Only the instruction depths up to MI are accurate. All other
// trace information is not updated.
- MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ MachineTraceMetrics::Trace BlockTrace = TraceEnsemble->getTrace(MBB);
Traces->verifyAnalysis();
if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
InstrIdxForVirtReg, P,
@@ -706,7 +712,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
LastUpdate = BlockIter;
}
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
RegUnits, TII, P, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
@@ -741,7 +747,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
- MinInstr = nullptr;
+ TraceEnsemble = nullptr;
OptSize = MF.getFunction().hasOptSize();
RegClassInfo.runOnMachineFunction(MF);
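
MachineCombiner no longer hard-codes the min-instruction-count trace ensemble: it asks the target through TargetInstrInfo::getMachineCombinerTraceStrategy(), and when a local (single-block) strategy is selected, getDepth only trusts trace depths for defining instructions in the current block. A hypothetical target override showing the hook; MyTargetInstrInfo is a made-up class, and the header providing MachineTraceStrategy is assumed:

#include "llvm/CodeGen/MachineTraceMetrics.h" // declares MachineTraceStrategy (assumed location)

// Restrict the combiner's depth/latency queries to the current basic block
// instead of the default whole-trace, min-instr-count strategy.
llvm::MachineTraceStrategy
MyTargetInstrInfo::getMachineCombinerTraceStrategy() const {
  return llvm::MachineTraceStrategy::TS_Local;
}
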
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 871824553aa4..3453e6c0b8be 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -80,11 +80,15 @@ using namespace llvm;
STATISTIC(NumDeletes, "Number of dead copies deleted");
STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated");
+STATISTIC(SpillageChainsLength, "Length of spillage chains");
+STATISTIC(NumSpillageChains, "Number of spillage chains");
DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
"Controls which register COPYs are forwarded");
static cl::opt<bool> MCPUseCopyInstr("mcp-use-is-copy-instr", cl::init(false),
cl::Hidden);
+static cl::opt<cl::boolOrDefault>
+ EnableSpillageCopyElimination("enable-spill-copy-elim", cl::Hidden);
namespace {
@@ -103,7 +107,7 @@ static std::optional<DestSourcePair> isCopyInstr(const MachineInstr &MI,
class CopyTracker {
struct CopyInfo {
- MachineInstr *MI;
+ MachineInstr *MI, *LastSeenUseInCopy;
SmallVector<MCRegister, 4> DefRegs;
bool Avail;
};
@@ -117,8 +121,8 @@ public:
const TargetRegisterInfo &TRI) {
for (MCRegister Reg : Regs) {
// Source of copy is no longer available for propagation.
- for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
- auto CI = Copies.find(*RUI);
+ for (MCRegUnit Unit : TRI.regunits(Reg)) {
+ auto CI = Copies.find(Unit);
if (CI != Copies.end())
CI->second.Avail = false;
}
@@ -133,8 +137,8 @@ public:
// and invalidate all of them.
SmallSet<MCRegister, 8> RegsToInvalidate;
RegsToInvalidate.insert(Reg);
- for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
- auto I = Copies.find(*RUI);
+ for (MCRegUnit Unit : TRI.regunits(Reg)) {
+ auto I = Copies.find(Unit);
if (I != Copies.end()) {
if (MachineInstr *MI = I->second.MI) {
std::optional<DestSourcePair> CopyOperands =
@@ -150,15 +154,15 @@ public:
}
}
for (MCRegister InvalidReg : RegsToInvalidate)
- for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI)
- Copies.erase(*RUI);
+ for (MCRegUnit Unit : TRI.regunits(InvalidReg))
+ Copies.erase(Unit);
}
/// Clobber a single register, removing it from the tracker's copy maps.
void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII, bool UseCopyInstr) {
- for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
- auto I = Copies.find(*RUI);
+ for (MCRegUnit Unit : TRI.regunits(Reg)) {
+ auto I = Copies.find(Unit);
if (I != Copies.end()) {
// When we clobber the source of a copy, we need to clobber everything
// it defined.
@@ -188,16 +192,17 @@ public:
MCRegister Def = CopyOperands->Destination->getReg().asMCReg();
// Remember Def is defined by the copy.
- for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
- Copies[*RUI] = {MI, {}, true};
+ for (MCRegUnit Unit : TRI.regunits(Def))
+ Copies[Unit] = {MI, nullptr, {}, true};
// Remember source that's copied to Def. Once it's clobbered, then
// it's no longer available for copy propagation.
- for (MCRegUnitIterator RUI(Src, &TRI); RUI.isValid(); ++RUI) {
- auto I = Copies.insert({*RUI, {nullptr, {}, false}});
+ for (MCRegUnit Unit : TRI.regunits(Src)) {
+ auto I = Copies.insert({Unit, {nullptr, nullptr, {}, false}});
auto &Copy = I.first->second;
if (!is_contained(Copy.DefRegs, Def))
Copy.DefRegs.push_back(Def);
+ Copy.LastSeenUseInCopy = MI;
}
}
@@ -223,16 +228,16 @@ public:
return nullptr;
if (CI->second.DefRegs.size() != 1)
return nullptr;
- MCRegUnitIterator RUI(CI->second.DefRegs[0], &TRI);
- return findCopyForUnit(*RUI, TRI, true);
+ MCRegUnit RU = *TRI.regunits(CI->second.DefRegs[0]).begin();
+ return findCopyForUnit(RU, TRI, true);
}
MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg,
const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII,
bool UseCopyInstr) {
- MCRegUnitIterator RUI(Reg, &TRI);
- MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI);
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
+ MachineInstr *AvailCopy = findCopyDefViaUnit(RU, TRI);
if (!AvailCopy)
return nullptr;
@@ -260,9 +265,9 @@ public:
const TargetInstrInfo &TII, bool UseCopyInstr) {
// We check the first RegUnit here, since we'll only be interested in the
// copy if it copies the entire register anyway.
- MCRegUnitIterator RUI(Reg, &TRI);
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
MachineInstr *AvailCopy =
- findCopyForUnit(*RUI, TRI, /*MustBeAvailable=*/true);
+ findCopyForUnit(RU, TRI, /*MustBeAvailable=*/true);
if (!AvailCopy)
return nullptr;
@@ -286,15 +291,57 @@ public:
return AvailCopy;
}
+ // Find last COPY that defines Reg before Current MachineInstr.
+ MachineInstr *findLastSeenDefInCopy(const MachineInstr &Current,
+ MCRegister Reg,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
+ auto CI = Copies.find(RU);
+ if (CI == Copies.end() || !CI->second.Avail)
+ return nullptr;
+
+ MachineInstr *DefCopy = CI->second.MI;
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*DefCopy, TII, UseCopyInstr);
+ Register Def = CopyOperands->Destination->getReg();
+ if (!TRI.isSubRegisterEq(Def, Reg))
+ return nullptr;
+
+ for (const MachineInstr &MI :
+ make_range(static_cast<const MachineInstr *>(DefCopy)->getIterator(),
+ Current.getIterator()))
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isRegMask())
+ if (MO.clobbersPhysReg(Def)) {
+ LLVM_DEBUG(dbgs() << "MCP: Removed tracking of "
+ << printReg(Def, &TRI) << "\n");
+ return nullptr;
+ }
+
+ return DefCopy;
+ }
+
+ // Find last COPY that uses Reg.
+ MachineInstr *findLastSeenUseInCopy(MCRegister Reg,
+ const TargetRegisterInfo &TRI) {
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
+ auto CI = Copies.find(RU);
+ if (CI == Copies.end())
+ return nullptr;
+ return CI->second.LastSeenUseInCopy;
+ }
+
void clear() {
Copies.clear();
}
};
class MachineCopyPropagation : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- const MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
// Return true if this is a copy instruction and false otherwise.
bool UseCopyInstr;
@@ -325,6 +372,7 @@ private:
void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT);
void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
+ void EliminateSpillageCopies(MachineBasicBlock &MBB);
bool eraseIfRedundant(MachineInstr &Copy, MCRegister Src, MCRegister Def);
void forwardUses(MachineInstr &MI);
void propagateDefs(MachineInstr &MI);
@@ -345,7 +393,7 @@ private:
CopyTracker Tracker;
- bool Changed;
+ bool Changed = false;
};
} // end anonymous namespace
@@ -362,8 +410,8 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
// If 'Reg' is defined by a copy, the copy is no longer a candidate
// for elimination. If a copy is "read" by a debug user, record the user
// for propagation.
- for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
- if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI)) {
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI)) {
if (DT == RegularUse) {
LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump());
MaybeDeadCopies.remove(Copy);
@@ -433,6 +481,12 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy,
make_range(PrevCopy->getIterator(), Copy.getIterator()))
MI.clearRegisterKills(CopyDef, TRI);
+ // Clear undef flag from remaining copy if needed.
+ if (!CopyOperands->Source->isUndef()) {
+ PrevCopy->getOperand(PrevCopyOperands->Source->getOperandNo())
+ .setIsUndef(false);
+ }
+
Copy.eraseFromParent();
Changed = true;
++NumDeletes;
@@ -595,12 +649,19 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
const MachineOperand &CopySrc = *CopyOperands->Source;
Register CopySrcReg = CopySrc.getReg();
- // FIXME: Don't handle partial uses of wider COPYs yet.
+ Register ForwardedReg = CopySrcReg;
+ // MI might use a sub-register of the Copy destination, in which case the
+ // forwarded register is the matching sub-register of the Copy source.
if (MOUse.getReg() != CopyDstReg) {
- LLVM_DEBUG(
- dbgs() << "MCP: FIXME! Not forwarding COPY to sub-register use:\n "
- << MI);
- continue;
+ unsigned SubRegIdx = TRI->getSubRegIndex(CopyDstReg, MOUse.getReg());
+ assert(SubRegIdx &&
+ "MI source is not a sub-register of Copy destination");
+ ForwardedReg = TRI->getSubReg(CopySrcReg, SubRegIdx);
+ if (!ForwardedReg) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy source does not have sub-register "
+ << TRI->getSubRegIndexName(SubRegIdx) << '\n');
+ continue;
+ }
}
// Don't forward COPYs of reserved regs unless they are constant.
@@ -630,10 +691,11 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
}
LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI)
- << "\n with " << printReg(CopySrcReg, TRI)
+ << "\n with " << printReg(ForwardedReg, TRI)
<< "\n in " << MI << " from " << *Copy);
- MOUse.setReg(CopySrcReg);
+ MOUse.setReg(ForwardedReg);
+
if (!CopySrc.isRenamable())
MOUse.setIsRenamable(false);
MOUse.setIsUndef(CopySrc.isUndef());
@@ -844,16 +906,11 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
Tracker.clear();
}
-static bool isBackwardPropagatableCopy(MachineInstr &MI,
+static bool isBackwardPropagatableCopy(const DestSourcePair &CopyOperands,
const MachineRegisterInfo &MRI,
- const TargetInstrInfo &TII,
- bool UseCopyInstr) {
- std::optional<DestSourcePair> CopyOperands =
- isCopyInstr(MI, TII, UseCopyInstr);
- assert(CopyOperands && "MI is expected to be a COPY");
-
- Register Def = CopyOperands->Destination->getReg();
- Register Src = CopyOperands->Source->getReg();
+ const TargetInstrInfo &TII) {
+ Register Def = CopyOperands.Destination->getReg();
+ Register Src = CopyOperands.Source->getReg();
if (!Def || !Src)
return false;
@@ -861,7 +918,7 @@ static bool isBackwardPropagatableCopy(MachineInstr &MI,
if (MRI.isReserved(Def) || MRI.isReserved(Src))
return false;
- return CopyOperands->Source->isRenamable() && CopyOperands->Source->isKill();
+ return CopyOperands.Source->isRenamable() && CopyOperands.Source->isKill();
}
void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
@@ -936,14 +993,13 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
Register SrcReg = CopyOperands->Source->getReg();
if (!TRI->regsOverlap(DefReg, SrcReg)) {
- MCRegister Def = DefReg.asMCReg();
- MCRegister Src = SrcReg.asMCReg();
-
// Unlike forward cp, we don't invoke propagateDefs here,
// just let forward cp do COPY-to-COPY propagation.
- if (isBackwardPropagatableCopy(MI, *MRI, *TII, UseCopyInstr)) {
- Tracker.invalidateRegister(Src, *TRI, *TII, UseCopyInstr);
- Tracker.invalidateRegister(Def, *TRI, *TII, UseCopyInstr);
+ if (isBackwardPropagatableCopy(*CopyOperands, *MRI, *TII)) {
+ Tracker.invalidateRegister(SrcReg.asMCReg(), *TRI, *TII,
+ UseCopyInstr);
+ Tracker.invalidateRegister(DefReg.asMCReg(), *TRI, *TII,
+ UseCopyInstr);
Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
continue;
}
@@ -976,9 +1032,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
// Check if the register in the debug instruction is utilized
// in a copy instruction, so we can update the debug info if the
// register is changed.
- for (MCRegUnitIterator RUI(MO.getReg().asMCReg(), TRI); RUI.isValid();
- ++RUI) {
- if (auto *Copy = Tracker.findCopyDefViaUnit(*RUI, *TRI)) {
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg().asMCReg())) {
+ if (auto *Copy = Tracker.findCopyDefViaUnit(Unit, *TRI)) {
CopyDbgUsers[Copy].insert(&MI);
}
}
@@ -1008,10 +1063,345 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
Tracker.clear();
}
+static void LLVM_ATTRIBUTE_UNUSED printSpillReloadChain(
+ DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &SpillChain,
+ DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &ReloadChain,
+ MachineInstr *Leader) {
+ auto &SC = SpillChain[Leader];
+ auto &RC = ReloadChain[Leader];
+ for (auto I = SC.rbegin(), E = SC.rend(); I != E; ++I)
+ (*I)->dump();
+ for (MachineInstr *MI : RC)
+ MI->dump();
+}
+
+// Remove spill-reload like copy chains. For example
+// r0 = COPY r1
+// r1 = COPY r2
+// r2 = COPY r3
+// r3 = COPY r4
+// <def-use r4>
+// r4 = COPY r3
+// r3 = COPY r2
+// r2 = COPY r1
+// r1 = COPY r0
+// will be folded into
+// r0 = COPY r1
+// r1 = COPY r4
+// <def-use r4>
+// r4 = COPY r1
+// r1 = COPY r0
+// TODO: Currently we don't track usage of r0 outside the chain, so we
+// conservatively keep its value as it was before the rewrite.
+//
+// The algorithm is trying to keep
+// property#1: No Def of spill COPY in the chain is used or defined until the
+// paired reload COPY in the chain uses the Def.
+//
+// property#2: No Source of COPY in the chain is used or defined until the next
+// COPY in the chain defines the Source, except the innermost spill-reload
+// pair.
+//
+// The algorithm checks every COPY inside the MBB, treating it as a potential
+// reload COPY, and then searches backward for the COPY that defines the Src of
+// that reload COPY, i.e. the paired spill COPY. If such a pair is found, it is
+// added either to an existing chain or to a new chain, depending on whether the
+// last available COPY that uses the Def of the reload COPY already belongs to a
+// chain.
+// Implementation notes: we use CopyTracker::findLastSeenDefInCopy(Reg, ...) to
+// find the last COPY that defines Reg, and we use
+// CopyTracker::findLastSeenUseInCopy(Reg, ...) to find the last COPY that uses
+// Reg. When we encounter a non-COPY instruction, we check the registers in its
+// operands. If such a Reg is defined by a COPY, we untrack this Reg via
+// CopyTracker::clobberRegister(Reg, ...).
+void MachineCopyPropagation::EliminateSpillageCopies(MachineBasicBlock &MBB) {
+ // ChainLeader maps MI inside a spill-reload chain to its innermost reload COPY.
+ // Thus we can track if a MI belongs to an existing spill-reload chain.
+ DenseMap<MachineInstr *, MachineInstr *> ChainLeader;
+ // SpillChain maps innermost reload COPY of a spill-reload chain to a sequence
+ // of COPYs that forms spills of a spill-reload chain.
+ // ReloadChain maps innermost reload COPY of a spill-reload chain to a
+ // sequence of COPYs that forms reloads of a spill-reload chain.
+ DenseMap<MachineInstr *, SmallVector<MachineInstr *>> SpillChain, ReloadChain;
+ // If a COPY's Source has a use or def before the next COPY defines the
+ // Source, we put the COPY in this set to keep property#2.
+ DenseSet<const MachineInstr *> CopySourceInvalid;
+
+ auto TryFoldSpillageCopies =
+ [&, this](const SmallVectorImpl<MachineInstr *> &SC,
+ const SmallVectorImpl<MachineInstr *> &RC) {
+ assert(SC.size() == RC.size() && "Spill-reload should be paired");
+
+ // We need at least 3 pairs of copies for the transformation to apply,
+ // because the first outermost pair cannot be removed since we don't
+ // recolor outside of the chain and because we need at least one temporary
+ // spill slot to shorten the chain. If we only have a chain of two
+ // pairs, we already have the shortest sequence this code can handle:
+ // the outermost pair for the temporary spill slot, and the pair that
+ // uses that temporary spill slot for the other end of the chain.
+ // TODO: We might be able to simplify to one spill-reload pair by collecting
+ // more information about the outermost COPY.
+ if (SC.size() <= 2)
+ return;
+
+ // If property#2 is violated, we don't fold the chain.
+ for (const MachineInstr *Spill : make_range(SC.begin() + 1, SC.end()))
+ if (CopySourceInvalid.count(Spill))
+ return;
+
+ for (const MachineInstr *Reload : make_range(RC.begin(), RC.end() - 1))
+ if (CopySourceInvalid.count(Reload))
+ return;
+
+ auto CheckCopyConstraint = [this](Register Def, Register Src) {
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ if (RC->contains(Def) && RC->contains(Src))
+ return true;
+ }
+ return false;
+ };
+
+ auto UpdateReg = [](MachineInstr *MI, const MachineOperand *Old,
+ const MachineOperand *New) {
+ for (MachineOperand &MO : MI->operands()) {
+ if (&MO == Old)
+ MO.setReg(New->getReg());
+ }
+ };
+
+ std::optional<DestSourcePair> InnerMostSpillCopy =
+ isCopyInstr(*SC[0], *TII, UseCopyInstr);
+ std::optional<DestSourcePair> OuterMostSpillCopy =
+ isCopyInstr(*SC.back(), *TII, UseCopyInstr);
+ std::optional<DestSourcePair> InnerMostReloadCopy =
+ isCopyInstr(*RC[0], *TII, UseCopyInstr);
+ std::optional<DestSourcePair> OuterMostReloadCopy =
+ isCopyInstr(*RC.back(), *TII, UseCopyInstr);
+ if (!CheckCopyConstraint(OuterMostSpillCopy->Source->getReg(),
+ InnerMostSpillCopy->Source->getReg()) ||
+ !CheckCopyConstraint(InnerMostReloadCopy->Destination->getReg(),
+ OuterMostReloadCopy->Destination->getReg()))
+ return;
+
+ SpillageChainsLength += SC.size() + RC.size();
+ NumSpillageChains += 1;
+ UpdateReg(SC[0], InnerMostSpillCopy->Destination,
+ OuterMostSpillCopy->Source);
+ UpdateReg(RC[0], InnerMostReloadCopy->Source,
+ OuterMostReloadCopy->Destination);
+
+ for (size_t I = 1; I < SC.size() - 1; ++I) {
+ SC[I]->eraseFromParent();
+ RC[I]->eraseFromParent();
+ NumDeletes += 2;
+ }
+ };
+
+ auto IsFoldableCopy = [this](const MachineInstr &MaybeCopy) {
+ if (MaybeCopy.getNumImplicitOperands() > 0)
+ return false;
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(MaybeCopy, *TII, UseCopyInstr);
+ if (!CopyOperands)
+ return false;
+ Register Src = CopyOperands->Source->getReg();
+ Register Def = CopyOperands->Destination->getReg();
+ return Src && Def && !TRI->regsOverlap(Src, Def) &&
+ CopyOperands->Source->isRenamable() &&
+ CopyOperands->Destination->isRenamable();
+ };
+
+ auto IsSpillReloadPair = [&, this](const MachineInstr &Spill,
+ const MachineInstr &Reload) {
+ if (!IsFoldableCopy(Spill) || !IsFoldableCopy(Reload))
+ return false;
+ std::optional<DestSourcePair> SpillCopy =
+ isCopyInstr(Spill, *TII, UseCopyInstr);
+ std::optional<DestSourcePair> ReloadCopy =
+ isCopyInstr(Reload, *TII, UseCopyInstr);
+ if (!SpillCopy || !ReloadCopy)
+ return false;
+ return SpillCopy->Source->getReg() == ReloadCopy->Destination->getReg() &&
+ SpillCopy->Destination->getReg() == ReloadCopy->Source->getReg();
+ };
+
+ auto IsChainedCopy = [&, this](const MachineInstr &Prev,
+ const MachineInstr &Current) {
+ if (!IsFoldableCopy(Prev) || !IsFoldableCopy(Current))
+ return false;
+ std::optional<DestSourcePair> PrevCopy =
+ isCopyInstr(Prev, *TII, UseCopyInstr);
+ std::optional<DestSourcePair> CurrentCopy =
+ isCopyInstr(Current, *TII, UseCopyInstr);
+ if (!PrevCopy || !CurrentCopy)
+ return false;
+ return PrevCopy->Source->getReg() == CurrentCopy->Destination->getReg();
+ };
+
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(MI, *TII, UseCopyInstr);
+
+ // Update track information via non-copy instruction.
+ SmallSet<Register, 8> RegsToClobber;
+ if (!CopyOperands) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ MachineInstr *LastUseCopy =
+ Tracker.findLastSeenUseInCopy(Reg.asMCReg(), *TRI);
+ if (LastUseCopy) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy source of\n");
+ LLVM_DEBUG(LastUseCopy->dump());
+ LLVM_DEBUG(dbgs() << "might be invalidated by\n");
+ LLVM_DEBUG(MI.dump());
+ CopySourceInvalid.insert(LastUseCopy);
+ }
+ // Note that Tracker.clobberRegister(Reg, ...) removes tracking of
+ // Reg, i.e., the COPY that defines Reg is removed from the mapping and
+ // COPYs that use Reg are marked unavailable.
+ // We don't invoke CopyTracker::clobberRegister(Reg, ...) if Reg is not
+ // defined by a previous COPY, since we don't want to make COPYs that
+ // use Reg unavailable.
+ if (Tracker.findLastSeenDefInCopy(MI, Reg.asMCReg(), *TRI, *TII,
+ UseCopyInstr))
+ // Thus we can keep the property#1.
+ RegsToClobber.insert(Reg);
+ }
+ for (Register Reg : RegsToClobber) {
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
+ LLVM_DEBUG(dbgs() << "MCP: Removed tracking of " << printReg(Reg, TRI)
+ << "\n");
+ }
+ continue;
+ }
+
+ Register Src = CopyOperands->Source->getReg();
+ Register Def = CopyOperands->Destination->getReg();
+ // Check if we can find a paired spill-reload copy.
+ LLVM_DEBUG(dbgs() << "MCP: Searching paired spill for reload: ");
+ LLVM_DEBUG(MI.dump());
+ MachineInstr *MaybeSpill =
+ Tracker.findLastSeenDefInCopy(MI, Src.asMCReg(), *TRI, *TII, UseCopyInstr);
+ bool MaybeSpillIsChained = ChainLeader.count(MaybeSpill);
+ if (!MaybeSpillIsChained && MaybeSpill &&
+ IsSpillReloadPair(*MaybeSpill, MI)) {
+ // Check if we already have an existing chain. Now we have a
+ // spill-reload pair.
+ // L2: r2 = COPY r3
+ // L5: r3 = COPY r2
+ // Looking for a valid COPY before L5 which uses r3.
+ // There are several cases.
+ // Case #1:
+ // No COPY is found, which can happen when r3 is defined or used
+ // between (L2, L5); we create a new chain for L2 and L5.
+ // Case #2:
+ // L2: r2 = COPY r3
+ // L5: r3 = COPY r2
+ // Such COPY is found and is L2, we create a new chain for L2 and L5.
+ // Case #3:
+ // L2: r2 = COPY r3
+ // L3: r1 = COPY r3
+ // L5: r3 = COPY r2
+ // we create a new chain for L2 and L5.
+ // Case #4:
+ // L2: r2 = COPY r3
+ // L3: r1 = COPY r3
+ // L4: r3 = COPY r1
+ // L5: r3 = COPY r2
+ // Such COPY won't be found since L4 defines r3. We create a new chain
+ // for L2 and L5.
+ // Case #5:
+ // L2: r2 = COPY r3
+ // L3: r3 = COPY r1
+ // L4: r1 = COPY r3
+ // L5: r3 = COPY r2
+ // COPY is found and is L4 which belongs to an existing chain, we add
+ // L2 and L5 to this chain.
+ LLVM_DEBUG(dbgs() << "MCP: Found spill: ");
+ LLVM_DEBUG(MaybeSpill->dump());
+ MachineInstr *MaybePrevReload =
+ Tracker.findLastSeenUseInCopy(Def.asMCReg(), *TRI);
+ auto Leader = ChainLeader.find(MaybePrevReload);
+ MachineInstr *L = nullptr;
+ if (Leader == ChainLeader.end() ||
+ (MaybePrevReload && !IsChainedCopy(*MaybePrevReload, MI))) {
+ L = &MI;
+ assert(!SpillChain.count(L) &&
+ "SpillChain should not have contained newly found chain");
+ } else {
+ assert(MaybePrevReload &&
+ "Found a valid leader through nullptr should not happend");
+ L = Leader->second;
+ assert(SpillChain[L].size() > 0 &&
+ "Existing chain's length should be larger than zero");
+ }
+ assert(!ChainLeader.count(&MI) && !ChainLeader.count(MaybeSpill) &&
+ "Newly found paired spill-reload should not belong to any chain "
+ "at this point");
+ ChainLeader.insert({MaybeSpill, L});
+ ChainLeader.insert({&MI, L});
+ SpillChain[L].push_back(MaybeSpill);
+ ReloadChain[L].push_back(&MI);
+ LLVM_DEBUG(dbgs() << "MCP: Chain " << L << " now is:\n");
+ LLVM_DEBUG(printSpillReloadChain(SpillChain, ReloadChain, L));
+ } else if (MaybeSpill && !MaybeSpillIsChained) {
+ // MaybeSpill is unable to pair with MI. That is to say, adding MI makes
+ // the chain invalid.
+ // The COPY that defines Src is no longer considered a candidate for a
+ // valid chain, since we expect the Def of a spill copy not to be used by
+ // any COPY instruction until the paired reload copy. For example:
+ // L1: r1 = COPY r2
+ // L2: r3 = COPY r1
+ // If we later have
+ // L1: r1 = COPY r2
+ // L2: r3 = COPY r1
+ // L3: r2 = COPY r1
+ // L1 and L3 can't be a valid spill-reload pair.
+ // Thus we keep the property#1.
+ LLVM_DEBUG(dbgs() << "MCP: Not paired spill-reload:\n");
+ LLVM_DEBUG(MaybeSpill->dump());
+ LLVM_DEBUG(MI.dump());
+ Tracker.clobberRegister(Src.asMCReg(), *TRI, *TII, UseCopyInstr);
+ LLVM_DEBUG(dbgs() << "MCP: Removed tracking of " << printReg(Src, TRI)
+ << "\n");
+ }
+ Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
+ }
+
+ for (auto I = SpillChain.begin(), E = SpillChain.end(); I != E; ++I) {
+ auto &SC = I->second;
+ assert(ReloadChain.count(I->first) &&
+ "Reload chain of the same leader should exist");
+ auto &RC = ReloadChain[I->first];
+ TryFoldSpillageCopies(SC, RC);
+ }
+
+ MaybeDeadCopies.clear();
+ CopyDbgUsers.clear();
+ Tracker.clear();
+}
+
bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ bool isSpillageCopyElimEnabled = false;
+ switch (EnableSpillageCopyElimination) {
+ case cl::BOU_UNSET:
+ isSpillageCopyElimEnabled =
+ MF.getSubtarget().enableSpillageCopyElimination();
+ break;
+ case cl::BOU_TRUE:
+ isSpillageCopyElimEnabled = true;
+ break;
+ case cl::BOU_FALSE:
+ isSpillageCopyElimEnabled = false;
+ break;
+ }
+
Changed = false;
TRI = MF.getSubtarget().getRegisterInfo();
@@ -1019,6 +1409,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
for (MachineBasicBlock &MBB : MF) {
+ if (isSpillageCopyElimEnabled)
+ EliminateSpillageCopies(MBB);
BackwardCopyPropagateBlock(MBB);
ForwardCopyPropagateBlock(MBB);
}
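A recurring change in the MachineCopyPropagation hunks above is the move from the explicit MCRegUnitIterator loop to the range-based TRI.regunits(Reg) accessor. A minimal sketch of the two idioms, assuming the LLVM 17 TargetRegisterInfo API; the helper name collectRegUnits is invented here purely for illustration:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

// Collect every register unit of Reg. The range-based form below is the one
// the patch switches to; the iterator form it replaces is shown in the comment.
static void collectRegUnits(const llvm::TargetRegisterInfo &TRI,
                            llvm::MCRegister Reg,
                            llvm::SmallVectorImpl<llvm::MCRegUnit> &Units) {
  // Pre-patch style:
  //   for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
  //     Units.push_back(*RUI);
  for (llvm::MCRegUnit Unit : TRI.regunits(Reg))
    Units.push_back(Unit);
}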
diff --git a/llvm/lib/CodeGen/MachineDebugify.cpp b/llvm/lib/CodeGen/MachineDebugify.cpp
index adf1b51a950d..c264e199cf47 100644
--- a/llvm/lib/CodeGen/MachineDebugify.cpp
+++ b/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -116,8 +116,8 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
// Emit DBG_VALUEs for register definitions.
SmallVector<MachineOperand *, 4> RegDefs;
- for (MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isDef() && MO.getReg())
+ for (MachineOperand &MO : MI.all_defs())
+ if (MO.getReg())
RegDefs.push_back(&MO);
for (MachineOperand *MO : RegDefs)
BuildMI(MBB, InsertBeforeIt, MI.getDebugLoc(), DbgValDesc,
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
index daf6a218165d..280d3a6a41ed 100644
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -128,8 +128,8 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
// Saved CSRs are not pristine.
for (const auto &I : getCalleeSavedInfo())
- for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S)
- BV.reset(*S);
+ for (MCPhysReg S : TRI->subregs_inclusive(I.getReg()))
+ BV.reset(S);
return BV;
}
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 59e6647fa643..88939e96e07f 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -22,7 +22,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -45,6 +45,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
@@ -119,7 +120,7 @@ void setUnsafeStackSize(const Function &F, MachineFrameInfo &FrameInfo) {
auto *MetadataName = "unsafe-stack-size";
if (auto &N = Existing->getOperand(0)) {
- if (cast<MDString>(N.get())->getString() == MetadataName) {
+ if (N.equalsStr(MetadataName)) {
if (auto &Op = Existing->getOperand(1)) {
auto Val = mdconst::extract<ConstantInt>(Op)->getZExtValue();
FrameInfo.setUnsafeStackSize(Val);
@@ -211,6 +212,14 @@ void MachineFunction::init() {
Alignment = std::max(Alignment,
STI->getTargetLowering()->getPrefFunctionAlignment());
+ // -fsanitize=function and -fsanitize=kcfi instrument indirect function calls
+ // to load a type hash before the function label. Ensure functions are aligned
+ // by at least 4 to avoid unaligned access, which is especially important for
+ // -mno-unaligned-access.
+ if (F.hasMetadata(LLVMContext::MD_func_sanitize) ||
+ F.getMetadata(LLVMContext::MD_kcfi_type))
+ Alignment = std::max(Alignment, Align(4));
+
if (AlignAllFunctions)
Alignment = Align(1ULL << AlignAllFunctions);
@@ -427,8 +436,7 @@ void MachineFunction::deleteMachineInstr(MachineInstr *MI) {
// be triggered during the implementation of support for the
// call site info of a new architecture. If the assertion is triggered,
// back trace will tell where to insert a call to updateCallSiteInfo().
- assert((!MI->isCandidateForCallSiteEntry() ||
- CallSitesInfo.find(MI) == CallSitesInfo.end()) &&
+ assert((!MI->isCandidateForCallSiteEntry() || !CallSitesInfo.contains(MI)) &&
"Call site info was not updated!");
// Strip it for parts. The operand array and the MI object itself are
// independently recyclable.
@@ -1083,11 +1091,10 @@ auto MachineFunction::salvageCopySSAImpl(MachineInstr &MI)
if (State.first.isVirtual()) {
// Virtual register def -- we can just look up where this happens.
MachineInstr *Inst = MRI.def_begin(State.first)->getParent();
- for (auto &MO : Inst->operands()) {
- if (!MO.isReg() || !MO.isDef() || MO.getReg() != State.first)
+ for (auto &MO : Inst->all_defs()) {
+ if (MO.getReg() != State.first)
continue;
- return ApplySubregisters(
- {Inst->getDebugInstrNum(), Inst->getOperandNo(&MO)});
+ return ApplySubregisters({Inst->getDebugInstrNum(), MO.getOperandNo()});
}
llvm_unreachable("Vreg def with no corresponding operand?");
@@ -1102,14 +1109,13 @@ auto MachineFunction::salvageCopySSAImpl(MachineInstr &MI)
auto RMII = CurInst->getReverseIterator();
auto PrevInstrs = make_range(RMII, CurInst->getParent()->instr_rend());
for (auto &ToExamine : PrevInstrs) {
- for (auto &MO : ToExamine.operands()) {
+ for (auto &MO : ToExamine.all_defs()) {
// Test for operand that defines something aliasing RegToSeek.
- if (!MO.isReg() || !MO.isDef() ||
- !TRI.regsOverlap(RegToSeek, MO.getReg()))
+ if (!TRI.regsOverlap(RegToSeek, MO.getReg()))
continue;
return ApplySubregisters(
- {ToExamine.getDebugInstrNum(), ToExamine.getOperandNo(&MO)});
+ {ToExamine.getDebugInstrNum(), MO.getOperandNo()});
}
}
@@ -1395,7 +1401,7 @@ MachineConstantPool::~MachineConstantPool() {
}
/// Test whether the given two constants can be allocated the same constant pool
-/// entry.
+/// entry referenced by \p A.
static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
const DataLayout &DL) {
// Handle the trivial case quickly.
@@ -1415,6 +1421,8 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
if (StoreSize != DL.getTypeStoreSize(B->getType()) || StoreSize > 128)
return false;
+ bool ContainsUndefOrPoisonA = A->containsUndefOrPoisonElement();
+
Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
// Try constant folding a bitcast of both instructions to an integer. If we
@@ -1434,7 +1442,14 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
B = ConstantFoldCastOperand(Instruction::BitCast, const_cast<Constant *>(B),
IntTy, DL);
- return A == B;
+ if (A != B)
+ return false;
+
+ // Constants only safely match if A doesn't contain undef/poison.
+ // As we'll be reusing A, it doesn't matter if B contains undef/poison.
+ // TODO: Handle cases where A and B have the same undef/poison elements.
+ // TODO: Merge A and B with mismatching undef/poison elements.
+ return !ContainsUndefOrPoisonA;
}
/// Create a new entry in the constant pool or return an existing one.
@@ -1490,6 +1505,17 @@ void MachineConstantPool::print(raw_ostream &OS) const {
}
}
+//===----------------------------------------------------------------------===//
+// Template specialization for MachineFunction implementation of
+// ProfileSummaryInfo::getEntryCount().
+//===----------------------------------------------------------------------===//
+template <>
+std::optional<Function::ProfileCount>
+ProfileSummaryInfo::getEntryCount<llvm::MachineFunction>(
+ const llvm::MachineFunction *F) const {
+ return F->getFunction().getEntryCount();
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineConstantPool::dump() const { print(dbgs()); }
#endif
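The CanShareConstantPoolEntry hunk above makes entry sharing conditional on the kept constant (A) containing no undef/poison elements. A hedged sketch of just that final check, leaving out the bitcast-to-integer folding the real function performs first; mayReuseEntry is an invented name:

#include "llvm/IR/Constants.h"

// Constant B may reuse the pool entry emitted for A only when the two fold to
// the same value and A carries no undef/poison lanes; B's own undef/poison
// lanes are irrelevant because A's bytes are what end up in the pool.
static bool mayReuseEntry(const llvm::Constant *A, const llvm::Constant *B) {
  if (A != B) // The real code compares the constants after integer bitcasts.
    return false;
  return !A->containsUndefOrPoisonElement();
}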
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 613c52900331..fbc071536d22 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -24,6 +24,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/EHUtils.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -83,88 +86,44 @@ public:
} // end anonymous namespace
/// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable
-/// only by EH pad as cold. This will help mark EH pads statically cold instead
-/// of relying on profile data.
-static void
-setDescendantEHBlocksCold(SmallVectorImpl<MachineBasicBlock *> &EHBlocks,
- MachineFunction &MF) {
- MachineBasicBlock *StartBlock = &MF.front();
- // A block can be unknown if its not reachable from anywhere
- // EH if its only reachable from start blocks via some path through EH pads
- // NonEH if it's reachable from Non EH blocks as well.
- enum Status { Unknown = 0, EH = 1, NonEH = 2 };
- DenseSet<MachineBasicBlock *> WorkList;
- DenseMap<MachineBasicBlock *, Status> Statuses;
-
- auto getStatus = [&](MachineBasicBlock *MBB) {
- if (Statuses.find(MBB) != Statuses.end())
- return Statuses[MBB];
- else
- return Unknown;
- };
-
- auto checkPredecessors = [&](MachineBasicBlock *MBB, Status Stat) {
- for (auto *PredMBB : MBB->predecessors()) {
- Status PredStatus = getStatus(PredMBB);
- // If status of predecessor block has gone above current block
- // we update current blocks status.
- if (PredStatus > Stat)
- Stat = PredStatus;
- }
- return Stat;
- };
-
- auto addSuccesors = [&](MachineBasicBlock *MBB) {
- for (auto *SuccMBB : MBB->successors()) {
- if (!SuccMBB->isEHPad())
- WorkList.insert(SuccMBB);
- }
- };
-
- // Insert the successors of start block
- // and landing pads successor.
- Statuses[StartBlock] = NonEH;
- addSuccesors(StartBlock);
- for (auto *LP : EHBlocks) {
- addSuccesors(LP);
- Statuses[LP] = EH;
- }
-
- // Worklist iterative algorithm.
- while (!WorkList.empty()) {
- auto *MBB = *WorkList.begin();
- WorkList.erase(MBB);
-
- Status OldStatus = getStatus(MBB);
-
- // Check on predecessors and check for
- // Status update.
- Status NewStatus = checkPredecessors(MBB, OldStatus);
-
- // Did the block status change?
- bool changed = OldStatus != NewStatus;
- if (changed) {
- addSuccesors(MBB);
- Statuses[MBB] = NewStatus;
- }
+/// only by EH pad as cold. This will help mark EH pads statically cold
+/// instead of relying on profile data.
+static void setDescendantEHBlocksCold(MachineFunction &MF) {
+ DenseSet<MachineBasicBlock *> EHBlocks;
+ computeEHOnlyBlocks(MF, EHBlocks);
+ for (auto Block : EHBlocks) {
+ Block->setSectionID(MBBSectionID::ColdSectionID);
}
+}
- for (auto Entry : Statuses) {
- if (Entry.second == EH)
- Entry.first->setSectionID(MBBSectionID::ColdSectionID);
- }
+static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) {
+ auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
+ return X.getSectionID().Type < Y.getSectionID().Type;
+ };
+ llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
+ llvm::avoidZeroOffsetLandingPad(MF);
}
static bool isColdBlock(const MachineBasicBlock &MBB,
const MachineBlockFrequencyInfo *MBFI,
ProfileSummaryInfo *PSI) {
std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
- if (!Count)
- return true;
-
- if (PercentileCutoff > 0) {
- return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
+ // For instrumentation profiles and sample profiles, we use different ways
+ // to judge whether a block is cold and should be split.
+ if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) {
+ // If using instrument profile, which is deemed "accurate", no count means
+ // cold.
+ if (!Count)
+ return true;
+ if (PercentileCutoff > 0)
+ return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
+ // Fallthrough to end of function.
+ } else if (PSI->hasSampleProfile()) {
+ // For sample profile, no count means "do not judge coldness".
+ if (!Count)
+ return false;
}
+
return (*Count < ColdCountThreshold);
}
@@ -204,6 +163,17 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
if (UseProfileData) {
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ // If we don't have a good profile (a sample profile is not deemed a
+ // "good profile") and the function is not hot, then return early,
+ // because we can only trust hot functions when the profile quality is
+ // not good.
+ if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
+ // Split all EH code and its descendants statically by default.
+ if (SplitAllEHCode)
+ setDescendantEHBlocksCold(MF);
+ finishAdjustingBasicBlocksAndLandingPads(MF);
+ return true;
+ }
}
SmallVector<MachineBasicBlock *, 2> LandingPads;
@@ -219,9 +189,10 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
// Split all EH code and it's descendant statically by default.
if (SplitAllEHCode)
- setDescendantEHBlocksCold(LandingPads, MF);
+ setDescendantEHBlocksCold(MF);
// We only split out eh pads if all of them are cold.
else {
+ // Here we have UseProfileData == true.
bool HasHotLandingPads = false;
for (const MachineBasicBlock *LP : LandingPads) {
if (!isColdBlock(*LP, MBFI, PSI))
@@ -232,11 +203,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
LP->setSectionID(MBBSectionID::ColdSectionID);
}
}
- auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
- return X.getSectionID().Type < Y.getSectionID().Type;
- };
- llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
- llvm::avoidZeroOffsetLandingPad(MF);
+
+ finishAdjustingBasicBlocksAndLandingPads(MF);
return true;
}
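The splitter changes above gate splitting on the profile kind and, for sample profiles, bail out early unless the function is hot, using ProfileSummaryInfo's templated call-graph query (which the getEntryCount&lt;MachineFunction&gt; specialization added in MachineFunction.cpp makes possible). A small sketch of that query, assuming the PSI and MBFI analyses are already available; isHotMachineFunction is an invented helper name:

#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

// Ask ProfileSummaryInfo whether MF is hot in the call graph; the entry count
// is resolved through the MachineFunction specialization of getEntryCount.
static bool isHotMachineFunction(llvm::ProfileSummaryInfo *PSI,
                                 const llvm::MachineBlockFrequencyInfo *MBFI,
                                 const llvm::MachineFunction &MF) {
  return PSI && MBFI && PSI->isFunctionHotInCallGraph(&MF, *MBFI);
}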
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 8e0777f8438a..a9309487a7a7 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -28,6 +29,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -49,7 +51,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
@@ -95,7 +96,8 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
/// the MCInstrDesc.
MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID,
DebugLoc DL, bool NoImp)
- : MCID(&TID), DbgLoc(std::move(DL)), DebugInstrNum(0) {
+ : MCID(&TID), NumOperands(0), Flags(0), AsmPrinterFlags(0),
+ DbgLoc(std::move(DL)), DebugInstrNum(0) {
assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
@@ -113,8 +115,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID,
/// Does not copy the number from debug instruction numbering, to preserve
/// uniqueness.
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Info(MI.Info), DbgLoc(MI.getDebugLoc()),
- DebugInstrNum(0) {
+ : MCID(&MI.getDesc()), NumOperands(0), Flags(0), AsmPrinterFlags(0),
+ Info(MI.Info), DbgLoc(MI.getDebugLoc()), DebugInstrNum(0) {
assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -149,6 +151,12 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
return nullptr;
}
+const MachineRegisterInfo *MachineInstr::getRegInfo() const {
+ if (const MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return nullptr;
+}
+
void MachineInstr::removeRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
for (MachineOperand &MO : operands())
if (MO.isReg())
@@ -185,6 +193,8 @@ static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
/// an explicit operand it is added at the end of the explicit operand list
/// (before the first implicit operand).
void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
+ assert(isUInt<LLVM_MI_NUMOPERANDS_BITS>(NumOperands + 1) &&
+ "Cannot add more operands.");
assert(MCID && "Cannot add operands before providing an instr descriptor");
// Check if we're adding one of our existing operands.
@@ -526,14 +536,14 @@ void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
setPCSections(MF, MI.getPCSections());
}
-uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
+uint32_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
// For now, just return the union of the flags. If the flags get more
// complicated over time, we might need more logic here.
return getFlags() | Other.getFlags();
}
-uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
- uint16_t MIFlags = 0;
+uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
+ uint32_t MIFlags = 0;
// Copy the wrapping flags.
if (const OverflowingBinaryOperator *OB =
dyn_cast<OverflowingBinaryOperator>(&I)) {
@@ -567,6 +577,9 @@ uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
MIFlags |= MachineInstr::MIFlag::FmReassoc;
}
+ if (I.getMetadata(LLVMContext::MD_unpredictable))
+ MIFlags |= MachineInstr::MIFlag::Unpredictable;
+
return MIFlags;
}
@@ -1715,7 +1728,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (FirstOp) FirstOp = false; else OS << ",";
OS << " ";
- if (isDebugValue() && MO.isMetadata()) {
+ if (isDebugValueLike() && MO.isMetadata()) {
// Pretty print DBG_VALUE* instructions.
auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata());
if (DIV && !DIV->getName().empty())
@@ -1871,7 +1884,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
// Print extra comments for DEBUG_VALUE.
- if (isDebugValue() && getDebugVariableOp().isMetadata()) {
+ if (isDebugValueLike() && getDebugVariableOp().isMetadata()) {
if (!HaveSemi) {
OS << ";";
HaveSemi = true;
@@ -2378,3 +2391,72 @@ unsigned MachineInstr::getDebugInstrNum(MachineFunction &MF) {
DebugInstrNum = MF.getNewDebugInstrNum();
return DebugInstrNum;
}
+
+std::tuple<LLT, LLT> MachineInstr::getFirst2LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()));
+}
+
+std::tuple<LLT, LLT, LLT> MachineInstr::getFirst3LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()),
+ getRegInfo()->getType(getOperand(2).getReg()));
+}
+
+std::tuple<LLT, LLT, LLT, LLT> MachineInstr::getFirst4LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()),
+ getRegInfo()->getType(getOperand(2).getReg()),
+ getRegInfo()->getType(getOperand(3).getReg()));
+}
+
+std::tuple<LLT, LLT, LLT, LLT, LLT> MachineInstr::getFirst5LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()),
+ getRegInfo()->getType(getOperand(2).getReg()),
+ getRegInfo()->getType(getOperand(3).getReg()),
+ getRegInfo()->getType(getOperand(4).getReg()));
+}
+
+std::tuple<Register, LLT, Register, LLT>
+MachineInstr::getFirst2RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ return std::tuple(Reg0, getRegInfo()->getType(Reg0), Reg1,
+ getRegInfo()->getType(Reg1));
+}
+
+std::tuple<Register, LLT, Register, LLT, Register, LLT>
+MachineInstr::getFirst3RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ Register Reg2 = getOperand(2).getReg();
+ return std::tuple(Reg0, getRegInfo()->getType(Reg0), Reg1,
+ getRegInfo()->getType(Reg1), Reg2,
+ getRegInfo()->getType(Reg2));
+}
+
+std::tuple<Register, LLT, Register, LLT, Register, LLT, Register, LLT>
+MachineInstr::getFirst4RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ Register Reg2 = getOperand(2).getReg();
+ Register Reg3 = getOperand(3).getReg();
+ return std::tuple(
+ Reg0, getRegInfo()->getType(Reg0), Reg1, getRegInfo()->getType(Reg1),
+ Reg2, getRegInfo()->getType(Reg2), Reg3, getRegInfo()->getType(Reg3));
+}
+
+std::tuple<Register, LLT, Register, LLT, Register, LLT, Register, LLT, Register,
+ LLT>
+MachineInstr::getFirst5RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ Register Reg2 = getOperand(2).getReg();
+ Register Reg3 = getOperand(3).getReg();
+ Register Reg4 = getOperand(4).getReg();
+ return std::tuple(
+ Reg0, getRegInfo()->getType(Reg0), Reg1, getRegInfo()->getType(Reg1),
+ Reg2, getRegInfo()->getType(Reg2), Reg3, getRegInfo()->getType(Reg3),
+ Reg4, getRegInfo()->getType(Reg4));
+}
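MachineInstr gains the getFirstN{LLTs,RegLLTs} helpers above, which bundle the first few register operands with their low-level types in a single tuple so GlobalISel code can unpack them with structured bindings. A brief usage sketch, assuming the instruction is inserted in a function (the helpers look the types up via MachineRegisterInfo); firstTwoTypesMatch is an invented name:

#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineInstr.h"

// True if the first two register operands of MI carry the same LLT.
static bool firstTwoTypesMatch(const llvm::MachineInstr &MI) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  (void)DstReg;
  (void)SrcReg;
  return DstTy == SrcTy;
}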
diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 0c059a145ca4..b9db34f7be95 100644
--- a/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -58,8 +58,7 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
if (MI->isBundle()) {
while (++MII != MIE && MII->isBundledWithPred()) {
MII->unbundleFromPred();
- for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MII->getOperand(i);
+ for (MachineOperand &MO : MII->operands()) {
if (MO.isReg() && MO.isInternalRead())
MO.setIsInternalRead(false);
}
@@ -149,8 +148,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
if (MII->isDebugInstr())
continue;
- for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MII->getOperand(i);
+ for (MachineOperand &MO : MII->operands()) {
if (!MO.isReg())
continue;
if (MO.isDef()) {
@@ -199,8 +197,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
if (!MO.isDead() && Reg.isPhysical()) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
if (LocalDefSet.insert(SubReg).second)
LocalDefs.push_back(SubReg);
}
@@ -310,6 +307,34 @@ VirtRegInfo llvm::AnalyzeVirtRegInBundle(
return RI;
}
+std::pair<LaneBitmask, LaneBitmask>
+llvm::AnalyzeVirtRegLanesInBundle(const MachineInstr &MI, Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) {
+
+ LaneBitmask UseMask, DefMask;
+
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ const MachineOperand &MO = *O;
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0 && MO.isUse() && !MO.isUndef())
+ UseMask |= MRI.getMaxLaneMaskForVReg(Reg);
+
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
+ if (MO.isDef()) {
+ if (!MO.isUndef())
+ UseMask |= ~SubRegMask;
+ DefMask |= SubRegMask;
+ } else if (!MO.isUndef())
+ UseMask |= SubRegMask;
+ }
+
+ return {UseMask, DefMask};
+}
+
PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg,
const TargetRegisterInfo *TRI) {
bool AllDefsDead = true;
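The new AnalyzeVirtRegLanesInBundle above returns the lane masks read and written for a virtual register across a whole bundle. A hedged usage sketch; bundleFullyDefines is an invented helper built on the pair it returns:

#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

// True if the bundle containing MI writes every lane of Reg and reads none.
static bool bundleFullyDefines(const llvm::MachineInstr &MI, llvm::Register Reg,
                               const llvm::MachineRegisterInfo &MRI,
                               const llvm::TargetRegisterInfo &TRI) {
  auto [UseMask, DefMask] = llvm::AnalyzeVirtRegLanesInBundle(MI, Reg, MRI, TRI);
  llvm::LaneBitmask AllLanes = MRI.getMaxLaneMaskForVReg(Reg);
  return UseMask.none() && (DefMask & AllLanes) == AllLanes;
}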
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 1c09c01df3aa..4e80e9b58c06 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -112,26 +112,26 @@ STATISTIC(NumNotHoistedDueToHotness,
namespace {
class MachineLICMBase : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetLoweringBase *TLI;
- const TargetRegisterInfo *TRI;
- const MachineFrameInfo *MFI;
- MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetLoweringBase *TLI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const MachineFrameInfo *MFI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
TargetSchedModel SchedModel;
- bool PreRegAlloc;
- bool HasProfileData;
+ bool PreRegAlloc = false;
+ bool HasProfileData = false;
// Various analyses that we use...
- AliasAnalysis *AA; // Alias analysis info.
- MachineBlockFrequencyInfo *MBFI; // Machine block frequncy info
- MachineLoopInfo *MLI; // Current MachineLoopInfo
- MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+ AliasAnalysis *AA = nullptr; // Alias analysis info.
+ MachineBlockFrequencyInfo *MBFI = nullptr; // Machine block frequency info
+ MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo
+ MachineDominatorTree *DT = nullptr; // Machine dominator tree for the cur loop
// State that is updated as we process loops
- bool Changed; // True if a loop is changed.
- bool FirstInLoop; // True if it's the first LICM in the loop.
- MachineLoop *CurLoop; // The current loop we are working on.
- MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+ bool Changed = false; // True if a loop is changed.
+ bool FirstInLoop = false; // True if it's the first LICM in the loop.
+ MachineLoop *CurLoop = nullptr; // The current loop we are working on.
+ MachineBasicBlock *CurPreheader = nullptr; // The preheader for CurLoop.
// Exit blocks for CurLoop.
SmallVector<MachineBasicBlock *, 8> ExitBlocks;
@@ -163,7 +163,7 @@ namespace {
// If a MBB does not dominate loop exiting blocks then it may not safe
// to hoist loads from this block.
// Tri-state: 0 - false, 1 - true, 2 - unknown
- unsigned SpeculationState;
+ unsigned SpeculationState = SpeculateUnknown;
public:
MachineLICMBase(char &PassID, bool PreRegAlloc)
@@ -575,8 +575,8 @@ void MachineLICMBase::HoistRegionPostRA() {
if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
bool Safe = true;
MachineInstr *MI = Candidate.MI;
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || MO.isDef() || !MO.getReg())
+ for (const MachineOperand &MO : MI->all_uses()) {
+ if (!MO.getReg())
continue;
Register Reg = MO.getReg();
if (PhysRegDefs.test(Reg) ||
@@ -600,8 +600,9 @@ void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
if (!BB->isLiveIn(Reg))
BB->addLiveIn(Reg);
for (MachineInstr &MI : *BB) {
- for (MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+ for (MachineOperand &MO : MI.all_uses()) {
+ if (!MO.getReg())
+ continue;
if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
MO.setIsKill(false);
}
@@ -669,8 +670,8 @@ bool MachineLICMBase::isTriviallyReMaterializable(
if (!TII->isTriviallyReMaterializable(MI))
return false;
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+ for (const MachineOperand &MO : MI.all_uses()) {
+ if (MO.getReg().isVirtual())
return false;
}
@@ -866,7 +867,7 @@ MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
continue;
const int *PS = TRI->getRegClassPressureSets(RC);
for (; *PS != -1; ++PS) {
- if (Cost.find(*PS) == Cost.end())
+ if (!Cost.contains(*PS))
Cost[*PS] = RCCost;
else
Cost[*PS] += RCCost;
@@ -1014,9 +1015,7 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
SmallVector<const MachineInstr*, 8> Work(1, MI);
do {
MI = Work.pop_back_val();
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
+ for (const MachineOperand &MO : MI->all_defs()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
@@ -1455,8 +1454,8 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Clear the kill flags of any register this instruction defines,
// since they may need to be live throughout the entire loop
// rather than just live for part of it.
- for (MachineOperand &MO : MI->operands())
- if (MO.isReg() && MO.isDef() && !MO.isDead())
+ for (MachineOperand &MO : MI->all_defs())
+ if (!MO.isDead())
MRI->clearKillFlags(MO.getReg());
// Add to the CSE map.
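Several MachineLICM hunks above replace manual isReg()/isDef()/isUse() filtering with the all_defs() and all_uses() operand ranges. A minimal sketch of the new idiom, mirroring the kill-flag clearing hunk; clearKillsForDefs is an invented name:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// Clear kill flags for every register defined by MI, iterating only the def
// operands instead of filtering all operands by hand.
static void clearKillsForDefs(llvm::MachineInstr &MI,
                              llvm::MachineRegisterInfo &MRI) {
  for (llvm::MachineOperand &MO : MI.all_defs())
    if (!MO.isDead())
      MRI.clearKillFlags(MO.getReg());
}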
diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index c400ce190b46..c44b968b317d 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -39,17 +39,29 @@ STATISTIC(NumRemoved, "Number of redundant instructions removed.");
namespace {
class MachineLateInstrsCleanup : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+
+ // Data structures to map regs to their definitions and kills per MBB.
+ struct Reg2MIMap : public SmallDenseMap<Register, MachineInstr *> {
+ bool hasIdentical(Register Reg, MachineInstr *ArgMI) {
+ MachineInstr *MI = lookup(Reg);
+ return MI && MI->isIdenticalTo(*ArgMI);
+ }
+ };
- // Data structures to map regs to their definitions per MBB.
- using Reg2DefMap = std::map<Register, MachineInstr*>;
- std::vector<Reg2DefMap> RegDefs;
+ std::vector<Reg2MIMap> RegDefs;
+ std::vector<Reg2MIMap> RegKills;
// Walk through the instructions in MBB and remove any redundant
// instructions.
bool processBlock(MachineBasicBlock *MBB);
+ void removeRedundantDef(MachineInstr *MI);
+ void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ BitVector &VisitedPreds);
+
public:
static char ID; // Pass identification, replacement for typeid
@@ -88,6 +100,8 @@ bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
RegDefs.clear();
RegDefs.resize(MF.getNumBlockIDs());
+ RegKills.clear();
+ RegKills.resize(MF.getNumBlockIDs());
// Visit all MBBs in an order that maximises the reuse from predecessors.
bool Changed = false;
@@ -102,41 +116,36 @@ bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
// in MBB and if needed continue in predecessors until a use/def of Reg is
// encountered. This seems to be faster in practice than tracking kill flags
// in a map.
-static void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- BitVector &VisitedPreds,
- const TargetRegisterInfo *TRI) {
+void MachineLateInstrsCleanup::
+clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ BitVector &VisitedPreds) {
VisitedPreds.set(MBB->getNumber());
- while (I != MBB->begin()) {
- --I;
- bool Found = false;
- for (auto &MO : I->operands())
- if (MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg)) {
- if (MO.isDef())
- return;
- if (MO.readsReg()) {
- MO.setIsKill(false);
- Found = true; // Keep going for an implicit kill of the super-reg.
- }
- }
- if (Found)
- return;
+
+ // Kill flag in MBB
+ if (MachineInstr *KillMI = RegKills[MBB->getNumber()].lookup(Reg)) {
+ KillMI->clearRegisterKills(Reg, TRI);
+ return;
}
+ // Def in MBB (missing kill flag)
+ if (MachineInstr *DefMI = RegDefs[MBB->getNumber()].lookup(Reg))
+ if (DefMI->getParent() == MBB)
+ return;
+
// If an earlier def is not in MBB, continue in predecessors.
if (!MBB->isLiveIn(Reg))
MBB->addLiveIn(Reg);
assert(!MBB->pred_empty() && "Predecessor def not found!");
for (MachineBasicBlock *Pred : MBB->predecessors())
if (!VisitedPreds.test(Pred->getNumber()))
- clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds, TRI);
+ clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds);
}
-static void removeRedundantDef(MachineInstr *MI,
- const TargetRegisterInfo *TRI) {
+void MachineLateInstrsCleanup::removeRedundantDef(MachineInstr *MI) {
Register Reg = MI->getOperand(0).getReg();
BitVector VisitedPreds(MI->getMF()->getNumBlockIDs());
- clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds, TRI);
+ clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds);
MI->eraseFromParent();
++NumRemoved;
}
@@ -172,18 +181,18 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
bool Changed = false;
- Reg2DefMap &MBBDefs = RegDefs[MBB->getNumber()];
+ Reg2MIMap &MBBDefs = RegDefs[MBB->getNumber()];
+ Reg2MIMap &MBBKills = RegKills[MBB->getNumber()];
// Find reusable definitions in the predecessor(s).
- if (!MBB->pred_empty() && !MBB->isEHPad()) {
+ if (!MBB->pred_empty() && !MBB->isEHPad() &&
+ !MBB->isInlineAsmBrIndirectTarget()) {
MachineBasicBlock *FirstPred = *MBB->pred_begin();
for (auto [Reg, DefMI] : RegDefs[FirstPred->getNumber()])
if (llvm::all_of(
drop_begin(MBB->predecessors()),
[&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) {
- auto PredDefI = RegDefs[Pred->getNumber()].find(Reg);
- return PredDefI != RegDefs[Pred->getNumber()].end() &&
- DefMI->isIdenticalTo(*PredDefI->second);
+ return RegDefs[Pred->getNumber()].hasIdentical(Reg, DefMI);
})) {
MBBDefs[Reg] = DefMI;
LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in "
@@ -200,6 +209,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
// it) are valid.
if (MI.modifiesRegister(FrameReg, TRI)) {
MBBDefs.clear();
+ MBBKills.clear();
continue;
}
@@ -207,24 +217,23 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);
// Check for an earlier identical and reusable instruction.
- if (IsCandidate) {
- auto DefI = MBBDefs.find(DefedReg);
- if (DefI != MBBDefs.end() && MI.isIdenticalTo(*DefI->second)) {
- LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
- << printMBBReference(*MBB) << ": " << MI;);
- removeRedundantDef(&MI, TRI);
- Changed = true;
- continue;
- }
+ if (IsCandidate && MBBDefs.hasIdentical(DefedReg, &MI)) {
+ LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
+ << printMBBReference(*MBB) << ": " << MI;);
+ removeRedundantDef(&MI);
+ Changed = true;
+ continue;
}
// Clear any entries in map that MI clobbers.
- for (auto DefI = MBBDefs.begin(); DefI != MBBDefs.end();) {
- Register Reg = DefI->first;
- if (MI.modifiesRegister(Reg, TRI))
- DefI = MBBDefs.erase(DefI);
- else
- ++DefI;
+ for (auto DefI : llvm::make_early_inc_range(MBBDefs)) {
+ Register Reg = DefI.first;
+ if (MI.modifiesRegister(Reg, TRI)) {
+ MBBDefs.erase(Reg);
+ MBBKills.erase(Reg);
+ } else if (MI.findRegisterUseOperandIdx(Reg, true /*isKill*/, TRI) != -1)
+ // Keep track of register kills.
+ MBBKills[Reg] = &MI;
}
// Record this MI for potential later reuse.
@@ -232,6 +241,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "Found interesting instruction in "
<< printMBBReference(*MBB) << ": " << MI;);
MBBDefs[DefedReg] = &MI;
+ assert(!MBBKills.count(DefedReg) && "Should already have been removed.");
}
}
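
The MachineLateInstrsCleanup changes above replace the open-coded find()-plus-isIdenticalTo() lookup with a hasIdentical() helper on the per-block map and add a parallel RegKills map, so a redundant def can clear the matching kill flag with a single lookup instead of a backward scan. The exact map type lives in MachineLateInstrsCleanup.cpp; a minimal sketch of what the helper amounts to, assuming Reg2MIMap is a DenseMap-style map from Register to MachineInstr*, would be:

#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/Register.h"

namespace {
// Hypothetical stand-in for the pass's per-block map; only the helper that
// the new call sites rely on is shown.
struct Reg2MIMap : llvm::SmallDenseMap<llvm::Register, llvm::MachineInstr *> {
  // True if Reg maps to an instruction identical to ArgMI, i.e. the same
  // find() + isIdenticalTo() pattern the old code spelled out inline.
  bool hasIdentical(llvm::Register Reg, llvm::MachineInstr *ArgMI) const {
    llvm::MachineInstr *MI = lookup(Reg);
    return MI && MI->isIdenticalTo(*ArgMI);
  }
};
} // end anonymous namespace
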
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index fb3af385a0c1..37a0ff3d71c8 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -14,7 +14,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -23,6 +22,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
+#include "llvm/Support/GenericLoopInfoImpl.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index a0c0166d06f0..921feb253d64 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -56,11 +56,10 @@ void MachineModuleInfo::finalize() {
MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
: TM(std::move(MMI.TM)),
- Context(MMI.TM.getTargetTriple(), MMI.TM.getMCAsmInfo(),
- MMI.TM.getMCRegisterInfo(), MMI.TM.getMCSubtargetInfo(), nullptr,
- &MMI.TM.Options.MCOptions, false),
+ Context(TM.getTargetTriple(), TM.getMCAsmInfo(), TM.getMCRegisterInfo(),
+ TM.getMCSubtargetInfo(), nullptr, &TM.Options.MCOptions, false),
MachineFunctions(std::move(MMI.MachineFunctions)) {
- Context.setObjectFileInfo(MMI.TM.getObjFileLowering());
+ Context.setObjectFileInfo(TM.getObjFileLowering());
ObjFileMMI = MMI.ObjFileMMI;
CurCallSite = MMI.CurCallSite;
ExternalContext = MMI.ExternalContext;
@@ -107,6 +106,10 @@ MachineFunction &MachineModuleInfo::getOrCreateMachineFunction(Function &F) {
const TargetSubtargetInfo &STI = *TM.getSubtargetImpl(F);
MF = new MachineFunction(F, TM, STI, NextFnNum++, *this);
MF->initTargetMachineFunctionInfo(STI);
+
+ // MRI callback for target specific initializations.
+ TM.registerMachineRegisterInfoCallback(*MF);
+
// Update the set entry.
I.first->second.reset(MF);
} else {
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index 0a7b12e9ccb9..788c134b6ee8 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/MIRFormatter.h"
@@ -53,6 +52,11 @@ static MachineFunction *getMFIfAvailable(MachineOperand &MO) {
getMFIfAvailable(const_cast<const MachineOperand &>(MO)));
}
+unsigned MachineOperand::getOperandNo() const {
+ assert(getParent() && "Operand does not belong to any instruction!");
+ return getParent()->getOperandNo(this);
+}
+
void MachineOperand::setReg(Register Reg) {
if (getReg() == Reg)
return; // No change.
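
The new MachineOperand::getOperandNo() lets an operand report its own index, so callers no longer need to route through the parent instruction with MI->getOperandNo(&MO); the MachineSink and MachinePipeliner hunks further down switch to it. A small illustrative helper (not part of the patch), using the standard PHI operand layout of a def followed by (value, block) pairs:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include <cassert>

// Given a register use on a PHI, return the basic-block operand paired with
// it; the block operand immediately follows the value in the operand list.
static llvm::MachineOperand &phiBlockOperandFor(llvm::MachineOperand &RegUse) {
  llvm::MachineInstr &Phi = *RegUse.getParent();
  assert(Phi.isPHI() && RegUse.isReg() && RegUse.isUse());
  // Equivalent to Phi.getOperand(Phi.getOperandNo(&RegUse) + 1).
  return Phi.getOperand(RegUse.getOperandNo() + 1);
}
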
@@ -986,7 +990,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
case MachineOperand::MO_Predicate: {
auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
OS << (CmpInst::isIntPredicate(Pred) ? "int" : "float") << "pred("
- << CmpInst::getPredicateName(Pred) << ')';
+ << Pred << ')';
break;
}
case MachineOperand::MO_ShuffleMask:
@@ -1022,10 +1026,10 @@ unsigned MachinePointerInfo::getAddrSpace() const { return AddrSpace; }
/// Offset + Size byte.
bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
const DataLayout &DL) const {
- if (!V.is<const Value *>())
+ if (!isa<const Value *>(V))
return false;
- const Value *BasePtr = V.get<const Value *>();
+ const Value *BasePtr = cast<const Value *>(V);
if (BasePtr == nullptr)
return false;
@@ -1070,8 +1074,8 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
AtomicOrdering FailureOrdering)
: PtrInfo(ptrinfo), MemoryType(type), FlagVals(f), BaseAlign(a),
AAInfo(AAInfo), Ranges(Ranges) {
- assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() ||
- isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) &&
+ assert((PtrInfo.V.isNull() || isa<const PseudoSourceValue *>(PtrInfo.V) ||
+ isa<PointerType>(cast<const Value *>(PtrInfo.V)->getType())) &&
"invalid pointer value");
assert((isLoad() || isStore()) && "Not a load/store!");
@@ -1093,16 +1097,6 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
s == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * s), a,
AAInfo, Ranges, SSID, Ordering, FailureOrdering) {}
-/// Profile - Gather unique data for the object.
-///
-void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(getOffset());
- ID.AddInteger(getMemoryType().getUniqueRAWLLTData());
- ID.AddPointer(getOpaqueValue());
- ID.AddInteger(getFlags());
- ID.AddInteger(getBaseAlign().value());
-}
-
void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
// The Value and Offset may differ due to CSE. But the flags and size
// should be the same.
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index c7ba66bd3678..a0769105c929 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -89,11 +89,14 @@ STATISTIC(NumOutlined, "Number of candidates outlined");
STATISTIC(FunctionsCreated, "Number of functions created");
// Statistics for instruction mapping.
-STATISTIC(NumLegalInUnsignedVec, "Number of legal instrs in unsigned vector");
+STATISTIC(NumLegalInUnsignedVec, "Outlinable instructions mapped");
STATISTIC(NumIllegalInUnsignedVec,
- "Number of illegal instrs in unsigned vector");
-STATISTIC(NumInvisible, "Number of invisible instrs in unsigned vector");
-STATISTIC(UnsignedVecSize, "Size of unsigned vector");
+ "Unoutlinable instructions mapped + number of sentinel values");
+STATISTIC(NumSentinels, "Sentinel values inserted during mapping");
+STATISTIC(NumInvisible,
+ "Invisible instructions skipped during mapping");
+STATISTIC(UnsignedVecSize,
+ "Total number of instructions mapped and saved to mapping vector");
// Set to true if the user wants the outliner to run on linkonceodr linkage
// functions. This is false by default because the linker can dedupe linkonceodr
@@ -113,6 +116,11 @@ static cl::opt<unsigned> OutlinerReruns(
cl::desc(
"Number of times to rerun the outliner after the initial outline"));
+static cl::opt<unsigned> OutlinerBenefitThreshold(
+ "outliner-benefit-threshold", cl::init(1), cl::Hidden,
+ cl::desc(
+ "The minimum size in bytes before an outlining candidate is accepted"));
+
namespace {
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -136,11 +144,11 @@ struct InstructionMapper {
DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
/// The vector of unsigned integers that the module is mapped to.
- std::vector<unsigned> UnsignedVec;
+ SmallVector<unsigned> UnsignedVec;
/// Stores the location of the instruction associated with the integer
/// at index i in \p UnsignedVec for each index i.
- std::vector<MachineBasicBlock::iterator> InstrList;
+ SmallVector<MachineBasicBlock::iterator> InstrList;
// Set if we added an illegal number in the previous step.
// Since each illegal number is unique, we only need one of them between
@@ -157,8 +165,8 @@ struct InstructionMapper {
unsigned mapToLegalUnsigned(
MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
bool &HaveLegalRange, unsigned &NumLegalInBlock,
- std::vector<unsigned> &UnsignedVecForMBB,
- std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ SmallVector<unsigned> &UnsignedVecForMBB,
+ SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
// We added something legal, so we should unset the AddedLegalLastTime
// flag.
AddedIllegalLastTime = false;
@@ -211,8 +219,8 @@ struct InstructionMapper {
/// \returns The integer that \p *It was mapped to.
unsigned mapToIllegalUnsigned(
MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
- std::vector<unsigned> &UnsignedVecForMBB,
- std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ SmallVector<unsigned> &UnsignedVecForMBB,
+ SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
// Can't outline an illegal instruction. Set the flag.
CanOutlineWithPrevInstr = false;
@@ -254,12 +262,20 @@ struct InstructionMapper {
/// \param TII \p TargetInstrInfo for the function.
void convertToUnsignedVec(MachineBasicBlock &MBB,
const TargetInstrInfo &TII) {
+ LLVM_DEBUG(dbgs() << "*** Converting MBB '" << MBB.getName()
+ << "' to unsigned vector ***\n");
unsigned Flags = 0;
// Don't even map in this case.
if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
return;
+ auto OutlinableRanges = TII.getOutlinableRanges(MBB, Flags);
+ LLVM_DEBUG(dbgs() << MBB.getName() << ": " << OutlinableRanges.size()
+ << " outlinable range(s)\n");
+ if (OutlinableRanges.empty())
+ return;
+
// Store info for the MBB for later outlining.
MBBFlagsMap[&MBB] = Flags;
@@ -279,40 +295,71 @@ struct InstructionMapper {
// FIXME: Should this all just be handled in the target, rather than using
// repeated calls to getOutliningType?
- std::vector<unsigned> UnsignedVecForMBB;
- std::vector<MachineBasicBlock::iterator> InstrListForMBB;
-
- for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; ++It) {
- // Keep track of where this instruction is in the module.
- switch (TII.getOutliningType(It, Flags)) {
- case InstrType::Illegal:
+ SmallVector<unsigned> UnsignedVecForMBB;
+ SmallVector<MachineBasicBlock::iterator> InstrListForMBB;
+
+ LLVM_DEBUG(dbgs() << "*** Mapping outlinable ranges ***\n");
+ for (auto &OutlinableRange : OutlinableRanges) {
+ auto OutlinableRangeBegin = OutlinableRange.first;
+ auto OutlinableRangeEnd = OutlinableRange.second;
+#ifndef NDEBUG
+ LLVM_DEBUG(
+ dbgs() << "Mapping "
+ << std::distance(OutlinableRangeBegin, OutlinableRangeEnd)
+ << " instruction range\n");
+ // Everything outside of an outlinable range is illegal.
+ unsigned NumSkippedInRange = 0;
+#endif
+ for (; It != OutlinableRangeBegin; ++It) {
+#ifndef NDEBUG
+ ++NumSkippedInRange;
+#endif
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
InstrListForMBB);
- break;
-
- case InstrType::Legal:
- mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
- NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
- break;
-
- case InstrType::LegalTerminator:
- mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
- NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
- // The instruction also acts as a terminator, so we have to record that
- // in the string.
- mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ }
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Skipped " << NumSkippedInRange
+ << " instructions outside outlinable range\n");
+#endif
+ assert(It != MBB.end() && "Should still have instructions?");
+ // `It` is now positioned at the beginning of a range of instructions
+ // which may be outlinable. Check if each instruction is known to be safe.
+ for (; It != OutlinableRangeEnd; ++It) {
+ // Keep track of where this instruction is in the module.
+ switch (TII.getOutliningType(It, Flags)) {
+ case InstrType::Illegal:
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
+ break;
+
+ case InstrType::Legal:
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB,
+ InstrListForMBB);
+ break;
+
+ case InstrType::LegalTerminator:
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB,
InstrListForMBB);
- break;
-
- case InstrType::Invisible:
- // Normally this is set by mapTo(Blah)Unsigned, but we just want to
- // skip this instruction. So, unset the flag here.
- ++NumInvisible;
- AddedIllegalLastTime = false;
- break;
+ // The instruction also acts as a terminator, so we have to record
+ // that in the string.
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
+ break;
+
+ case InstrType::Invisible:
+ // Normally this is set by mapTo(Blah)Unsigned, but we just want to
+ // skip this instruction. So, unset the flag here.
+ ++NumInvisible;
+ AddedIllegalLastTime = false;
+ break;
+ }
}
}
+ LLVM_DEBUG(dbgs() << "HaveLegalRange = " << HaveLegalRange << "\n");
+
// Are there enough legal instructions in the block for outlining to be
// possible?
if (HaveLegalRange) {
@@ -322,8 +369,9 @@ struct InstructionMapper {
// repeated substring.
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
InstrListForMBB);
- llvm::append_range(InstrList, InstrListForMBB);
- llvm::append_range(UnsignedVec, UnsignedVecForMBB);
+ ++NumSentinels;
+ append_range(InstrList, InstrListForMBB);
+ append_range(UnsignedVec, UnsignedVecForMBB);
}
}
@@ -533,11 +581,19 @@ void MachineOutliner::findCandidates(
// First, find all of the repeated substrings in the tree of minimum length
// 2.
std::vector<Candidate> CandidatesForRepeatedSeq;
+ LLVM_DEBUG(dbgs() << "*** Discarding overlapping candidates *** \n");
+ LLVM_DEBUG(
+ dbgs() << "Searching for overlaps in all repeated sequences...\n");
for (const SuffixTree::RepeatedSubstring &RS : ST) {
CandidatesForRepeatedSeq.clear();
unsigned StringLen = RS.Length;
+ LLVM_DEBUG(dbgs() << " Sequence length: " << StringLen << "\n");
+ // Debug code to keep track of how many candidates we removed.
+#ifndef NDEBUG
+ unsigned NumDiscarded = 0;
+ unsigned NumKept = 0;
+#endif
for (const unsigned &StartIdx : RS.StartIndices) {
- unsigned EndIdx = StartIdx + StringLen - 1;
// Trick: Discard some candidates that would be incompatible with the
// ones we've already found for this sequence. This will save us some
// work in candidate selection.
@@ -559,23 +615,39 @@ void MachineOutliner::findCandidates(
// That is, one must either
// * End before the other starts
// * Start after the other ends
- if (llvm::all_of(CandidatesForRepeatedSeq, [&StartIdx,
- &EndIdx](const Candidate &C) {
- return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
- })) {
- // It doesn't overlap with anything, so we can outline it.
- // Each sequence is over [StartIt, EndIt].
- // Save the candidate and its location.
-
- MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
- MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
- MachineBasicBlock *MBB = StartIt->getParent();
-
- CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
- EndIt, MBB, FunctionList.size(),
- Mapper.MBBFlagsMap[MBB]);
+ unsigned EndIdx = StartIdx + StringLen - 1;
+ auto FirstOverlap = find_if(
+ CandidatesForRepeatedSeq, [StartIdx, EndIdx](const Candidate &C) {
+ return EndIdx >= C.getStartIdx() && StartIdx <= C.getEndIdx();
+ });
+ if (FirstOverlap != CandidatesForRepeatedSeq.end()) {
+#ifndef NDEBUG
+ ++NumDiscarded;
+ LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx
+ << ", " << EndIdx << "]; overlaps with candidate @ ["
+ << FirstOverlap->getStartIdx() << ", "
+ << FirstOverlap->getEndIdx() << "]\n");
+#endif
+ continue;
}
+ // It doesn't overlap with anything, so we can outline it.
+ // Each sequence is over [StartIt, EndIt].
+ // Save the candidate and its location.
+#ifndef NDEBUG
+ ++NumKept;
+#endif
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
+ MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
+ MachineBasicBlock *MBB = StartIt->getParent();
+ CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt, EndIt,
+ MBB, FunctionList.size(),
+ Mapper.MBBFlagsMap[MBB]);
}
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << " Candidates discarded: " << NumDiscarded
+ << "\n");
+ LLVM_DEBUG(dbgs() << " Candidates kept: " << NumKept << "\n\n");
+#endif
// We've found something we might want to outline.
// Create an OutlinedFunction to store it and check if it'd be beneficial
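
The rewrite above turns the all_of() overlap check into a find_if() for the first overlapping candidate so the debug output can say which existing candidate forced the discard; the predicate itself is unchanged. Candidates are inclusive index ranges [StartIdx, EndIdx] into the mapped vector, and two of them overlap unless one ends before the other starts. A standalone sketch of the same test on plain index pairs (the names are illustrative only):

#include <cassert>

// Inclusive index range [Start, End], as used for outlining candidates.
struct IdxRange {
  unsigned Start, End;
};

// True when the ranges share at least one index, i.e. neither one ends
// strictly before the other starts. This mirrors the find_if predicate.
static bool overlaps(IdxRange A, IdxRange B) {
  return A.End >= B.Start && A.Start <= B.End;
}

int main() {
  assert(overlaps({3, 7}, {7, 9}));    // share index 7
  assert(!overlaps({3, 6}, {7, 9}));   // A ends before B starts
  assert(!overlaps({10, 12}, {7, 9})); // A starts after B ends
  return 0;
}
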
@@ -588,21 +660,21 @@ void MachineOutliner::findCandidates(
const TargetInstrInfo *TII =
CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
- OutlinedFunction OF =
+ std::optional<OutlinedFunction> OF =
TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
// If we deleted too many candidates, then there's nothing worth outlining.
// FIXME: This should take target-specified instruction sizes into account.
- if (OF.Candidates.size() < 2)
+ if (!OF || OF->Candidates.size() < 2)
continue;
// Is it better to outline this candidate than not?
- if (OF.getBenefit() < 1) {
- emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF);
+ if (OF->getBenefit() < OutlinerBenefitThreshold) {
+ emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, *OF);
continue;
}
- FunctionList.push_back(OF);
+ FunctionList.push_back(*OF);
}
}
@@ -616,6 +688,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
if (OutlineRepeatedNum > 0)
FunctionName += std::to_string(OutlineRepeatedNum + 1) + "_";
FunctionName += std::to_string(Name);
+ LLVM_DEBUG(dbgs() << "NEW FUNCTION: " << FunctionName << "\n");
// Create the function using an IR-level function.
LLVMContext &C = M.getContext();
@@ -653,6 +726,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ MF.setIsOutlined(true);
MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
// Insert the new function into the module.
@@ -720,7 +794,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
Mangler Mg;
// Get the mangled name of the function for the linkage name.
std::string Dummy;
- llvm::raw_string_ostream MangledNameStream(Dummy);
+ raw_string_ostream MangledNameStream(Dummy);
Mg.getNameWithPrefix(MangledNameStream, F, false);
DISubprogram *OutlinedSP = DB.createFunction(
@@ -750,30 +824,51 @@ bool MachineOutliner::outline(Module &M,
std::vector<OutlinedFunction> &FunctionList,
InstructionMapper &Mapper,
unsigned &OutlinedFunctionNum) {
-
+ LLVM_DEBUG(dbgs() << "*** Outlining ***\n");
+ LLVM_DEBUG(dbgs() << "NUMBER OF POTENTIAL FUNCTIONS: " << FunctionList.size()
+ << "\n");
bool OutlinedSomething = false;
// Sort by benefit. The most beneficial functions should be outlined first.
- llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS,
- const OutlinedFunction &RHS) {
- return LHS.getBenefit() > RHS.getBenefit();
- });
+ stable_sort(FunctionList,
+ [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
+ return LHS.getBenefit() > RHS.getBenefit();
+ });
// Walk over each function, outlining them as we go along. Functions are
// outlined greedily, based off the sort above.
+ auto *UnsignedVecBegin = Mapper.UnsignedVec.begin();
+ LLVM_DEBUG(dbgs() << "WALKING FUNCTION LIST\n");
for (OutlinedFunction &OF : FunctionList) {
+#ifndef NDEBUG
+ auto NumCandidatesBefore = OF.Candidates.size();
+#endif
// If we outlined something that overlapped with a candidate in a previous
// step, then we can't outline from it.
- erase_if(OF.Candidates, [&Mapper](Candidate &C) {
- return std::any_of(
- Mapper.UnsignedVec.begin() + C.getStartIdx(),
- Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
- [](unsigned I) { return (I == static_cast<unsigned>(-1)); });
+ erase_if(OF.Candidates, [&UnsignedVecBegin](Candidate &C) {
+ return std::any_of(UnsignedVecBegin + C.getStartIdx(),
+ UnsignedVecBegin + C.getEndIdx() + 1, [](unsigned I) {
+ return I == static_cast<unsigned>(-1);
+ });
});
+#ifndef NDEBUG
+ auto NumCandidatesAfter = OF.Candidates.size();
+ LLVM_DEBUG(dbgs() << "PRUNED: " << NumCandidatesBefore - NumCandidatesAfter
+ << "/" << NumCandidatesBefore << " candidates\n");
+#endif
+
// If we made it unbeneficial to outline this function, skip it.
- if (OF.getBenefit() < 1)
+ if (OF.getBenefit() < OutlinerBenefitThreshold) {
+ LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF.getBenefit()
+ << " B) < threshold (" << OutlinerBenefitThreshold
+ << " B)\n");
continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF.getBenefit()
+ << " B) > threshold (" << OutlinerBenefitThreshold
+ << " B)\n");
// It's beneficial. Create the function and outline its sequence's
// occurrences.
@@ -786,6 +881,7 @@ bool MachineOutliner::outline(Module &M,
const TargetInstrInfo &TII = *STI.getInstrInfo();
// Replace occurrences of the sequence with calls to the new function.
+ LLVM_DEBUG(dbgs() << "CREATE OUTLINED CALLS\n");
for (Candidate &C : OF.Candidates) {
MachineBasicBlock &MBB = *C.getMBB();
MachineBasicBlock::iterator StartIt = C.front();
@@ -793,6 +889,18 @@ bool MachineOutliner::outline(Module &M,
// Insert the call.
auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C);
+// Insert the call.
+#ifndef NDEBUG
+ auto MBBBeingOutlinedFromName =
+ MBB.getName().empty() ? "<unknown>" : MBB.getName().str();
+ auto MFBeingOutlinedFromName = MBB.getParent()->getName().empty()
+ ? "<unknown>"
+ : MBB.getParent()->getName().str();
+ LLVM_DEBUG(dbgs() << " CALL: " << MF->getName() << " in "
+ << MFBeingOutlinedFromName << ":"
+ << MBBBeingOutlinedFromName << "\n");
+ LLVM_DEBUG(dbgs() << " .. " << *CallInst);
+#endif
// If the caller tracks liveness, then we need to make sure that
// anything we outline doesn't break liveness assumptions. The outlined
@@ -859,9 +967,8 @@ bool MachineOutliner::outline(Module &M,
MBB.erase(std::next(StartIt), std::next(EndIt));
// Keep track of what we removed by marking them all as -1.
- for (unsigned &I :
- llvm::make_range(Mapper.UnsignedVec.begin() + C.getStartIdx(),
- Mapper.UnsignedVec.begin() + C.getEndIdx() + 1))
+ for (unsigned &I : make_range(UnsignedVecBegin + C.getStartIdx(),
+ UnsignedVecBegin + C.getEndIdx() + 1))
I = static_cast<unsigned>(-1);
OutlinedSomething = true;
@@ -878,13 +985,12 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
MachineModuleInfo &MMI) {
// Build instruction mappings for each function in the module. Start by
// iterating over each Function in M.
+ LLVM_DEBUG(dbgs() << "*** Populating mapper ***\n");
for (Function &F : M) {
+ LLVM_DEBUG(dbgs() << "MAPPING FUNCTION: " << F.getName() << "\n");
if (F.hasFnAttribute("nooutline")) {
- LLVM_DEBUG({
- dbgs() << "... Skipping function with nooutline attribute: "
- << F.getName() << "\n";
- });
+ LLVM_DEBUG(dbgs() << "SKIP: Function has nooutline attribute\n");
continue;
}
@@ -894,44 +1000,58 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
// If it doesn't, then there's nothing to outline from. Move to the next
// Function.
- if (!MF)
+ if (!MF) {
+ LLVM_DEBUG(dbgs() << "SKIP: Function does not have a MachineFunction\n");
continue;
+ }
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-
- if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
+ if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF)) {
+ LLVM_DEBUG(dbgs() << "SKIP: Target does not want to outline from "
+ "function by default\n");
continue;
+ }
// We have a MachineFunction. Ask the target if it's suitable for outlining.
// If it isn't, then move on to the next Function in the module.
- if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs))
+ if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs)) {
+ LLVM_DEBUG(dbgs() << "SKIP: " << MF->getName()
+ << ": unsafe to outline from\n");
continue;
+ }
// We have a function suitable for outlining. Iterate over every
// MachineBasicBlock in MF and try to map its instructions to a list of
// unsigned integers.
+ const unsigned MinMBBSize = 2;
+
for (MachineBasicBlock &MBB : *MF) {
+ LLVM_DEBUG(dbgs() << " MAPPING MBB: '" << MBB.getName() << "'\n");
// If there isn't anything in MBB, then there's no point in outlining from
// it.
// If there are fewer than 2 instructions in the MBB, then it can't ever
// contain something worth outlining.
// FIXME: This should be based off of the maximum size in B of an outlined
// call versus the size in B of the MBB.
- if (MBB.empty() || MBB.size() < 2)
+ if (MBB.size() < MinMBBSize) {
+ LLVM_DEBUG(dbgs() << " SKIP: MBB size less than minimum size of "
+ << MinMBBSize << "\n");
continue;
+ }
// Check if MBB could be the target of an indirect branch. If it is, then
// we don't want to outline from it.
- if (MBB.hasAddressTaken())
+ if (MBB.hasAddressTaken()) {
+ LLVM_DEBUG(dbgs() << " SKIP: MBB's address is taken\n");
continue;
+ }
// MBB is suitable for outlining. Map it to a list of unsigneds.
Mapper.convertToUnsignedVec(MBB, *TII);
}
-
- // Statistics.
- UnsignedVecSize = Mapper.UnsignedVec.size();
}
+ // Statistics.
+ UnsignedVecSize = Mapper.UnsignedVec.size();
}
void MachineOutliner::initSizeRemarkInfo(
diff --git a/llvm/lib/CodeGen/MachinePassManager.cpp b/llvm/lib/CodeGen/MachinePassManager.cpp
index 039634f3d047..439ff8babcc6 100644
--- a/llvm/lib/CodeGen/MachinePassManager.cpp
+++ b/llvm/lib/CodeGen/MachinePassManager.cpp
@@ -91,8 +91,8 @@ Error MachineFunctionPassManager::run(Module &M,
// TODO: EmitSizeRemarks
PreservedAnalyses PassPA = P->run(MF, MFAM);
- PI.runAfterPass(*P, MF, PassPA);
MFAM.invalidate(MF, PassPA);
+ PI.runAfterPass(*P, MF, PassPA);
}
}
} while (true);
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index adb630469003..c7e7497dab36 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -496,7 +496,7 @@ void SwingSchedulerDAG::schedule() {
updatePhiDependences();
Topo.InitDAGTopologicalSorting();
changeDependences();
- postprocessDAG();
+ postProcessDAG();
LLVM_DEBUG(dump());
NodeSetType NodeSets;
@@ -865,13 +865,11 @@ void SwingSchedulerDAG::updatePhiDependences() {
unsigned HasPhiDef = 0;
MachineInstr *MI = I.getInstr();
// Iterate over each operand, and we process the definitions.
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- if (!MOI->isReg())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
continue;
- Register Reg = MOI->getReg();
- if (MOI->isDef()) {
+ Register Reg = MO.getReg();
+ if (MO.isDef()) {
// If the register is used by a Phi, then create an anti dependence.
for (MachineRegisterInfo::use_instr_iterator
UI = MRI.use_instr_begin(Reg),
@@ -893,7 +891,7 @@ void SwingSchedulerDAG::updatePhiDependences() {
}
}
}
- } else if (MOI->isUse()) {
+ } else if (MO.isUse()) {
// If the register is defined by a Phi, then create a true dependence.
MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
if (DefMI == nullptr)
@@ -903,7 +901,7 @@ void SwingSchedulerDAG::updatePhiDependences() {
if (!MI->isPHI()) {
SDep Dep(SU, SDep::Data, Reg);
Dep.setLatency(0);
- ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep);
+ ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep);
I.addPred(Dep);
} else {
HasPhiUse = Reg;
@@ -1559,31 +1557,28 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
const MachineInstr *MI = SU->getInstr();
if (MI->isPHI())
continue;
- for (const MachineOperand &MO : MI->operands())
- if (MO.isReg() && MO.isUse()) {
- Register Reg = MO.getReg();
- if (Reg.isVirtual())
- Uses.insert(Reg);
- else if (MRI.isAllocatable(Reg))
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units)
- Uses.insert(*Units);
- }
+ for (const MachineOperand &MO : MI->all_uses()) {
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual())
+ Uses.insert(Reg);
+ else if (MRI.isAllocatable(Reg))
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ Uses.insert(Unit);
+ }
}
for (SUnit *SU : NS)
- for (const MachineOperand &MO : SU->getInstr()->operands())
- if (MO.isReg() && MO.isDef() && !MO.isDead()) {
+ for (const MachineOperand &MO : SU->getInstr()->all_defs())
+ if (!MO.isDead()) {
Register Reg = MO.getReg();
if (Reg.isVirtual()) {
if (!Uses.count(Reg))
LiveOutRegs.push_back(RegisterMaskPair(Reg,
LaneBitmask::getNone()));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units)
- if (!Uses.count(*Units))
- LiveOutRegs.push_back(RegisterMaskPair(*Units,
- LaneBitmask::getNone()));
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ if (!Uses.count(Unit))
+ LiveOutRegs.push_back(
+ RegisterMaskPair(Unit, LaneBitmask::getNone()));
}
}
RPTracker.addLiveRegs(LiveOutRegs);
@@ -2316,7 +2311,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
}
-void SwingSchedulerDAG::postprocessDAG() {
+void SwingSchedulerDAG::postProcessDAG() {
for (auto &M : Mutations)
M->apply(this);
}
@@ -2654,10 +2649,7 @@ bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
if (!isLoopCarried(SSD, *Phi))
return false;
unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
- for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
- MachineOperand &DMO = Def->getOperand(i);
- if (!DMO.isReg() || !DMO.isDef())
- continue;
+ for (MachineOperand &DMO : Def->all_defs()) {
if (DMO.getReg() == LoopReg)
return true;
}
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 1ad08e19feae..0048918fc53b 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -101,13 +101,13 @@ MachineRegisterInfo::constrainRegAttrs(Register Reg,
const auto RegCB = getRegClassOrRegBank(Reg);
if (RegCB.isNull())
setRegClassOrRegBank(Reg, ConstrainingRegCB);
- else if (RegCB.is<const TargetRegisterClass *>() !=
- ConstrainingRegCB.is<const TargetRegisterClass *>())
+ else if (isa<const TargetRegisterClass *>(RegCB) !=
+ isa<const TargetRegisterClass *>(ConstrainingRegCB))
return false;
- else if (RegCB.is<const TargetRegisterClass *>()) {
+ else if (isa<const TargetRegisterClass *>(RegCB)) {
if (!::constrainRegClass(
- *this, Reg, RegCB.get<const TargetRegisterClass *>(),
- ConstrainingRegCB.get<const TargetRegisterClass *>(), MinNumRegs))
+ *this, Reg, cast<const TargetRegisterClass *>(RegCB),
+ cast<const TargetRegisterClass *>(ConstrainingRegCB), MinNumRegs))
return false;
} else if (RegCB != ConstrainingRegCB)
return false;
@@ -644,16 +644,8 @@ void MachineRegisterInfo::setCalleeSavedRegs(ArrayRef<MCPhysReg> CSRs) {
bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
- bool IsRootReserved = true;
- for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
- Super.isValid(); ++Super) {
- MCRegister Reg = *Super;
- if (!isReserved(Reg)) {
- IsRootReserved = false;
- break;
- }
- }
- if (IsRootReserved)
+ if (all_of(TRI->superregs_inclusive(*Root),
+ [&](MCPhysReg Super) { return isReserved(Super); }))
return true;
}
return false;
diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp
index 6de8f8da9254..324084fb9c32 100644
--- a/llvm/lib/CodeGen/MachineSSAContext.cpp
+++ b/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -21,8 +21,6 @@
using namespace llvm;
-const Register MachineSSAContext::ValueRefNull{};
-
void MachineSSAContext::setFunction(MachineFunction &Fn) {
MF = &Fn;
RegInfo = &MF->getRegInfo();
@@ -42,10 +40,8 @@ void MachineSSAContext::appendBlockTerms(
void MachineSSAContext::appendBlockDefs(SmallVectorImpl<Register> &defs,
const MachineBasicBlock &block) {
for (const MachineInstr &instr : block.instrs()) {
- for (const MachineOperand &op : instr.operands()) {
- if (op.isReg() && op.isDef())
- defs.push_back(op.getReg());
- }
+ for (const MachineOperand &op : instr.all_defs())
+ defs.push_back(op.getReg());
}
}
@@ -56,7 +52,7 @@ MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) const {
return RegInfo->getVRegDef(value)->getParent();
}
-bool MachineSSAContext::isConstantValuePhi(const MachineInstr &Phi) {
+bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) {
return Phi.isConstantValuePHI();
}
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 5ab5a40e7574..ba5432459d12 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
@@ -56,7 +57,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -98,9 +98,13 @@ cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
cl::opt<bool> MISchedDumpReservedCycles(
"misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
cl::desc("Dump resource usage at schedule boundary."));
+cl::opt<bool> MischedDetailResourceBooking(
+ "misched-detail-resource-booking", cl::Hidden, cl::init(false),
+    cl::desc("Show details of invoking getNextResourceCycle."));
#else
const bool ViewMISchedDAGs = false;
const bool PrintDAGs = false;
+const bool MischedDetailResourceBooking = false;
#ifdef LLVM_ENABLE_DUMP
const bool MISchedDumpReservedCycles = false;
#endif // LLVM_ENABLE_DUMP
@@ -147,6 +151,28 @@ static cl::opt<unsigned>
cl::desc("The threshold for fast cluster"),
cl::init(1000));
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static cl::opt<bool> MISchedDumpScheduleTrace(
+ "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
+ cl::desc("Dump resource usage at schedule boundary."));
+static cl::opt<unsigned>
+ HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
+ cl::desc("Set width of the columns with "
+ "the resources and schedule units"),
+ cl::init(19));
+static cl::opt<unsigned>
+ ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
+ cl::desc("Set width of the columns showing resource booking."),
+ cl::init(5));
+static cl::opt<bool> MISchedSortResourcesInTrace(
+ "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
+ cl::desc("Sort the resources printed in the dump trace"));
+#endif
+
+static cl::opt<unsigned>
+ MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
+ cl::desc("Number of intervals to track"), cl::init(10));
+
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
@@ -777,7 +803,7 @@ void ScheduleDAGMI::schedule() {
// Build the DAG.
buildSchedGraph(AA);
- postprocessDAG();
+ postProcessDAG();
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
@@ -844,7 +870,7 @@ void ScheduleDAGMI::schedule() {
}
/// Apply each ScheduleDAGMutation step in order.
-void ScheduleDAGMI::postprocessDAG() {
+void ScheduleDAGMI::postProcessDAG() {
for (auto &m : Mutations)
m->apply(this);
}
@@ -931,7 +957,181 @@ void ScheduleDAGMI::placeDebugValues() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static const char *scheduleTableLegend = " i: issue\n x: resource booked";
+
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
+ // Bail off when there is no schedule model to query.
+ if (!SchedModel.hasInstrSchedModel())
+ return;
+
+ // Nothing to show if there is no or just one instruction.
+ if (BB->size() < 2)
+ return;
+
+ dbgs() << " * Schedule table (TopDown):\n";
+ dbgs() << scheduleTableLegend << "\n";
+ const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
+ unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU)
+ continue;
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC);
+ PI != PE; ++PI) {
+ if (SU->TopReadyCycle + PI->Cycles - 1 > LastCycle)
+ LastCycle = SU->TopReadyCycle + PI->Cycles - 1;
+ }
+ }
+ // Print the header with the cycles
+ dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+ for (unsigned C = FirstCycle; C <= LastCycle; ++C)
+ dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+ dbgs() << "|\n";
+
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU) {
+ dbgs() << "Missing SUnit\n";
+ continue;
+ }
+ std::string NodeName("SU(");
+ NodeName += std::to_string(SU->NodeNum) + ")";
+ dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+ unsigned C = FirstCycle;
+ for (; C <= LastCycle; ++C) {
+ if (C == SU->TopReadyCycle)
+ dbgs() << llvm::left_justify("| i", ColWidth);
+ else
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ dbgs() << "|\n";
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+
+ SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
+ make_range(SchedModel.getWriteProcResBegin(SC),
+ SchedModel.getWriteProcResEnd(SC)));
+
+ if (MISchedSortResourcesInTrace)
+ llvm::stable_sort(ResourcesIt,
+ [](const MCWriteProcResEntry &LHS,
+ const MCWriteProcResEntry &RHS) -> bool {
+ return LHS.StartAtCycle < RHS.StartAtCycle ||
+ (LHS.StartAtCycle == RHS.StartAtCycle &&
+ LHS.Cycles < RHS.Cycles);
+ });
+ for (const MCWriteProcResEntry &PI : ResourcesIt) {
+ C = FirstCycle;
+ const std::string ResName =
+ SchedModel.getResourceName(PI.ProcResourceIdx);
+ dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
+ for (; C < SU->TopReadyCycle + PI.StartAtCycle; ++C) {
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ for (unsigned I = 0, E = PI.Cycles - PI.StartAtCycle; I != E; ++I, ++C)
+ dbgs() << llvm::left_justify("| x", ColWidth);
+ while (C++ <= LastCycle)
+ dbgs() << llvm::left_justify("|", ColWidth);
+ // Place end char
+ dbgs() << "| \n";
+ }
+ }
+}
+
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
+ // Bail off when there is no schedule model to query.
+ if (!SchedModel.hasInstrSchedModel())
+ return;
+
+ // Nothing to show if there is no or just one instruction.
+ if (BB->size() < 2)
+ return;
+
+ dbgs() << " * Schedule table (BottomUp):\n";
+ dbgs() << scheduleTableLegend << "\n";
+
+ const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
+ int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU)
+ continue;
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC);
+ PI != PE; ++PI) {
+ if ((int)SU->BotReadyCycle - PI->Cycles + 1 < LastCycle)
+ LastCycle = (int)SU->BotReadyCycle - PI->Cycles + 1;
+ }
+ }
+ // Print the header with the cycles
+ dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+ for (int C = FirstCycle; C >= LastCycle; --C)
+ dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+ dbgs() << "|\n";
+
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU) {
+ dbgs() << "Missing SUnit\n";
+ continue;
+ }
+ std::string NodeName("SU(");
+ NodeName += std::to_string(SU->NodeNum) + ")";
+ dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+ int C = FirstCycle;
+ for (; C >= LastCycle; --C) {
+ if (C == (int)SU->BotReadyCycle)
+ dbgs() << llvm::left_justify("| i", ColWidth);
+ else
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ dbgs() << "|\n";
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
+ make_range(SchedModel.getWriteProcResBegin(SC),
+ SchedModel.getWriteProcResEnd(SC)));
+
+ if (MISchedSortResourcesInTrace)
+ llvm::stable_sort(ResourcesIt,
+ [](const MCWriteProcResEntry &LHS,
+ const MCWriteProcResEntry &RHS) -> bool {
+ return LHS.StartAtCycle < RHS.StartAtCycle ||
+ (LHS.StartAtCycle == RHS.StartAtCycle &&
+ LHS.Cycles < RHS.Cycles);
+ });
+ for (const MCWriteProcResEntry &PI : ResourcesIt) {
+ C = FirstCycle;
+ const std::string ResName =
+ SchedModel.getResourceName(PI.ProcResourceIdx);
+ dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
+ for (; C > ((int)SU->BotReadyCycle - (int)PI.StartAtCycle); --C) {
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ for (unsigned I = 0, E = PI.Cycles - PI.StartAtCycle; I != E; ++I, --C)
+ dbgs() << llvm::left_justify("| x", ColWidth);
+ while (C-- >= LastCycle)
+ dbgs() << llvm::left_justify("|", ColWidth);
+ // Place end char
+ dbgs() << "| \n";
+ }
+ }
+}
+#endif
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
+ if (MISchedDumpScheduleTrace) {
+ if (ForceTopDown)
+ dumpScheduleTraceTopDown();
+ else if (ForceBottomUp)
+ dumpScheduleTraceBottomUp();
+ else {
+ dbgs() << "* Schedule table (Bidirectional): not implemented\n";
+ }
+ }
+
for (MachineInstr &MI : *this) {
if (SUnit *SU = getSUnit(&MI))
dumpNode(*SU);
@@ -967,8 +1167,8 @@ void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
// Ignore re-defs.
if (TrackLaneMasks) {
bool FoundDef = false;
- for (const MachineOperand &MO2 : MI.operands()) {
- if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
+ for (const MachineOperand &MO2 : MI.all_defs()) {
+ if (MO2.getReg() == Reg && !MO2.isDead()) {
FoundDef = true;
break;
}
@@ -1223,7 +1423,7 @@ void ScheduleDAGMILive::schedule() {
LLVM_DEBUG(SchedImpl->dumpPolicy());
buildDAGWithRegPressure();
- postprocessDAG();
+ postProcessDAG();
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
@@ -2008,6 +2208,7 @@ void SchedBoundary::reset() {
ZoneCritResIdx = 0;
IsResourceLimited = false;
ReservedCycles.clear();
+ ReservedResourceSegments.clear();
ReservedCyclesIndex.clear();
ResourceGroupSubUnitMasks.clear();
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
@@ -2036,7 +2237,8 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned PIdx = PI->ProcResourceIdx;
unsigned Factor = SchedModel->getResourceFactor(PIdx);
- RemainingCounts[PIdx] += (Factor * PI->Cycles);
+ assert(PI->Cycles >= PI->StartAtCycle);
+ RemainingCounts[PIdx] += (Factor * (PI->Cycles - PI->StartAtCycle));
}
}
}
@@ -2089,14 +2291,24 @@ unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
/// Compute the next cycle at which the given processor resource unit
/// can be scheduled.
unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
- unsigned Cycles) {
+ unsigned Cycles,
+ unsigned StartAtCycle) {
+ if (SchedModel && SchedModel->enableIntervals()) {
+ if (isTop())
+ return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
+ CurrCycle, StartAtCycle, Cycles);
+
+ return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
+ CurrCycle, StartAtCycle, Cycles);
+ }
+
unsigned NextUnreserved = ReservedCycles[InstanceIdx];
-  // If this resource has never been used, always return cycle zero.
+  // If this resource has never been used, the earliest it can be booked is
+  // the current cycle.
if (NextUnreserved == InvalidCycle)
- return 0;
+ return CurrCycle;
// For bottom-up scheduling add the cycles needed for the current operation.
if (!isTop())
- NextUnreserved += Cycles;
+ NextUnreserved = std::max(CurrCycle, NextUnreserved + Cycles);
return NextUnreserved;
}
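
In the legacy (non-interval) path above, the result is now expressed relative to the current cycle: a never-used unit becomes available at CurrCycle rather than cycle zero, and a bottom-up query is clamped to CurrCycle. A small self-contained restatement of that arithmetic, illustrative only, with an InvalidCycle sentinel assumed here to be ~0u:

#include <algorithm>

constexpr unsigned InvalidCycle = ~0u;

// Bottom-up availability under the updated rules: an unused unit is free at
// the current cycle; otherwise the previous reservation plus the requested
// occupancy, but never earlier than the current cycle.
constexpr unsigned nextUnreservedBottomUp(unsigned CurrCycle, unsigned Reserved,
                                          unsigned Cycles) {
  if (Reserved == InvalidCycle)
    return CurrCycle;
  return std::max(CurrCycle, Reserved + Cycles);
}

static_assert(nextUnreservedBottomUp(7, InvalidCycle, 3) == 7);
static_assert(nextUnreservedBottomUp(7, 2, 3) == 7); // already past, no stall
static_assert(nextUnreservedBottomUp(7, 6, 3) == 9); // stalls two cycles
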
@@ -2105,8 +2317,12 @@ unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
/// instance in the reserved cycles vector.
std::pair<unsigned, unsigned>
SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
- unsigned Cycles) {
-
+ unsigned Cycles, unsigned StartAtCycle) {
+ if (MischedDetailResourceBooking) {
+ LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
+ LLVM_DEBUG(dumpReservedCycles());
+ LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
+ }
unsigned MinNextUnreserved = InvalidCycle;
unsigned InstanceIdx = 0;
unsigned StartIndex = ReservedCyclesIndex[PIdx];
@@ -2134,7 +2350,7 @@ SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
unsigned NextUnreserved, NextInstanceIdx;
std::tie(NextUnreserved, NextInstanceIdx) =
- getNextResourceCycle(SC, SubUnits[I], Cycles);
+ getNextResourceCycle(SC, SubUnits[I], Cycles, StartAtCycle);
if (MinNextUnreserved > NextUnreserved) {
InstanceIdx = NextInstanceIdx;
MinNextUnreserved = NextUnreserved;
@@ -2145,12 +2361,21 @@ SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
++I) {
- unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
+ unsigned NextUnreserved =
+ getNextResourceCycleByInstance(I, Cycles, StartAtCycle);
+ if (MischedDetailResourceBooking)
+ LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
+ << NextUnreserved << "c\n");
if (MinNextUnreserved > NextUnreserved) {
InstanceIdx = I;
MinNextUnreserved = NextUnreserved;
}
}
+ if (MischedDetailResourceBooking)
+ LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
+ << "[" << InstanceIdx - StartIndex << "]"
+ << " available @" << MinNextUnreserved << "c"
+ << "\n");
return std::make_pair(MinNextUnreserved, InstanceIdx);
}
@@ -2195,8 +2420,10 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
SchedModel->getWriteProcResEnd(SC))) {
unsigned ResIdx = PE.ProcResourceIdx;
unsigned Cycles = PE.Cycles;
+ unsigned StartAtCycle = PE.StartAtCycle;
unsigned NRCycle, InstanceIdx;
- std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
+ std::tie(NRCycle, InstanceIdx) =
+ getNextResourceCycle(SC, ResIdx, Cycles, StartAtCycle);
if (NRCycle > CurrCycle) {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
MaxObservedStall = std::max(Cycles, MaxObservedStall);
@@ -2347,9 +2574,10 @@ void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
/// \return the next cycle at which the instruction may execute without
/// oversubscribing resources.
unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
- unsigned Cycles, unsigned NextCycle) {
+ unsigned Cycles, unsigned NextCycle,
+ unsigned StartAtCycle) {
unsigned Factor = SchedModel->getResourceFactor(PIdx);
- unsigned Count = Factor * Cycles;
+ unsigned Count = Factor * (Cycles - StartAtCycle);
LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
<< Cycles << "x" << Factor << "u\n");
@@ -2369,7 +2597,8 @@ unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
}
// For reserved resources, record the highest cycle using the resource.
unsigned NextAvailable, InstanceIdx;
- std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles);
+ std::tie(NextAvailable, InstanceIdx) =
+ getNextResourceCycle(SC, PIdx, Cycles, StartAtCycle);
if (NextAvailable > CurrCycle) {
LLVM_DEBUG(dbgs() << " Resource conflict: "
<< SchedModel->getResourceName(PIdx)
@@ -2448,8 +2677,8 @@ void SchedBoundary::bumpNode(SUnit *SU) {
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
- unsigned RCycle =
- countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
+ unsigned RCycle = countResource(SC, PI->ProcResourceIdx, PI->Cycles,
+ NextCycle, PI->StartAtCycle);
if (RCycle > NextCycle)
NextCycle = RCycle;
}
@@ -2463,14 +2692,33 @@ void SchedBoundary::bumpNode(SUnit *SU) {
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned PIdx = PI->ProcResourceIdx;
if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
- unsigned ReservedUntil, InstanceIdx;
- std::tie(ReservedUntil, InstanceIdx) =
- getNextResourceCycle(SC, PIdx, 0);
- if (isTop()) {
- ReservedCycles[InstanceIdx] =
- std::max(ReservedUntil, NextCycle + PI->Cycles);
- } else
- ReservedCycles[InstanceIdx] = NextCycle;
+
+ if (SchedModel && SchedModel->enableIntervals()) {
+ unsigned ReservedUntil, InstanceIdx;
+ std::tie(ReservedUntil, InstanceIdx) =
+ getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle);
+ if (isTop()) {
+ ReservedResourceSegments[InstanceIdx].add(
+ ResourceSegments::getResourceIntervalTop(
+ NextCycle, PI->StartAtCycle, PI->Cycles),
+ MIResourceCutOff);
+ } else {
+ ReservedResourceSegments[InstanceIdx].add(
+ ResourceSegments::getResourceIntervalBottom(
+ NextCycle, PI->StartAtCycle, PI->Cycles),
+ MIResourceCutOff);
+ }
+ } else {
+
+ unsigned ReservedUntil, InstanceIdx;
+ std::tie(ReservedUntil, InstanceIdx) =
+ getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle);
+ if (isTop()) {
+ ReservedCycles[InstanceIdx] =
+ std::max(ReservedUntil, NextCycle + PI->Cycles);
+ } else
+ ReservedCycles[InstanceIdx] = NextCycle;
+ }
}
}
}
@@ -2610,8 +2858,14 @@ LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
std::string ResName = SchedModel->getResourceName(ResIdx);
for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
- dbgs() << ResName << "(" << UnitIdx
- << ") = " << ReservedCycles[StartIdx + UnitIdx] << "\n";
+ dbgs() << ResName << "(" << UnitIdx << ") = ";
+ if (SchedModel && SchedModel->enableIntervals()) {
+ if (ReservedResourceSegments.count(StartIdx + UnitIdx))
+ dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
+ else
+ dbgs() << "{ }\n";
+ } else
+ dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
}
StartIdx += NumUnits;
}
@@ -3978,3 +4232,101 @@ void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
void ScheduleDAGMI::viewGraph() {
viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
}
+
+/// Sort predicate for the intervals stored in an instance of
+/// ResourceSegments. Intervals are always disjoint (no intersection
+/// for any pairs of intervals), therefore we can sort the totality of
+/// the intervals by looking only at the left boundary.
+static bool sortIntervals(const ResourceSegments::IntervalTy &A,
+ const ResourceSegments::IntervalTy &B) {
+ return A.first < B.first;
+}
+
+unsigned ResourceSegments::getFirstAvailableAt(
+ unsigned CurrCycle, unsigned StartAtCycle, unsigned Cycle,
+ std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
+ IntervalBuilder) const {
+ assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
+ sortIntervals) &&
+ "Cannot execute on an un-sorted set of intervals.");
+ unsigned RetCycle = CurrCycle;
+ ResourceSegments::IntervalTy NewInterval =
+ IntervalBuilder(RetCycle, StartAtCycle, Cycle);
+ for (auto &Interval : _Intervals) {
+ if (!intersects(NewInterval, Interval))
+ continue;
+
+ // Move the interval right next to the top of the one it
+ // intersects.
+ assert(Interval.second > NewInterval.first &&
+ "Invalid intervals configuration.");
+ RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
+ NewInterval = IntervalBuilder(RetCycle, StartAtCycle, Cycle);
+ }
+ return RetCycle;
+}
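
getFirstAvailableAt is a first-fit search: it builds a tentative interval at the current cycle and, whenever that interval collides with an existing reservation, moves the tentative start up to the end of the colliding reservation and retries, relying on the list being sorted and disjoint. A standalone sketch over plain half-open pairs, ignoring the StartAtCycle offset that the real IntervalBuilder applies (names and simplifications are mine):

#include <cassert>
#include <utility>
#include <vector>

using Interval = std::pair<unsigned, unsigned>; // half-open [begin, end)

static bool overlapsHalfOpen(Interval A, Interval B) {
  return A.first < B.second && B.first < A.second;
}

// First-fit over sorted, disjoint reservations: slide a [C, C + Cycles)
// window past every reservation it collides with, as getFirstAvailableAt
// does when StartAtCycle is zero.
static unsigned firstAvailableAt(const std::vector<Interval> &Reserved,
                                 unsigned CurrCycle, unsigned Cycles) {
  unsigned C = CurrCycle;
  for (Interval R : Reserved) {
    Interval Want{C, C + Cycles};
    if (overlapsHalfOpen(Want, R))
      C += R.second - Want.first; // jump to the end of the busy interval
  }
  return C;
}

int main() {
  // Reservations [2,4) and [5,8): a 2-cycle request at cycle 3 first collides
  // with [2,4), retries at 4, collides with [5,8), and lands at cycle 8.
  assert(firstAvailableAt({{2, 4}, {5, 8}}, 3, 2) == 8);
  return 0;
}
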
+
+void ResourceSegments::add(ResourceSegments::IntervalTy A,
+ const unsigned CutOff) {
+ assert(A.first < A.second && "Cannot add empty resource usage");
+ assert(CutOff > 0 && "0-size interval history has no use.");
+ assert(all_of(_Intervals,
+ [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
+ return !intersects(A, Interval);
+ }) &&
+ "A resource is being overwritten");
+ _Intervals.push_back(A);
+
+ sortAndMerge();
+
+ // Do not keep the full history of the intervals, just the
+ // latest #CutOff.
+ while (_Intervals.size() > CutOff)
+ _Intervals.pop_front();
+}
+
+bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
+ ResourceSegments::IntervalTy B) {
+ assert(A.first <= A.second && "Invalid interval");
+ assert(B.first <= B.second && "Invalid interval");
+
+ // Share one boundary.
+ if ((A.first == B.first) || (A.second == B.second))
+ return true;
+
+  // full intersect:    [    ***    )   B
+  //                        [***)       A
+ if ((A.first > B.first) && (A.second < B.second))
+ return true;
+
+ // right intersect: [ ***) B
+ // [*** ) A
+ if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
+ return true;
+
+ // left intersect: [*** ) B
+ // [ ***) A
+ if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
+ return true;
+
+ return false;
+}
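
One detail worth noting in intersects(): two intervals that share either boundary exactly are treated as colliding, while adjacent half-open intervals such as [2,4) and [4,6) are not. A transcription of the same branch structure on plain pairs, with a few concrete cases (standalone, not the in-tree class):

#include <cassert>
#include <utility>

using Iv = std::pair<unsigned, unsigned>; // half-open [first, second)

// Same branch structure as ResourceSegments::intersects.
static bool intersects(Iv A, Iv B) {
  if (A.first == B.first || A.second == B.second)
    return true; // shared boundary
  if (A.first > B.first && A.second < B.second)
    return true; // A fully inside B
  if (A.first > B.first && A.first < B.second && A.second > B.second)
    return true; // right overlap
  if (A.first < B.first && B.first < A.second && B.second > B.first)
    return true; // left overlap
  return false;
}

int main() {
  assert(intersects({2, 5}, {2, 9}));  // same left boundary
  assert(intersects({3, 4}, {2, 9}));  // fully contained
  assert(intersects({6, 11}, {2, 9})); // right overlap
  assert(intersects({1, 4}, {2, 9}));  // left overlap
  assert(!intersects({2, 4}, {4, 6})); // adjacent intervals do not collide
  return 0;
}
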
+
+void ResourceSegments::sortAndMerge() {
+ if (_Intervals.size() <= 1)
+ return;
+
+ // First sort the collection.
+ _Intervals.sort(sortIntervals);
+
+  // We can use std::next because there are at least 2 elements in the list.
+ auto next = std::next(std::begin(_Intervals));
+ auto E = std::end(_Intervals);
+ for (; next != E; ++next) {
+ if (std::prev(next)->second >= next->first) {
+ next->first = std::prev(next)->first;
+ _Intervals.erase(std::prev(next));
+ continue;
+ }
+ }
+}
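
sortAndMerge() keeps the reservation list ordered by left boundary and folds any neighbour that starts at or before the previous interval's end into a single interval, so touching intervals such as [1,3) and [3,5) coalesce. A standalone sketch of the same walk over a std::list of pairs (illustrative, with the same merge condition):

#include <cassert>
#include <list>
#include <utility>

using Iv = std::pair<unsigned, unsigned>; // half-open [first, second)

// Sort by left boundary, then merge any neighbour whose start does not lie
// strictly past the previous end, mirroring ResourceSegments::sortAndMerge.
static void sortAndMerge(std::list<Iv> &Ivs) {
  if (Ivs.size() <= 1)
    return;
  Ivs.sort([](Iv A, Iv B) { return A.first < B.first; });
  for (auto Next = std::next(Ivs.begin()); Next != Ivs.end(); ++Next) {
    if (std::prev(Next)->second >= Next->first) {
      Next->first = std::prev(Next)->first;
      Ivs.erase(std::prev(Next));
    }
  }
}

int main() {
  std::list<Iv> Ivs{{7, 9}, {1, 3}, {2, 5}};
  sortAndMerge(Ivs);
  assert((Ivs == std::list<Iv>{{1, 5}, {7, 9}}));
  return 0;
}
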
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 8429d468254a..8da97dc7e742 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -115,15 +115,15 @@ STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
namespace {
class MachineSinking : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI; // Machine register information
- MachineDominatorTree *DT; // Machine dominator tree
- MachinePostDominatorTree *PDT; // Machine post dominator tree
- MachineCycleInfo *CI;
- MachineBlockFrequencyInfo *MBFI;
- const MachineBranchProbabilityInfo *MBPI;
- AliasAnalysis *AA;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr; // Machine register information
+ MachineDominatorTree *DT = nullptr; // Machine dominator tree
+ MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
+ MachineCycleInfo *CI = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
+ AliasAnalysis *AA = nullptr;
RegisterClassInfo RegClassInfo;
// Remember which edges have been considered for breaking.
@@ -268,6 +268,44 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
"Machine code sinking", false, false)
+/// Return true if a target defined block prologue instruction interferes
+/// with a sink candidate.
+static bool blockPrologueInterferes(const MachineBasicBlock *BB,
+ MachineBasicBlock::const_iterator End,
+ const MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ const MachineRegisterInfo *MRI) {
+ for (MachineBasicBlock::const_iterator PI = BB->getFirstNonPHI(); PI != End;
+ ++PI) {
+ // Only check target defined prologue instructions
+ if (!TII->isBasicBlockPrologue(*PI))
+ continue;
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ if (Reg.isPhysical() && MRI && MRI->isConstantPhysReg(Reg))
+ continue;
+ if (PI->modifiesRegister(Reg, TRI))
+ return true;
+ } else {
+ if (PI->readsRegister(Reg, TRI))
+ return true;
+ // Check for interference with non-dead defs
+ auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
+ if (DefOp && !DefOp->isDead())
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
MachineBasicBlock *MBB) {
if (!MI.isCopy())
@@ -331,7 +369,7 @@ bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
// %p = PHI %y, %bb.0, %def, %bb.1
if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
MachineInstr *UseInst = MO.getParent();
- unsigned OpNo = UseInst->getOperandNo(&MO);
+ unsigned OpNo = MO.getOperandNo();
MachineBasicBlock *UseBlock = UseInst->getParent();
return UseBlock == MBB && UseInst->isPHI() &&
UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
@@ -602,9 +640,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
// MI is cheap, we probably don't want to break the critical edge for it.
// However, if this would allow some definitions of its source operands
// to be sunk then it's probably worth it.
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isUse())
- continue;
+ for (const MachineOperand &MO : MI.all_uses()) {
Register Reg = MO.getReg();
if (Reg == 0)
continue;
@@ -806,12 +842,10 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
continue;
if (Reg.isPhysical()) {
- if (MO.isUse() &&
- (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
- continue;
-
- // Don't handle non-constant and non-ignorable physical register.
- return false;
+ // Don't handle non-constant and non-ignorable physical register uses.
+ if (MO.isUse() && !MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
+ return false;
+ continue;
}
// Users for the defs are all dominated by SuccToSinkTo.
@@ -972,16 +1006,24 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
if (MBB == SuccToSinkTo)
return nullptr;
+ if (!SuccToSinkTo)
+ return nullptr;
+
// It's not safe to sink instructions to EH landing pad. Control flow into
// landing pad is implicitly defined.
- if (SuccToSinkTo && SuccToSinkTo->isEHPad())
+ if (SuccToSinkTo->isEHPad())
return nullptr;
// It ought to be okay to sink instructions into an INLINEASM_BR target, but
// only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in
// the source block (which this code does not yet do). So for now, forbid
// doing so.
- if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
+ if (SuccToSinkTo->isInlineAsmBrIndirectTarget())
+ return nullptr;
+
+ MachineBasicBlock::const_iterator InsertPos =
+ SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
+ if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI))
return nullptr;
return SuccToSinkTo;
@@ -1302,45 +1344,6 @@ bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
return true;
}
-/// Return true if a target defined block prologue instruction interferes
-/// with a sink candidate.
-static bool blockPrologueInterferes(MachineBasicBlock *BB,
- MachineBasicBlock::iterator End,
- MachineInstr &MI,
- const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII,
- const MachineRegisterInfo *MRI) {
- if (BB->begin() == End)
- return false; // no prologue
- for (MachineBasicBlock::iterator PI = BB->getFirstNonPHI(); PI != End; ++PI) {
- // Only check target defined prologue instructions
- if (!TII->isBasicBlockPrologue(*PI))
- continue;
- for (auto &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- Register Reg = MO.getReg();
- if (!Reg)
- continue;
- if (MO.isUse()) {
- if (Reg.isPhysical() &&
- (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
- continue;
- if (PI->modifiesRegister(Reg, TRI))
- return true;
- } else {
- if (PI->readsRegister(Reg, TRI))
- return true;
- // Check for interference with non-dead defs
- auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
- if (DefOp && !DefOp->isDead())
- return true;
- }
- }
- }
- return false;
-}
-
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
@@ -1383,9 +1386,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || MO.isUse())
- continue;
+ for (const MachineOperand &MO : MI.all_defs()) {
Register Reg = MO.getReg();
if (Reg == 0 || !Reg.isPhysical())
continue;
@@ -1463,8 +1464,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// Collect debug users of any vreg that this inst defines.
SmallVector<MIRegs, 4> DbgUsersToSink;
- for (auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
+ for (auto &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
continue;
if (!SeenDbgUsers.count(MO.getReg()))
continue;
@@ -1498,10 +1499,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// Note that we have to clear the kill flags for any register this instruction
// uses as we may sink over another instruction which currently kills the
// used registers.
- for (MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isUse())
- RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.
- }
+ for (MachineOperand &MO : MI.all_uses())
+ RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.
return true;
}
@@ -1517,8 +1516,8 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
SmallVector<MachineInstr *, 4> DbgDefUsers;
SmallVector<Register, 4> DbgUseRegs;
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
- for (auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
+ for (auto &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
continue;
DbgUseRegs.push_back(MO.getReg());
for (auto &User : MRI.use_instructions(MO.getReg())) {
@@ -1700,8 +1699,8 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
MachineFunction &MF = *SuccBB->getParent();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (unsigned DefReg : DefedRegsInCopy)
- for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
- SuccBB->removeLiveIn(*S);
+ for (MCPhysReg S : TRI->subregs_inclusive(DefReg))
+ SuccBB->removeLiveIn(S);
for (auto U : UsedOpsInCopy) {
Register SrcReg = MI->getOperand(U).getReg();
LaneBitmask Mask;
@@ -1793,9 +1792,8 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
}
// Record debug use of each reg unit.
- for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid();
- ++RI)
- MIUnits[*RI].push_back(MO.getReg());
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg()))
+ MIUnits[Unit].push_back(MO.getReg());
}
}
if (IsValid) {
@@ -1844,12 +1842,9 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// recorded which reg units that DBG_VALUEs read, if this instruction
// writes any of those units then the corresponding DBG_VALUEs must sink.
MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
- for (auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
-
- for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid(); ++RI) {
- for (const auto &MIRegs : SeenDbgInstrs.lookup(*RI)) {
+ for (auto &MO : MI.all_defs()) {
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg())) {
+ for (const auto &MIRegs : SeenDbgInstrs.lookup(Unit)) {
auto &Regs = DbgValsToSinkMap[MIRegs.first];
for (unsigned Reg : MIRegs.second)
Regs.push_back(Reg);
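
Several MachineSink.cpp hunks above replace hand-rolled operand filtering ("if (!MO.isReg() || !MO.isDef()) continue;") with the all_defs()/all_uses() operand ranges, and swap MCRegUnitIterator/MCSubRegIterator loops for the regunits()/subregs_inclusive() range views. A minimal standalone sketch of the filtered-range idea, using only the standard library and illustrative names rather than LLVM's classes:

#include <cstdio>
#include <vector>

struct Operand {
  int Reg;
  bool IsDef; // true for defs, false for uses
};

// Collect only the defs, the way a filtered operand range such as
// all_defs()/all_uses() removes the per-caller reg/def filter boilerplate.
std::vector<Operand> allDefs(const std::vector<Operand> &Ops) {
  std::vector<Operand> Defs;
  for (const Operand &Op : Ops)
    if (Op.IsDef)
      Defs.push_back(Op);
  return Defs;
}

int main() {
  std::vector<Operand> Ops = {{1, true}, {2, false}, {3, true}};
  for (const Operand &Def : allDefs(Ops))
    std::printf("def of reg %d\n", Def.Reg);
  return 0;
}

The benefit is that each loop body states only its real condition, while the shared filter lives in one place.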
diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp
index 28712d1a816b..53bed7397d09 100644
--- a/llvm/lib/CodeGen/MachineSizeOpts.cpp
+++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -24,168 +24,11 @@ extern cl::opt<bool> ForcePGSO;
extern cl::opt<int> PgsoCutoffInstrProf;
extern cl::opt<int> PgsoCutoffSampleProf;
-namespace {
-namespace machine_size_opts_detail {
-
-/// Like ProfileSummaryInfo::isColdBlock but for MachineBasicBlock.
-bool isColdBlock(const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getBlockProfileCount(MBB);
- return Count && PSI->isColdCount(*Count);
-}
-
-bool isColdBlock(BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
- return Count && PSI->isColdCount(*Count);
-}
-
-/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock.
-static bool isHotBlockNthPercentile(int PercentileCutoff,
- const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getBlockProfileCount(MBB);
- return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
-}
-
-static bool isHotBlockNthPercentile(int PercentileCutoff,
- BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
- return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
-}
-
-static bool isColdBlockNthPercentile(int PercentileCutoff,
- const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getBlockProfileCount(MBB);
- return Count && PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
-}
-
-static bool isColdBlockNthPercentile(int PercentileCutoff,
- BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
- return Count && PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
-}
-
-/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for
-/// MachineFunction.
-bool isFunctionColdInCallGraph(
- const MachineFunction *MF,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- if (auto FunctionCount = MF->getFunction().getEntryCount())
- if (!PSI->isColdCount(FunctionCount->getCount()))
- return false;
- for (const auto &MBB : *MF)
- if (!isColdBlock(&MBB, PSI, &MBFI))
- return false;
- return true;
-}
-
-/// Like ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile but for
-/// MachineFunction.
-bool isFunctionHotInCallGraphNthPercentile(
- int PercentileCutoff,
- const MachineFunction *MF,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- if (auto FunctionCount = MF->getFunction().getEntryCount())
- if (PSI->isHotCountNthPercentile(PercentileCutoff,
- FunctionCount->getCount()))
- return true;
- for (const auto &MBB : *MF)
- if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
- return true;
- return false;
-}
-
-bool isFunctionColdInCallGraphNthPercentile(
- int PercentileCutoff, const MachineFunction *MF, ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- if (auto FunctionCount = MF->getFunction().getEntryCount())
- if (!PSI->isColdCountNthPercentile(PercentileCutoff,
- FunctionCount->getCount()))
- return false;
- for (const auto &MBB : *MF)
- if (!isColdBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
- return false;
- return true;
-}
-} // namespace machine_size_opts_detail
-
-struct MachineBasicBlockBFIAdapter {
- static bool isFunctionColdInCallGraph(const MachineFunction *MF,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- return machine_size_opts_detail::isFunctionColdInCallGraph(MF, PSI, MBFI);
- }
- static bool isFunctionHotInCallGraphNthPercentile(
- int CutOff,
- const MachineFunction *MF,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- return machine_size_opts_detail::isFunctionHotInCallGraphNthPercentile(
- CutOff, MF, PSI, MBFI);
- }
- static bool isFunctionColdInCallGraphNthPercentile(
- int CutOff, const MachineFunction *MF, ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- return machine_size_opts_detail::isFunctionColdInCallGraphNthPercentile(
- CutOff, MF, PSI, MBFI);
- }
- static bool isColdBlock(const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI);
- }
- static bool isColdBlock(BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI);
- }
- static bool isHotBlockNthPercentile(int CutOff,
- const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isHotBlockNthPercentile(
- CutOff, MBB, PSI, MBFI);
- }
- static bool isHotBlockNthPercentile(int CutOff,
- BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isHotBlockNthPercentile(
- CutOff, BlockFreq, PSI, MBFI);
- }
- static bool isColdBlockNthPercentile(int CutOff, const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isColdBlockNthPercentile(CutOff, MBB, PSI,
- MBFI);
- }
- static bool isColdBlockNthPercentile(int CutOff, BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isColdBlockNthPercentile(CutOff, BlockFreq,
- PSI, MBFI);
- }
-};
-} // end anonymous namespace
-
bool llvm::shouldOptimizeForSize(const MachineFunction *MF,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI,
PGSOQueryType QueryType) {
- return shouldFuncOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
- MF, PSI, MBFI, QueryType);
+ return shouldFuncOptimizeForSizeImpl(MF, PSI, MBFI, QueryType);
}
bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
@@ -193,8 +36,7 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
const MachineBlockFrequencyInfo *MBFI,
PGSOQueryType QueryType) {
assert(MBB);
- return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
- MBB, PSI, MBFI, QueryType);
+ return shouldOptimizeForSizeImpl(MBB, PSI, MBFI, QueryType);
}
bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
@@ -205,6 +47,6 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
if (!PSI || !MBFIW)
return false;
BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB);
- return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
- BlockFreq, PSI, &MBFIW->getMBFI(), QueryType);
+ return shouldOptimizeForSizeImpl(BlockFreq, PSI, &MBFIW->getMBFI(),
+ QueryType);
}
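
The MachineSizeOpts.cpp hunk drops the MachineBasicBlockBFIAdapter shim: the shared shouldOptimizeForSizeImpl helpers are now called directly rather than through a templated adapter. The core of the removed helpers is the profile-count check sketched below, with stand-in types that only illustrate the shape of the query (not the real ProfileSummaryInfo/MBFI interfaces):

#include <cstdint>
#include <cstdio>
#include <optional>

struct ProfileSummaryStub {
  std::uint64_t ColdCountThreshold; // assumed cutoff; the real value comes from the profile summary
  bool isColdCount(std::uint64_t C) const { return C <= ColdCountThreshold; }
};

// A block is cold only when its profile count is known and classified cold.
bool isColdBlock(std::optional<std::uint64_t> BlockCount,
                 const ProfileSummaryStub &PSI) {
  return BlockCount && PSI.isColdCount(*BlockCount);
}

int main() {
  ProfileSummaryStub PSI{10};
  std::printf("%d\n", isColdBlock(3, PSI));            // 1: counted and cold
  std::printf("%d\n", isColdBlock(std::nullopt, PSI)); // 0: no profile data
}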
diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 5c6efd4af074..4f66f2e672d1 100644
--- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -318,6 +318,21 @@ public:
: MachineTraceMetrics::Ensemble(mtm) {}
};
+/// Pick only the current basic block for the trace and do not choose any
+/// predecessors/successors.
+class LocalEnsemble : public MachineTraceMetrics::Ensemble {
+ const char *getName() const override { return "Local"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
+ return nullptr;
+ };
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
+ return nullptr;
+ };
+
+public:
+ LocalEnsemble(MachineTraceMetrics *MTM)
+ : MachineTraceMetrics::Ensemble(MTM) {}
+};
} // end anonymous namespace
// Select the preferred predecessor for MBB.
@@ -380,15 +395,19 @@ MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
// Get an Ensemble sub-class for the requested trace strategy.
MachineTraceMetrics::Ensemble *
-MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
- assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
- Ensemble *&E = Ensembles[strategy];
+MachineTraceMetrics::getEnsemble(MachineTraceStrategy strategy) {
+ assert(strategy < MachineTraceStrategy::TS_NumStrategies &&
+ "Invalid trace strategy enum");
+ Ensemble *&E = Ensembles[static_cast<size_t>(strategy)];
if (E)
return E;
// Allocate new Ensemble on demand.
switch (strategy) {
- case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
+ case MachineTraceStrategy::TS_MinInstrCount:
+ return (E = new MinInstrCountEnsemble(this));
+ case MachineTraceStrategy::TS_Local:
+ return (E = new LocalEnsemble(this));
default: llvm_unreachable("Invalid trace strategy enum");
}
}
@@ -655,9 +674,7 @@ static bool getDataDeps(const MachineInstr &UseMI,
return false;
bool HasPhysRegs = false;
- for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
- E = UseMI.operands_end(); I != E; ++I) {
- const MachineOperand &MO = *I;
+ for (const MachineOperand &MO : UseMI.operands()) {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
@@ -669,7 +686,7 @@ static bool getDataDeps(const MachineInstr &UseMI,
}
// Collect virtual register reads.
if (MO.readsReg())
- Deps.push_back(DataDep(MRI, Reg, UseMI.getOperandNo(I)));
+ Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
}
return HasPhysRegs;
}
@@ -703,9 +720,7 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
SmallVector<MCRegister, 8> Kills;
SmallVector<unsigned, 8> LiveDefOps;
- for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
- ME = UseMI->operands_end(); MI != ME; ++MI) {
- const MachineOperand &MO = *MI;
+ for (const MachineOperand &MO : UseMI->operands()) {
if (!MO.isReg() || !MO.getReg().isPhysical())
continue;
MCRegister Reg = MO.getReg().asMCReg();
@@ -714,17 +729,17 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
if (MO.isDead())
Kills.push_back(Reg);
else
- LiveDefOps.push_back(UseMI->getOperandNo(MI));
+ LiveDefOps.push_back(MO.getOperandNo());
} else if (MO.isKill())
Kills.push_back(Reg);
// Identify dependencies.
if (!MO.readsReg())
continue;
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
if (I == RegUnits.end())
continue;
- Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
+ Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
break;
}
}
@@ -732,15 +747,14 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
// Update RegUnits to reflect live registers after UseMI.
// First kills.
for (MCRegister Kill : Kills)
- for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
- RegUnits.erase(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Kill))
+ RegUnits.erase(Unit);
// Second, live defs.
for (unsigned DefOp : LiveDefOps) {
- for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg().asMCReg(),
- TRI);
- Units.isValid(); ++Units) {
- LiveRegUnit &LRU = RegUnits[*Units];
+ for (MCRegUnit Unit :
+ TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
+ LiveRegUnit &LRU = RegUnits[Unit];
LRU.MI = UseMI;
LRU.Op = DefOp;
}
@@ -895,31 +909,27 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
const TargetRegisterInfo *TRI) {
SmallVector<unsigned, 8> ReadOps;
- for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
- MOE = MI.operands_end();
- MOI != MOE; ++MOI) {
- const MachineOperand &MO = *MOI;
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
if (!Reg.isPhysical())
continue;
if (MO.readsReg())
- ReadOps.push_back(MI.getOperandNo(MOI));
+ ReadOps.push_back(MO.getOperandNo());
if (!MO.isDef())
continue;
// This is a def of Reg. Remove corresponding entries from RegUnits, and
// update MI Height to consider the physreg dependencies.
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units) {
- SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
if (I == RegUnits.end())
continue;
unsigned DepHeight = I->Cycle;
if (!MI.isTransient()) {
// We may not know the UseMI of this dependency, if it came from the
// live-in list. SchedModel can handle a NULL UseMI.
- DepHeight += SchedModel.computeOperandLatency(&MI, MI.getOperandNo(MOI),
+ DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
I->MI, I->Op);
}
Height = std::max(Height, DepHeight);
@@ -931,8 +941,8 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
// Now we know the height of MI. Update any regunits read.
for (size_t I = 0, E = ReadOps.size(); I != E; ++I) {
MCRegister Reg = MI.getOperand(ReadOps[I]).getReg().asMCReg();
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- LiveRegUnit &LRU = RegUnits[*Units];
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ LiveRegUnit &LRU = RegUnits[Unit];
// Set the height to the highest reader of the unit.
if (LRU.Cycle <= Height && LRU.MI != &MI) {
LRU.Cycle = Height;
@@ -1087,10 +1097,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
}
// Go through the block backwards.
- for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
- BI != BB;) {
- const MachineInstr &MI = *--BI;
-
+ for (const MachineInstr &MI : reverse(*MBB)) {
// Find the MI height as determined by virtual register uses in the
// trace below.
unsigned Cycle = 0;
@@ -1137,11 +1144,10 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
}
// Transfer the live regunits to the live-in list.
- for (SparseSet<LiveRegUnit>::const_iterator
- RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
- TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
- LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) << '@'
- << RI->Cycle);
+ for (const LiveRegUnit &RU : RegUnits) {
+ TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
+ LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
+ << RU.Cycle);
}
LLVM_DEBUG(dbgs() << '\n');
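
MachineTraceMetrics.cpp gains a second trace strategy: besides MinInstrCount there is now a Local ensemble that never leaves the current block, selected through the MachineTraceStrategy enum and cached on first use. A hedged sketch of that enum-indexed, lazily filled cache, with illustrative names rather than the real API:

#include <array>
#include <cstddef>
#include <cstdio>
#include <memory>

enum class TraceStrategy { MinInstrCount, Local, NumStrategies };

struct Ensemble {
  virtual const char *name() const = 0;
  virtual ~Ensemble() = default;
};
struct MinInstrCountEnsemble : Ensemble {
  const char *name() const override { return "MinInstr"; }
};
// Looks only at the current block: no predecessor or successor is picked.
struct LocalEnsemble : Ensemble {
  const char *name() const override { return "Local"; }
};

struct TraceMetricsStub {
  std::array<std::unique_ptr<Ensemble>,
             static_cast<std::size_t>(TraceStrategy::NumStrategies)>
      Ensembles;

  // One slot per strategy, filled the first time that strategy is requested.
  Ensemble &getEnsemble(TraceStrategy S) {
    auto &Slot = Ensembles[static_cast<std::size_t>(S)];
    if (!Slot)
      Slot = (S == TraceStrategy::Local)
                 ? std::unique_ptr<Ensemble>(new LocalEnsemble())
                 : std::unique_ptr<Ensemble>(new MinInstrCountEnsemble());
    return *Slot;
  }
};

int main() {
  TraceMetricsStub MTM;
  std::printf("%s\n", MTM.getEnsemble(TraceStrategy::Local).name());
  std::printf("%s\n", MTM.getEnsemble(TraceStrategy::MinInstrCount).name());
}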
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 2fe5e40a58c2..0e02c50284c6 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -20,9 +20,7 @@ using namespace llvm;
template <>
bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::hasDivergentDefs(
const MachineInstr &I) const {
- for (auto &op : I.operands()) {
- if (!op.isReg() || !op.isDef())
- continue;
+ for (auto &op : I.all_defs()) {
if (isDivergent(op.getReg()))
return true;
}
@@ -31,21 +29,17 @@ bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::hasDivergentDefs(
template <>
bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::markDefsDivergent(
- const MachineInstr &Instr, bool AllDefsDivergent) {
+ const MachineInstr &Instr) {
bool insertedDivergent = false;
const auto &MRI = F.getRegInfo();
+ const auto &RBI = *F.getSubtarget().getRegBankInfo();
const auto &TRI = *MRI.getTargetRegisterInfo();
- for (auto &op : Instr.operands()) {
- if (!op.isReg() || !op.isDef())
- continue;
+ for (auto &op : Instr.all_defs()) {
if (!op.getReg().isVirtual())
continue;
assert(!op.getSubReg());
- if (!AllDefsDivergent) {
- auto *RC = MRI.getRegClassOrNull(op.getReg());
- if (RC && !TRI.isDivergentRegClass(RC))
- continue;
- }
+ if (TRI.isUniformReg(MRI, RBI, op.getReg()))
+ continue;
insertedDivergent |= markDivergent(op.getReg());
}
return insertedDivergent;
@@ -64,7 +58,7 @@ void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::initialize() {
}
if (uniformity == InstructionUniformity::NeverUniform) {
- markDefsDivergent(instr, /* AllDefsDivergent = */ false);
+ markDivergent(instr);
}
}
}
@@ -73,12 +67,10 @@ void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::initialize() {
template <>
void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::pushUsers(
Register Reg) {
+ assert(isDivergent(Reg));
const auto &RegInfo = F.getRegInfo();
for (MachineInstr &UserInstr : RegInfo.use_instructions(Reg)) {
- if (isAlwaysUniform(UserInstr))
- continue;
- if (markDivergent(UserInstr))
- Worklist.push_back(&UserInstr);
+ markDivergent(UserInstr);
}
}
@@ -88,9 +80,10 @@ void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::pushUsers(
assert(!isAlwaysUniform(Instr));
if (Instr.isTerminator())
return;
- for (const MachineOperand &op : Instr.operands()) {
- if (op.isReg() && op.isDef() && op.getReg().isVirtual())
- pushUsers(op.getReg());
+ for (const MachineOperand &op : Instr.all_defs()) {
+ auto Reg = op.getReg();
+ if (isDivergent(Reg))
+ pushUsers(Reg);
}
}
@@ -102,7 +95,12 @@ bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::usesValueFromCycle(
if (!Op.isReg() || !Op.readsReg())
continue;
auto Reg = Op.getReg();
- assert(Reg.isVirtual());
+
+ // FIXME: Physical registers need to be properly checked instead of always
+ // returning true
+ if (Reg.isPhysical())
+ return true;
+
auto *Def = F.getRegInfo().getVRegDef(Reg);
if (DefCycle.contains(Def->getParent()))
return true;
@@ -110,18 +108,59 @@ bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::usesValueFromCycle(
return false;
}
+template <>
+void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::
+ propagateTemporalDivergence(const MachineInstr &I,
+ const MachineCycle &DefCycle) {
+ const auto &RegInfo = F.getRegInfo();
+ for (auto &Op : I.all_defs()) {
+ if (!Op.getReg().isVirtual())
+ continue;
+ auto Reg = Op.getReg();
+ if (isDivergent(Reg))
+ continue;
+ for (MachineInstr &UserInstr : RegInfo.use_instructions(Reg)) {
+ if (DefCycle.contains(UserInstr.getParent()))
+ continue;
+ markDivergent(UserInstr);
+ }
+ }
+}
+
+template <>
+bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::isDivergentUse(
+ const MachineOperand &U) const {
+ if (!U.isReg())
+ return false;
+
+ auto Reg = U.getReg();
+ if (isDivergent(Reg))
+ return true;
+
+ const auto &RegInfo = F.getRegInfo();
+ auto *Def = RegInfo.getOneDef(Reg);
+ if (!Def)
+ return true;
+
+ auto *DefInstr = Def->getParent();
+ auto *UseInstr = U.getParent();
+ return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+}
+
// This ensures explicit instantiation of
// GenericUniformityAnalysisImpl::ImplDeleter::operator()
template class llvm::GenericUniformityInfo<MachineSSAContext>;
template struct llvm::GenericUniformityAnalysisImplDeleter<
llvm::GenericUniformityAnalysisImpl<MachineSSAContext>>;
-MachineUniformityInfo
-llvm::computeMachineUniformityInfo(MachineFunction &F,
- const MachineCycleInfo &cycleInfo,
- const MachineDomTree &domTree) {
+MachineUniformityInfo llvm::computeMachineUniformityInfo(
+ MachineFunction &F, const MachineCycleInfo &cycleInfo,
+ const MachineDomTree &domTree, bool HasBranchDivergence) {
assert(F.getRegInfo().isSSA() && "Expected to be run on SSA form!");
- return MachineUniformityInfo(F, domTree, cycleInfo);
+ MachineUniformityInfo UI(F, domTree, cycleInfo);
+ if (HasBranchDivergence)
+ UI.compute();
+ return UI;
}
namespace {
@@ -181,7 +220,9 @@ void MachineUniformityAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const {
bool MachineUniformityAnalysisPass::runOnMachineFunction(MachineFunction &MF) {
auto &DomTree = getAnalysis<MachineDominatorTree>().getBase();
auto &CI = getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
- UI = computeMachineUniformityInfo(MF, CI, DomTree);
+ // FIXME: Query TTI::hasBranchDivergence. -run-pass seems to end up with a
+ // default NoTTI
+ UI = computeMachineUniformityInfo(MF, CI, DomTree, true);
return false;
}
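
The uniformity-analysis changes above simplify how divergence spreads: defs of a never-uniform instruction are marked divergent, users of a divergent register are pushed, and temporal divergence marks users outside the defining cycle. The propagation itself is a plain worklist fixed point, sketched here on a toy def-use graph (illustrative only, not the MachineSSAContext API):

#include <cstdio>
#include <map>
#include <set>
#include <vector>

int main() {
  // Value -> users (a tiny SSA-like def-use graph).
  std::map<int, std::vector<int>> Users = {
      {0, {1, 2}}, {1, {3}}, {2, {}}, {3, {}}};
  std::set<int> Divergent;
  std::vector<int> Worklist;

  auto markDivergent = [&](int V) {
    if (Divergent.insert(V).second) // only requeue on first insertion
      Worklist.push_back(V);
  };

  markDivergent(0); // seed: a never-uniform value
  while (!Worklist.empty()) {
    int V = Worklist.back();
    Worklist.pop_back();
    for (int U : Users[V]) // divergence flows to every user
      markDivergent(U);
  }

  for (int V : Divergent)
    std::printf("divergent: %d\n", V);
}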
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index ddd5a027c2cd..7acd3c4039e8 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -31,13 +31,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -58,6 +58,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
@@ -71,7 +72,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"
@@ -95,19 +95,19 @@ namespace {
Pass *const PASS;
const char *Banner;
- const MachineFunction *MF;
- const TargetMachine *TM;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const MachineRegisterInfo *MRI;
- const RegisterBankInfo *RBI;
+ const MachineFunction *MF = nullptr;
+ const TargetMachine *TM = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ const RegisterBankInfo *RBI = nullptr;
- unsigned foundErrors;
+ unsigned foundErrors = 0;
// Avoid querying the MachineFunctionProperties for each operand.
- bool isFunctionRegBankSelected;
- bool isFunctionSelected;
- bool isFunctionTracksDebugUserValues;
+ bool isFunctionRegBankSelected = false;
+ bool isFunctionSelected = false;
+ bool isFunctionTracksDebugUserValues = false;
using RegVector = SmallVector<Register, 16>;
using RegMaskVector = SmallVector<const uint32_t *, 4>;
@@ -115,8 +115,8 @@ namespace {
using RegMap = DenseMap<Register, const MachineInstr *>;
using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>;
- const MachineInstr *FirstNonPHI;
- const MachineInstr *FirstTerminator;
+ const MachineInstr *FirstNonPHI = nullptr;
+ const MachineInstr *FirstTerminator = nullptr;
BlockSet FunctionBlocks;
BitVector regsReserved;
@@ -208,10 +208,10 @@ namespace {
}
// Analysis information if available
- LiveVariables *LiveVars;
- LiveIntervals *LiveInts;
- LiveStacks *LiveStks;
- SlotIndexes *Indexes;
+ LiveVariables *LiveVars = nullptr;
+ LiveIntervals *LiveInts = nullptr;
+ LiveStacks *LiveStks = nullptr;
+ SlotIndexes *Indexes = nullptr;
void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
@@ -296,6 +296,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addUsedIfAvailable<LiveStacks>();
AU.addUsedIfAvailable<LiveVariables>();
+ AU.addUsedIfAvailable<SlotIndexes>();
+ AU.addUsedIfAvailable<LiveIntervals>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -627,8 +629,11 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
// it is an entry block or landing pad.
for (const auto &LI : MBB->liveins()) {
if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
- MBB->getIterator() != MBB->getParent()->begin()) {
- report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB);
+ MBB->getIterator() != MBB->getParent()->begin() &&
+ !MBB->isInlineAsmBrIndirectTarget()) {
+ report("MBB has allocatable live-in, but isn't entry, landing-pad, or "
+ "inlineasm-br-indirect-target.",
+ MBB);
report_context(LI.PhysReg);
}
}
@@ -1746,6 +1751,13 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
report("alignment immediate must be >= 1", MI);
break;
}
+ case TargetOpcode::G_CONSTANT_POOL: {
+ if (!MI->getOperand(1).isCPI())
+ report("Src operand 1 must be a constant pool index", MI);
+ if (!MRI->getType(MI->getOperand(0).getReg()).isPointer())
+ report("Dst operand 0 must be a pointer", MI);
+ break;
+ }
default:
break;
}
@@ -2162,6 +2174,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
+ const RegisterBankInfo *RBI = MF->getSubtarget().getRegBankInfo();
// If we're post-RegBankSelect, the gvreg must have a bank.
if (!RegBank && isFunctionRegBankSelected) {
@@ -2173,12 +2186,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Make sure the register fits into its register bank if any.
if (RegBank && Ty.isValid() &&
- RegBank->getSize() < Ty.getSizeInBits()) {
+ RBI->getMaximumSize(RegBank->getID()) < Ty.getSizeInBits()) {
report("Register bank is too small for virtual register", MO,
MONum);
errs() << "Register bank " << RegBank->getName() << " too small("
- << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
- << "-bits\n";
+ << RBI->getMaximumSize(RegBank->getID()) << ") to fit "
+ << Ty.getSizeInBits() << "-bits\n";
return;
}
}
@@ -2427,12 +2440,11 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI);
// Check the cached regunit intervals.
if (Reg.isPhysical() && !isReserved(Reg)) {
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units) {
- if (MRI->isReservedRegUnit(*Units))
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
+ if (MRI->isReservedRegUnit(Unit))
continue;
- if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units))
- checkLivenessAtUse(MO, MONum, UseIdx, *LR, *Units);
+ if (const LiveRange *LR = LiveInts->getCachedRegUnit(Unit))
+ checkLivenessAtUse(MO, MONum, UseIdx, *LR, Unit);
}
}
@@ -3096,108 +3108,109 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
return;
}
- // No more checks for live-out segments.
- if (S.end == LiveInts->getMBBEndIdx(EndMBB))
- return;
-
- // RegUnit intervals are allowed dead phis.
- if (!Reg.isVirtual() && VNI->isPHIDef() && S.start == VNI->def &&
- S.end == VNI->def.getDeadSlot())
- return;
-
- // The live segment is ending inside EndMBB
- const MachineInstr *MI =
- LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
- if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB);
- report_context(LR, Reg, LaneMask);
- report_context(S);
- return;
- }
-
- // The block slot must refer to a basic block boundary.
- if (S.end.isBlock()) {
- report("Live segment ends at B slot of an instruction", EndMBB);
- report_context(LR, Reg, LaneMask);
- report_context(S);
- }
+ // Checks for non-live-out segments.
+ if (S.end != LiveInts->getMBBEndIdx(EndMBB)) {
+ // RegUnit intervals are allowed dead phis.
+ if (!Reg.isVirtual() && VNI->isPHIDef() && S.start == VNI->def &&
+ S.end == VNI->def.getDeadSlot())
+ return;
- if (S.end.isDead()) {
- // Segment ends on the dead slot.
- // That means there must be a dead def.
- if (!SlotIndex::isSameInstr(S.start, S.end)) {
- report("Live segment ending at dead slot spans instructions", EndMBB);
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
report_context(LR, Reg, LaneMask);
report_context(S);
+ return;
}
- }
- // After tied operands are rewritten, a live segment can only end at an
- // early-clobber slot if it is being redefined by an early-clobber def.
- // TODO: Before tied operands are rewritten, a live segment can only end at an
- // early-clobber slot if the last use is tied to an early-clobber def.
- if (MF->getProperties().hasProperty(
- MachineFunctionProperties::Property::TiedOpsRewritten) &&
- S.end.isEarlyClobber()) {
- if (I+1 == LR.end() || (I+1)->start != S.end) {
- report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", EndMBB);
+ // The block slot must refer to a basic block boundary.
+ if (S.end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", EndMBB);
report_context(LR, Reg, LaneMask);
report_context(S);
}
- }
- // The following checks only apply to virtual registers. Physreg liveness
- // is too weird to check.
- if (Reg.isVirtual()) {
- // A live segment can end with either a redefinition, a kill flag on a
- // use, or a dead flag on a def.
- bool hasRead = false;
- bool hasSubRegDef = false;
- bool hasDeadDef = false;
- for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || MOI->getReg() != Reg)
- continue;
- unsigned Sub = MOI->getSubReg();
- LaneBitmask SLM = Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub)
- : LaneBitmask::getAll();
- if (MOI->isDef()) {
- if (Sub != 0) {
- hasSubRegDef = true;
- // An operand %0:sub0 reads %0:sub1..n. Invert the lane
- // mask for subregister defs. Read-undef defs will be handled by
- // readsReg below.
- SLM = ~SLM;
- }
- if (MOI->isDead())
- hasDeadDef = true;
+ if (S.end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(S.start, S.end)) {
+ report("Live segment ending at dead slot spans instructions", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
- if (LaneMask.any() && (LaneMask & SLM).none())
- continue;
- if (MOI->readsReg())
- hasRead = true;
}
- if (S.end.isDead()) {
- // Make sure that the corresponding machine operand for a "dead" live
- // range has the dead flag. We cannot perform this check for subregister
- // liveranges as partially dead values are allowed.
- if (LaneMask.none() && !hasDeadDef) {
- report("Instruction ending live segment on dead slot has no dead flag",
- MI);
+
+ // After tied operands are rewritten, a live segment can only end at an
+ // early-clobber slot if it is being redefined by an early-clobber def.
+ // TODO: Before tied operands are rewritten, a live segment can only end at
+ // an early-clobber slot if the last use is tied to an early-clobber def.
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TiedOpsRewritten) &&
+ S.end.isEarlyClobber()) {
+ if (I + 1 == LR.end() || (I + 1)->start != S.end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction",
+ EndMBB);
report_context(LR, Reg, LaneMask);
report_context(S);
}
- } else {
- if (!hasRead) {
- // When tracking subregister liveness, the main range must start new
- // values on partial register writes, even if there is no read.
- if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask.any() ||
- !hasSubRegDef) {
- report("Instruction ending live segment doesn't read the register",
- MI);
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (Reg.isVirtual()) {
+ // A live segment can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasSubRegDef = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != Reg)
+ continue;
+ unsigned Sub = MOI->getSubReg();
+ LaneBitmask SLM =
+ Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) : LaneBitmask::getAll();
+ if (MOI->isDef()) {
+ if (Sub != 0) {
+ hasSubRegDef = true;
+ // An operand %0:sub0 reads %0:sub1..n. Invert the lane
+ // mask for subregister defs. Read-undef defs will be handled by
+ // readsReg below.
+ SLM = ~SLM;
+ }
+ if (MOI->isDead())
+ hasDeadDef = true;
+ }
+ if (LaneMask.any() && (LaneMask & SLM).none())
+ continue;
+ if (MOI->readsReg())
+ hasRead = true;
+ }
+ if (S.end.isDead()) {
+ // Make sure that the corresponding machine operand for a "dead" live
+ // range has the dead flag. We cannot perform this check for subregister
+ // liveranges as partially dead values are allowed.
+ if (LaneMask.none() && !hasDeadDef) {
+ report(
+ "Instruction ending live segment on dead slot has no dead flag",
+ MI);
report_context(LR, Reg, LaneMask);
report_context(S);
}
+ } else {
+ if (!hasRead) {
+ // When tracking subregister liveness, the main range must start new
+ // values on partial register writes, even if there is no read.
+ if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask.any() ||
+ !hasSubRegDef) {
+ report("Instruction ending live segment doesn't read the register",
+ MI);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ }
}
}
}
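
One MachineVerifier hunk above replaces RegisterBank::getSize() with RegisterBankInfo::getMaximumSize(BankID) when checking that a generic virtual register's type fits its assigned bank. The shape of that check, with assumed stand-in types and widths rather than the real GlobalISel classes:

#include <cstdio>

struct RegisterBankInfoStub {
  // Maximum register width per bank id, in bits (assumed values).
  unsigned getMaximumSize(unsigned BankID) const {
    return BankID == 0 ? 32 : 128;
  }
};

bool fitsInBank(const RegisterBankInfoStub &RBI, unsigned BankID,
                unsigned TypeBits) {
  return RBI.getMaximumSize(BankID) >= TypeBits;
}

int main() {
  RegisterBankInfoStub RBI;
  std::printf("s64 in a 32-bit bank: %s\n",
              fitsInBank(RBI, 0, 64) ? "ok" : "too small");
  std::printf("v4s32 in a 128-bit bank: %s\n",
              fitsInBank(RBI, 1, 128) ? "ok" : "too small");
}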
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index af9fef0720f9..0bef513342ff 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -74,10 +74,7 @@ void ModuloScheduleExpander::expand() {
// stage difference for each use. Keep the maximum value.
for (MachineInstr *MI : Schedule.getInstructions()) {
int DefStage = Schedule.getStage(MI);
- for (const MachineOperand &Op : MI->operands()) {
- if (!Op.isReg() || !Op.isDef())
- continue;
-
+ for (const MachineOperand &Op : MI->all_defs()) {
Register Reg = Op.getReg();
unsigned MaxDiff = 0;
bool PhiIsSwapped = false;
@@ -743,9 +740,7 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
continue;
}
bool used = true;
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
+ for (const MachineOperand &MO : MI->all_defs()) {
Register reg = MO.getReg();
// Assume physical registers are used, unless they are marked dead.
if (reg.isPhysical()) {
diff --git a/llvm/lib/CodeGen/OptimizePHIs.cpp b/llvm/lib/CodeGen/OptimizePHIs.cpp
index e68a6398cf51..d997fbbed5a6 100644
--- a/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -34,8 +34,8 @@ STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
namespace {
class OptimizePHIs : public MachineFunctionPass {
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
public:
static char ID; // Pass identification
diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp
index 51035d2e442f..dbb9a9ffdf60 100644
--- a/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/llvm/lib/CodeGen/PHIElimination.cpp
@@ -63,9 +63,9 @@ static cl::opt<bool> NoPhiElimLiveOutEarlyExit(
namespace {
class PHIElimination : public MachineFunctionPass {
- MachineRegisterInfo *MRI; // Machine register information
- LiveVariables *LV;
- LiveIntervals *LIS;
+ MachineRegisterInfo *MRI = nullptr; // Machine register information
+ LiveVariables *LV = nullptr;
+ LiveIntervals *LIS = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index c3458be0f883..a08cc78f11b1 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -150,11 +150,11 @@ namespace {
class RecurrenceInstr;
class PeepholeOptimizer : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
- MachineDominatorTree *DT; // Machine dominator tree
- MachineLoopInfo *MLI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineDominatorTree *DT = nullptr; // Machine dominator tree
+ MachineLoopInfo *MLI = nullptr;
public:
static char ID; // Pass identification
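
A pattern repeated across MachineSinking, MachineVerifier, OptimizePHIs, PHIElimination, PeepholeOptimizer and ProcessImplicitDefs above is giving raw-pointer pass members in-class "= nullptr" (or "= false"/"= 0") initializers, so a freshly constructed pass has no indeterminate state before runOnMachineFunction fills it in. A tiny illustration with a stand-in type:

#include <cassert>
#include <cstdio>

struct TargetInstrInfoStub {}; // illustrative stand-in, not the LLVM class

class ExamplePass {
  const TargetInstrInfoStub *TII = nullptr; // well-defined default
public:
  void init(const TargetInstrInfoStub *T) { TII = T; }
  bool isInitialized() const { return TII != nullptr; }
};

int main() {
  ExamplePass P;
  assert(!P.isInitialized()); // safe to query even before init()
  TargetInstrInfoStub TII;
  P.init(&TII);
  std::printf("initialized: %d\n", P.isInitialized());
}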
diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 98fc7e07a1b4..170008ab67cb 100644
--- a/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -182,7 +182,7 @@ namespace {
private:
/// Apply each ScheduleDAGMutation step in order.
- void postprocessDAG();
+ void postProcessDAG();
void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
void ReleaseSuccessors(SUnit *SU);
@@ -407,7 +407,7 @@ void SchedulePostRATDList::schedule() {
}
}
- postprocessDAG();
+ postProcessDAG();
LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
LLVM_DEBUG(dump());
@@ -436,7 +436,7 @@ void SchedulePostRATDList::finishBlock() {
}
/// Apply each ScheduleDAGMutation step in order.
-void SchedulePostRATDList::postprocessDAG() {
+void SchedulePostRATDList::postProcessDAG() {
for (auto &M : Mutations)
M->apply(this);
}
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 87e2f9f20021..3448c56e4994 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -6,14 +6,16 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass implements IR lowering for the llvm.load.relative and llvm.objc.*
-// intrinsics.
+// This pass implements IR lowering for the llvm.memcpy, llvm.memmove,
+// llvm.memset, llvm.load.relative and llvm.objc.* intrinsics.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -24,9 +26,44 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
using namespace llvm;
+/// Threshold to leave statically sized memory intrinsic calls. Calls of known
+/// size larger than this will be expanded by the pass. Calls of unknown or
+/// lower size will be left for expansion in codegen.
+static cl::opt<int64_t> MemIntrinsicExpandSizeThresholdOpt(
+ "mem-intrinsic-expand-size",
+ cl::desc("Set minimum mem intrinsic size to expand in IR"), cl::init(-1),
+ cl::Hidden);
+
+namespace {
+
+struct PreISelIntrinsicLowering {
+ const function_ref<TargetTransformInfo &(Function &)> LookupTTI;
+ const function_ref<TargetLibraryInfo &(Function &)> LookupLibInfo;
+
+  /// If this is true, assume it's preferable to leave memory intrinsic calls
+ /// for replacement with a library call later. Otherwise this depends on
+ /// TargetLibraryInfo availability of the corresponding function.
+ const bool UseMemIntrinsicLibFunc;
+
+ explicit PreISelIntrinsicLowering(
+ function_ref<TargetTransformInfo &(Function &)> LookupTTI_,
+ function_ref<TargetLibraryInfo &(Function &)> LookupLibInfo_,
+ bool UseMemIntrinsicLibFunc_ = true)
+ : LookupTTI(LookupTTI_), LookupLibInfo(LookupLibInfo_),
+ UseMemIntrinsicLibFunc(UseMemIntrinsicLibFunc_) {}
+
+ static bool shouldExpandMemIntrinsicWithSize(Value *Size,
+ const TargetTransformInfo &TTI);
+ bool expandMemIntrinsicUses(Function &F) const;
+ bool lowerIntrinsics(Module &M) const;
+};
+
+} // namespace
+
static bool lowerLoadRelative(Function &F) {
if (F.use_empty())
return false;
@@ -133,16 +170,104 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
return true;
}
-static bool lowerIntrinsics(Module &M) {
+// TODO: Should refine based on estimated number of accesses (e.g. does it
+// require splitting based on alignment)
+bool PreISelIntrinsicLowering::shouldExpandMemIntrinsicWithSize(
+ Value *Size, const TargetTransformInfo &TTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(Size);
+ if (!CI)
+ return true;
+ uint64_t Threshold = MemIntrinsicExpandSizeThresholdOpt.getNumOccurrences()
+ ? MemIntrinsicExpandSizeThresholdOpt
+ : TTI.getMaxMemIntrinsicInlineSizeThreshold();
+ uint64_t SizeVal = CI->getZExtValue();
+
+ // Treat a threshold of 0 as a special case to force expansion of all
+ // intrinsics, including size 0.
+ return SizeVal > Threshold || Threshold == 0;
+}
+
+// TODO: Handle atomic memcpy and memcpy.inline
+// TODO: Pass ScalarEvolution
+bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
+ Intrinsic::ID ID = F.getIntrinsicID();
bool Changed = false;
- for (Function &F : M) {
- if (F.getName().startswith("llvm.load.relative.")) {
- Changed |= lowerLoadRelative(F);
- continue;
+
+ for (User *U : llvm::make_early_inc_range(F.users())) {
+ Instruction *Inst = cast<Instruction>(U);
+
+ switch (ID) {
+ case Intrinsic::memcpy: {
+ auto *Memcpy = cast<MemCpyInst>(Inst);
+ Function *ParentFunc = Memcpy->getFunction();
+ const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
+ if (shouldExpandMemIntrinsicWithSize(Memcpy->getLength(), TTI)) {
+ if (UseMemIntrinsicLibFunc &&
+ LookupLibInfo(*ParentFunc).has(LibFunc_memcpy))
+ break;
+
+ expandMemCpyAsLoop(Memcpy, TTI);
+ Changed = true;
+ Memcpy->eraseFromParent();
+ }
+
+ break;
+ }
+ case Intrinsic::memmove: {
+ auto *Memmove = cast<MemMoveInst>(Inst);
+ Function *ParentFunc = Memmove->getFunction();
+ const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
+ if (shouldExpandMemIntrinsicWithSize(Memmove->getLength(), TTI)) {
+ if (UseMemIntrinsicLibFunc &&
+ LookupLibInfo(*ParentFunc).has(LibFunc_memmove))
+ break;
+
+ if (expandMemMoveAsLoop(Memmove, TTI)) {
+ Changed = true;
+ Memmove->eraseFromParent();
+ }
+ }
+
+ break;
}
+ case Intrinsic::memset: {
+ auto *Memset = cast<MemSetInst>(Inst);
+ Function *ParentFunc = Memset->getFunction();
+ const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
+ if (shouldExpandMemIntrinsicWithSize(Memset->getLength(), TTI)) {
+ if (UseMemIntrinsicLibFunc &&
+ LookupLibInfo(*Memset->getFunction()).has(LibFunc_memset))
+ break;
+
+ expandMemSetAsLoop(Memset);
+ Changed = true;
+ Memset->eraseFromParent();
+ }
+
+ break;
+ }
+ default:
+ llvm_unreachable("unhandled intrinsic");
+ }
+ }
+
+ return Changed;
+}
+
+bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
+ bool Changed = false;
+ for (Function &F : M) {
switch (F.getIntrinsicID()) {
default:
break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ Changed |= expandMemIntrinsicUses(F);
+ break;
+ case Intrinsic::load_relative:
+ Changed |= lowerLoadRelative(F);
+ break;
case Intrinsic::objc_autorelease:
Changed |= lowerObjCCall(F, "objc_autorelease");
break;
@@ -231,7 +356,23 @@ public:
PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {}
- bool runOnModule(Module &M) override { return lowerIntrinsics(M); }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override {
+ auto LookupTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+
+ auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+
+ PreISelIntrinsicLowering Lowering(LookupTTI, LookupTLI);
+ return Lowering.lowerIntrinsics(M);
+ }
};
} // end anonymous namespace
@@ -248,7 +389,18 @@ ModulePass *llvm::createPreISelIntrinsicLoweringPass() {
PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M,
ModuleAnalysisManager &AM) {
- if (!lowerIntrinsics(M))
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+
+ auto LookupTTI = [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ PreISelIntrinsicLowering Lowering(LookupTTI, LookupTLI);
+ if (!Lowering.lowerIntrinsics(M))
return PreservedAnalyses::all();
else
return PreservedAnalyses::none();
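
The PreISelIntrinsicLowering rewrite above teaches the pass to expand large, statically sized memcpy/memmove/memset calls into loops in IR, keyed on TTI's inline-size threshold (overridable with -mem-intrinsic-expand-size) and on whether a library function is available as a fallback. The size decision reduces to the small predicate sketched below, with illustrative numbers in place of a real TTI query:

#include <cstdint>
#include <cstdio>
#include <optional>

// Expand when the length is unknown, or known and above the threshold;
// a threshold of 0 forces expansion of everything, including size 0.
bool shouldExpandWithSize(std::optional<std::uint64_t> Size,
                          std::uint64_t Threshold) {
  if (!Size)
    return true; // non-constant length: treat as expandable here
  return *Size > Threshold || Threshold == 0;
}

int main() {
  std::printf("%d\n", shouldExpandWithSize(std::optional<std::uint64_t>{8},
                                           64)); // 0: small, keep the intrinsic
  std::printf("%d\n", shouldExpandWithSize(std::optional<std::uint64_t>{4096},
                                           64)); // 1: large, expand as a loop
  std::printf("%d\n", shouldExpandWithSize(std::nullopt, 64)); // 1: unknown length
}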
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 7e46dd35ce47..be81ecab9c89 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -27,9 +27,9 @@ namespace {
/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
/// for each use. Add isUndef marker to implicit_def defs and their uses.
class ProcessImplicitDefs : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
SmallSetVector<MachineInstr*, 16> WorkList;
@@ -72,8 +72,8 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
!MI->isRegSequence() &&
!MI->isPHI())
return false;
- for (const MachineOperand &MO : MI->operands())
- if (MO.isReg() && MO.isUse() && MO.readsReg())
+ for (const MachineOperand &MO : MI->all_uses())
+ if (MO.readsReg())
return false;
return true;
}
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index cc70ec477650..e323aaaeefaf 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -96,7 +96,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- RegScavenger *RS;
+ RegScavenger *RS = nullptr;
// MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
// stack frame indexes.
@@ -111,11 +111,11 @@ private:
// Flag to control whether to use the register scavenger to resolve
// frame index materialization registers. Set according to
// TRI->requiresFrameIndexScavenging() for the current function.
- bool FrameIndexVirtualScavenging;
+ bool FrameIndexVirtualScavenging = false;
// Flag to control whether the scavenger should be passed even though
// FrameIndexVirtualScavenging is used.
- bool FrameIndexEliminationScavenging;
+ bool FrameIndexEliminationScavenging = false;
// Emit remarks.
MachineOptimizationRemarkEmitter *ORE = nullptr;
@@ -309,19 +309,20 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
SpillSize += MFI.getObjectSize(Idx);
}
- float SpillPct =
+ [[maybe_unused]] float SpillPct =
static_cast<float>(SpillSize) / static_cast<float>(StackSize);
- float VarPct = 1.0f - SpillPct;
- int64_t VariableSize = StackSize - SpillSize;
- dbgs() << formatv("{0}/{1} ({3:P}) spills, {2}/{1} ({4:P}) variables",
- SpillSize, StackSize, VariableSize, SpillPct, VarPct);
+ LLVM_DEBUG(
+ dbgs() << formatv("{0}/{1} ({3:P}) spills, {2}/{1} ({4:P}) variables",
+ SpillSize, StackSize, StackSize - SpillSize, SpillPct,
+ 1.0f - SpillPct));
if (UnsafeStackSize != 0) {
- float UnsafePct =
- static_cast<float>(UnsafeStackSize) / static_cast<float>(StackSize);
- dbgs() << formatv(", {0}/{2} ({1:P}) unsafe stack", UnsafeStackSize,
- UnsafePct, StackSize);
+ LLVM_DEBUG(dbgs() << formatv(", {0}/{2} ({1:P}) unsafe stack",
+ UnsafeStackSize,
+ static_cast<float>(UnsafeStackSize) /
+ static_cast<float>(StackSize),
+ StackSize));
}
- dbgs() << "\n";
+ LLVM_DEBUG(dbgs() << "\n");
}
ORE->emit([&]() {
@@ -375,8 +376,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &MF) {
}
assert(!MFI.isMaxCallFrameSizeComputed() ||
- (MFI.getMaxCallFrameSize() == MaxCallFrameSize &&
- MFI.adjustsStack() == AdjustsStack));
+ (MFI.getMaxCallFrameSize() >= MaxCallFrameSize &&
+ !(AdjustsStack && !MFI.adjustsStack())));
MFI.setAdjustsStack(AdjustsStack);
MFI.setMaxCallFrameSize(MaxCallFrameSize);
@@ -692,7 +693,7 @@ void PEI::spillCalleeSavedRegs(MachineFunction &MF) {
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
bool StackGrowsDown, int64_t &Offset,
- Align &MaxAlign, unsigned Skew) {
+ Align &MaxAlign) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
Offset += MFI.getObjectSize(FrameIdx);
@@ -704,7 +705,7 @@ static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
MaxAlign = std::max(MaxAlign, Alignment);
// Adjust to alignment boundary.
- Offset = alignTo(Offset, Alignment, Skew);
+ Offset = alignTo(Offset, Alignment);
if (StackGrowsDown) {
LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset
@@ -828,11 +829,10 @@ static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
SmallSet<int, 16> &ProtectedObjs,
MachineFrameInfo &MFI, bool StackGrowsDown,
- int64_t &Offset, Align &MaxAlign,
- unsigned Skew) {
+ int64_t &Offset, Align &MaxAlign) {
for (int i : UnassignedObjs) {
- AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
ProtectedObjs.insert(i);
}
}
@@ -858,9 +858,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
&& "Local area offset should be in direction of stack growth");
int64_t Offset = LocalAreaOffset;
- // Skew to be applied to alignment.
- unsigned Skew = TFI.getStackAlignmentSkew(MF);
-
#ifdef EXPENSIVE_CHECKS
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i)
if (!MFI.isDeadObjectIndex(i) &&
@@ -908,8 +905,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
if (!StackGrowsDown && MFI.isDeadObjectIndex(FrameIndex))
continue;
- AdjustStackOffset(MFI, FrameIndex, StackGrowsDown, Offset, MaxAlign,
- Skew);
+ AdjustStackOffset(MFI, FrameIndex, StackGrowsDown, Offset, MaxAlign);
}
}
@@ -930,7 +926,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
for (int SFI : SFIs)
- AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign, Skew);
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
// FIXME: Once this is working, then enable flag will change to a target
@@ -941,7 +937,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
Align Alignment = MFI.getLocalFrameMaxAlign();
// Adjust to alignment boundary.
- Offset = alignTo(Offset, Alignment, Skew);
+ Offset = alignTo(Offset, Alignment);
LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
@@ -987,8 +983,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
"Stack protector on non-default stack expected to not be "
"pre-allocated by LocalStackSlotPass.");
} else if (!MFI.getUseLocalStackAllocationBlock()) {
- AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign,
- Skew);
+ AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset,
+ MaxAlign);
} else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex())) {
llvm_unreachable(
"Stack protector not pre-allocated by LocalStackSlotPass.");
@@ -1036,11 +1032,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
"LocalStackSlotPass.");
AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign, Skew);
+ Offset, MaxAlign);
AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign, Skew);
+ Offset, MaxAlign);
AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign, Skew);
+ Offset, MaxAlign);
}
SmallVector<int, 8> ObjectsToAllocate;
@@ -1071,7 +1067,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// Allocate the EH registration node first if one is present.
if (EHRegNodeFrameIndex != std::numeric_limits<int>::max())
AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset,
- MaxAlign, Skew);
+ MaxAlign);
// Give the targets a chance to order the objects the way they like it.
if (MF.getTarget().getOptLevel() != CodeGenOpt::None &&
@@ -1093,7 +1089,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
for (auto &Object : ObjectsToAllocate)
if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign,
StackBytesFree))
- AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);
+ AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign);
// Make sure the special register scavenging spill slot is closest to the
// stack pointer.
@@ -1101,7 +1097,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
for (int SFI : SFIs)
- AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign, Skew);
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
if (!TFI.targetHandlesStackFrameRounding()) {
@@ -1127,7 +1123,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// SP not FP. Align to MaxAlign so this works.
StackAlign = std::max(StackAlign, MaxAlign);
int64_t OffsetBeforeAlignment = Offset;
- Offset = alignTo(Offset, StackAlign, Skew);
+ Offset = alignTo(Offset, StackAlign);
// If we have increased the offset to fulfill the alignment constraints,
// then the scavenging spill slots may become harder to reach from the
@@ -1291,8 +1287,8 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
MCRegister Reg = MO.getReg();
// This picks up sibling registers (e.g. %al -> %ah).
- for (MCRegUnitIterator Unit(Reg, &TRI); Unit.isValid(); ++Unit)
- RegsToZero.reset(*Unit);
+ for (MCRegUnit Unit : TRI.regunits(Reg))
+ RegsToZero.reset(Unit);
for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg))
RegsToZero.reset(SReg);
@@ -1463,14 +1459,24 @@ void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB,
assert(MF.getSubtarget().getRegisterInfo() &&
"getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
- RS->enterBasicBlockEnd(*BB);
+ RegScavenger *LocalRS = FrameIndexEliminationScavenging ? RS : nullptr;
+ if (LocalRS)
+ LocalRS->enterBasicBlockEnd(*BB);
for (MachineInstr &MI : make_early_inc_range(reverse(*BB))) {
+ if (TII.isFrameInstr(MI)) {
+ TFI.eliminateCallFramePseudoInstr(MF, *BB, &MI);
+ continue;
+ }
+
+ // Step backwards to get the liveness state at (immediately after) MI.
+ if (LocalRS)
+ LocalRS->backward(MI);
- // Register scavenger backward step
- MachineBasicBlock::iterator Step(MI);
for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
if (!MI.getOperand(i).isFI())
continue;
@@ -1478,49 +1484,20 @@ void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB,
if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
continue;
- // If this instruction has a FrameIndex operand, we need to
- // use that target machine register info object to eliminate
- // it.
-
- // TRI.eliminateFrameIndex may lower the frame index to a sequence of
- // instructions. It also can remove/change instructions passed by the
- // iterator and invalidate the iterator. We have to take care of this. For
- // that we support two iterators: *Step* - points to the position up to
- // which the scavenger should scan by the next iteration to have liveness
- // information up to date. *Curr* - keeps track of the correct RS->MBBI -
- // the scan start point. It points to the currently processed instruction
- // right before the frame lowering.
+ // Eliminate this FrameIndex operand.
//
- // ITERATORS WORK AS FOLLOWS:
- // *Step* is shifted one step back right before the frame lowering and
- // one step forward right after it. No matter how many instructions were
- // inserted, *Step* will be right after the position which is going to be
- // processed in the next iteration, thus, in the correct position for the
- // scavenger to go up to.
- // *Curr* is shifted one step forward right before calling
- // TRI.eliminateFrameIndex and one step backward after. Thus, we make sure
- // it points right to the position that is the correct starting point for
- // the scavenger to scan.
- MachineBasicBlock::iterator Curr = ++RS->getCurrentPosition();
-
- // Shift back
- --Step;
-
+ // Save and restore the scavenger's position around the call to
+ // eliminateFrameIndex in case it erases MI and invalidates the iterator.
+ MachineBasicBlock::iterator Save;
+ if (LocalRS)
+ Save = std::next(LocalRS->getCurrentPosition());
bool Removed = TRI.eliminateFrameIndex(MI, SPAdj, i, RS);
- // Restore to unify logic with a shift back that happens in the end of
- // the outer loop.
- ++Step;
- RS->skipTo(--Curr);
+ if (LocalRS)
+ LocalRS->skipTo(std::prev(Save));
+
if (Removed)
break;
}
-
- // Shift it to make RS collect reg info up to the current instruction.
- if (Step != BB->begin())
- Step--;
-
- // Update register states.
- RS->backward(Step);
}
}
@@ -1532,7 +1509,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- if (RS && TRI.supportsBackwardScavenger())
+ if (TRI.supportsBackwardScavenger())
return replaceFrameIndicesBackward(BB, MF, SPAdj);
if (RS && FrameIndexEliminationScavenging)
diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
index 86ea3ec67178..913e0035b046 100644
--- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp
+++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -128,10 +128,7 @@ public:
private:
uint64_t getFuncGUID(Module *M, DILocation *DL) {
- auto *SP = DL->getScope()->getSubprogram();
- auto Name = SP->getLinkageName();
- if (Name.empty())
- Name = SP->getName();
+ auto Name = DL->getSubprogramLinkageName();
return Function::getGUID(Name);
}
diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp
index dcb1a44c75e4..abf3b1e6fbb9 100644
--- a/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/llvm/lib/CodeGen/RDFGraph.cpp
@@ -8,7 +8,6 @@
//
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
-#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -19,6 +18,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -38,64 +38,69 @@
#include <utility>
#include <vector>
-using namespace llvm;
-using namespace rdf;
-
// Printing functions. Have them here first, so that the rest of the code
// can use them.
-namespace llvm {
-namespace rdf {
-
-raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P) {
- if (!P.Mask.all())
- OS << ':' << PrintLaneMask(P.Mask);
- return OS;
-}
+namespace llvm::rdf {
-raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) {
- auto &TRI = P.G.getTRI();
- if (P.Obj.Reg > 0 && P.Obj.Reg < TRI.getNumRegs())
- OS << TRI.getName(P.Obj.Reg);
- else
- OS << '#' << P.Obj.Reg;
- OS << PrintLaneMaskOpt(P.Obj.Mask);
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P) {
+ P.G.getPRI().print(OS, P.Obj);
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
- auto NA = P.G.addr<NodeBase*>(P.Obj);
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P) {
+ if (P.Obj == 0)
+ return OS << "null";
+ auto NA = P.G.addr<NodeBase *>(P.Obj);
uint16_t Attrs = NA.Addr->getAttrs();
uint16_t Kind = NodeAttrs::kind(Attrs);
uint16_t Flags = NodeAttrs::flags(Attrs);
switch (NodeAttrs::type(Attrs)) {
- case NodeAttrs::Code:
- switch (Kind) {
- case NodeAttrs::Func: OS << 'f'; break;
- case NodeAttrs::Block: OS << 'b'; break;
- case NodeAttrs::Stmt: OS << 's'; break;
- case NodeAttrs::Phi: OS << 'p'; break;
- default: OS << "c?"; break;
- }
+ case NodeAttrs::Code:
+ switch (Kind) {
+ case NodeAttrs::Func:
+ OS << 'f';
break;
- case NodeAttrs::Ref:
- if (Flags & NodeAttrs::Undef)
- OS << '/';
- if (Flags & NodeAttrs::Dead)
- OS << '\\';
- if (Flags & NodeAttrs::Preserving)
- OS << '+';
- if (Flags & NodeAttrs::Clobbering)
- OS << '~';
- switch (Kind) {
- case NodeAttrs::Use: OS << 'u'; break;
- case NodeAttrs::Def: OS << 'd'; break;
- case NodeAttrs::Block: OS << 'b'; break;
- default: OS << "r?"; break;
- }
+ case NodeAttrs::Block:
+ OS << 'b';
+ break;
+ case NodeAttrs::Stmt:
+ OS << 's';
+ break;
+ case NodeAttrs::Phi:
+ OS << 'p';
break;
default:
- OS << '?';
+ OS << "c?";
+ break;
+ }
+ break;
+ case NodeAttrs::Ref:
+ if (Flags & NodeAttrs::Undef)
+ OS << '/';
+ if (Flags & NodeAttrs::Dead)
+ OS << '\\';
+ if (Flags & NodeAttrs::Preserving)
+ OS << '+';
+ if (Flags & NodeAttrs::Clobbering)
+ OS << '~';
+ switch (Kind) {
+ case NodeAttrs::Use:
+ OS << 'u';
break;
+ case NodeAttrs::Def:
+ OS << 'd';
+ break;
+ case NodeAttrs::Block:
+ OS << 'b';
+ break;
+ default:
+ OS << "r?";
+ break;
+ }
+ break;
+ default:
+ OS << '?';
+ break;
}
OS << P.Obj;
if (Flags & NodeAttrs::Shadow)
@@ -103,15 +108,14 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
return OS;
}
-static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
- const DataFlowGraph &G) {
- OS << Print(RA.Id, G) << '<'
- << Print(RA.Addr->getRegRef(G), G) << '>';
+static void printRefHeader(raw_ostream &OS, const Ref RA,
+ const DataFlowGraph &G) {
+ OS << Print(RA.Id, G) << '<' << Print(RA.Addr->getRegRef(G), G) << '>';
if (RA.Addr->getFlags() & NodeAttrs::Fixed)
OS << '!';
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Def> &P) {
printRefHeader(OS, P.Obj, P.G);
OS << '(';
if (NodeId N = P.Obj.Addr->getReachingDef())
@@ -128,7 +132,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Use> &P) {
printRefHeader(OS, P.Obj, P.G);
OS << '(';
if (NodeId N = P.Obj.Addr->getReachingDef())
@@ -139,8 +143,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<NodeAddr<PhiUseNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<PhiUse> &P) {
printRefHeader(OS, P.Obj, P.G);
OS << '(';
if (NodeId N = P.Obj.Addr->getReachingDef())
@@ -154,22 +157,22 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Ref> &P) {
switch (P.Obj.Addr->getKind()) {
- case NodeAttrs::Def:
- OS << PrintNode<DefNode*>(P.Obj, P.G);
- break;
- case NodeAttrs::Use:
- if (P.Obj.Addr->getFlags() & NodeAttrs::PhiRef)
- OS << PrintNode<PhiUseNode*>(P.Obj, P.G);
- else
- OS << PrintNode<UseNode*>(P.Obj, P.G);
- break;
+ case NodeAttrs::Def:
+ OS << PrintNode<DefNode *>(P.Obj, P.G);
+ break;
+ case NodeAttrs::Use:
+ if (P.Obj.Addr->getFlags() & NodeAttrs::PhiRef)
+ OS << PrintNode<PhiUseNode *>(P.Obj, P.G);
+ else
+ OS << PrintNode<UseNode *>(P.Obj, P.G);
+ break;
}
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P) {
unsigned N = P.Obj.size();
for (auto I : P.Obj) {
OS << Print(I.Id, P.G);
@@ -179,7 +182,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P) {
unsigned N = P.Obj.size();
for (auto I : P.Obj) {
OS << Print(I, P.G);
@@ -191,45 +194,43 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
namespace {
- template <typename T>
- struct PrintListV {
- PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
+template <typename T> struct PrintListV {
+ PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
- using Type = T;
- const NodeList &List;
- const DataFlowGraph &G;
- };
+ using Type = T;
+ const NodeList &List;
+ const DataFlowGraph &G;
+};
- template <typename T>
- raw_ostream &operator<< (raw_ostream &OS, const PrintListV<T> &P) {
- unsigned N = P.List.size();
- for (NodeAddr<T> A : P.List) {
- OS << PrintNode<T>(A, P.G);
- if (--N)
- OS << ", ";
- }
- return OS;
+template <typename T>
+raw_ostream &operator<<(raw_ostream &OS, const PrintListV<T> &P) {
+ unsigned N = P.List.size();
+ for (NodeAddr<T> A : P.List) {
+ OS << PrintNode<T>(A, P.G);
+ if (--N)
+ OS << ", ";
}
+ return OS;
+}
} // end anonymous namespace
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Phi> &P) {
OS << Print(P.Obj.Id, P.G) << ": phi ["
- << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
+ << PrintListV<RefNode *>(P.Obj.Addr->members(P.G), P.G) << ']';
return OS;
}
-raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<StmtNode *>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Stmt> &P) {
const MachineInstr &MI = *P.Obj.Addr->getCode();
unsigned Opc = MI.getOpcode();
OS << Print(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc);
// Print the target for calls and branches (for readability).
if (MI.isCall() || MI.isBranch()) {
MachineInstr::const_mop_iterator T =
- llvm::find_if(MI.operands(),
- [] (const MachineOperand &Op) -> bool {
- return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
- });
+ llvm::find_if(MI.operands(), [](const MachineOperand &Op) -> bool {
+ return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
+ });
if (T != MI.operands_end()) {
OS << ' ';
if (T->isMBB())
@@ -240,32 +241,30 @@ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<StmtNode *>> &P) {
OS << T->getSymbolName();
}
}
- OS << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
+ OS << " [" << PrintListV<RefNode *>(P.Obj.Addr->members(P.G), P.G) << ']';
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<NodeAddr<InstrNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Instr> &P) {
switch (P.Obj.Addr->getKind()) {
- case NodeAttrs::Phi:
- OS << PrintNode<PhiNode*>(P.Obj, P.G);
- break;
- case NodeAttrs::Stmt:
- OS << PrintNode<StmtNode*>(P.Obj, P.G);
- break;
- default:
- OS << "instr? " << Print(P.Obj.Id, P.G);
- break;
+ case NodeAttrs::Phi:
+ OS << PrintNode<PhiNode *>(P.Obj, P.G);
+ break;
+ case NodeAttrs::Stmt:
+ OS << PrintNode<StmtNode *>(P.Obj, P.G);
+ break;
+ default:
+ OS << "instr? " << Print(P.Obj.Id, P.G);
+ break;
}
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<NodeAddr<BlockNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Block> &P) {
MachineBasicBlock *BB = P.Obj.Addr->getCode();
unsigned NP = BB->pred_size();
std::vector<int> Ns;
- auto PrintBBs = [&OS] (std::vector<int> Ns) -> void {
+ auto PrintBBs = [&OS](std::vector<int> Ns) -> void {
unsigned N = Ns.size();
for (int I : Ns) {
OS << "%bb." << I;
@@ -289,20 +288,21 @@ raw_ostream &operator<< (raw_ostream &OS,
OS << '\n';
for (auto I : P.Obj.Addr->members(P.G))
- OS << PrintNode<InstrNode*>(I, P.G) << '\n';
+ OS << PrintNode<InstrNode *>(I, P.G) << '\n';
return OS;
}
-raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<FuncNode *>> &P) {
- OS << "DFG dump:[\n" << Print(P.Obj.Id, P.G) << ": Function: "
- << P.Obj.Addr->getCode()->getName() << '\n';
+raw_ostream &operator<<(raw_ostream &OS, const Print<Func> &P) {
+ OS << "DFG dump:[\n"
+ << Print(P.Obj.Id, P.G)
+ << ": Function: " << P.Obj.Addr->getCode()->getName() << '\n';
for (auto I : P.Obj.Addr->members(P.G))
- OS << PrintNode<BlockNode*>(I, P.G) << '\n';
+ OS << PrintNode<BlockNode *>(I, P.G) << '\n';
OS << "]\n";
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P) {
OS << '{';
for (auto I : P.Obj)
OS << ' ' << Print(I, P.G);
@@ -310,16 +310,16 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterAggr> &P) {
- P.Obj.print(OS);
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P) {
+ OS << P.Obj;
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<DataFlowGraph::DefStack> &P) {
- for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) {
- OS << Print(I->Id, P.G)
- << '<' << Print(I->Addr->getRegRef(P.G), P.G) << '>';
+raw_ostream &operator<<(raw_ostream &OS,
+ const Print<DataFlowGraph::DefStack> &P) {
+ for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E;) {
+ OS << Print(I->Id, P.G) << '<' << Print(I->Addr->getRegRef(P.G), P.G)
+ << '>';
I.down();
if (I != E)
OS << ' ';
@@ -327,9 +327,6 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
-} // end namespace rdf
-} // end namespace llvm
-
// Node allocation functions.
//
// Node allocator is like a slab memory allocator: it allocates blocks of
@@ -340,13 +337,13 @@ raw_ostream &operator<< (raw_ostream &OS,
// and within that block is described in the header file.
//
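// Illustrative sketch (an assumption inferred from the fields used below,
// not a quote of the header): a NodeId is packed from a block index and an
// index within the block, roughly
//   NodeId makeId(uint32_t Block, uint32_t Index) {
//     return (Block << BitsPerIndex) | Index;   // assumed packing scheme
//   }
// which is why startNewBlock() asserts that the block count stays below
// 2^(bits in NodeId - BitsPerIndex).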
void NodeAllocator::startNewBlock() {
- void *T = MemPool.Allocate(NodesPerBlock*NodeMemSize, NodeMemSize);
- char *P = static_cast<char*>(T);
+ void *T = MemPool.Allocate(NodesPerBlock * NodeMemSize, NodeMemSize);
+ char *P = static_cast<char *>(T);
Blocks.push_back(P);
// Check if the block index is still within the allowed range, i.e. less
// than 2^N, where N is the number of bits in NodeId for the block index.
// BitsPerIndex is the number of bits per node index.
- assert((Blocks.size() < ((size_t)1 << (8*sizeof(NodeId)-BitsPerIndex))) &&
+ assert((Blocks.size() < ((size_t)1 << (8 * sizeof(NodeId) - BitsPerIndex))) &&
"Out of bits for block index");
ActiveEnd = P;
}
@@ -356,18 +353,17 @@ bool NodeAllocator::needNewBlock() {
return true;
char *ActiveBegin = Blocks.back();
- uint32_t Index = (ActiveEnd-ActiveBegin)/NodeMemSize;
+ uint32_t Index = (ActiveEnd - ActiveBegin) / NodeMemSize;
return Index >= NodesPerBlock;
}
-NodeAddr<NodeBase*> NodeAllocator::New() {
+Node NodeAllocator::New() {
if (needNewBlock())
startNewBlock();
- uint32_t ActiveB = Blocks.size()-1;
- uint32_t Index = (ActiveEnd - Blocks[ActiveB])/NodeMemSize;
- NodeAddr<NodeBase*> NA = { reinterpret_cast<NodeBase*>(ActiveEnd),
- makeId(ActiveB, Index) };
+ uint32_t ActiveB = Blocks.size() - 1;
+ uint32_t Index = (ActiveEnd - Blocks[ActiveB]) / NodeMemSize;
+ Node NA = {reinterpret_cast<NodeBase *>(ActiveEnd), makeId(ActiveB, Index)};
ActiveEnd += NodeMemSize;
return NA;
}
@@ -376,9 +372,9 @@ NodeId NodeAllocator::id(const NodeBase *P) const {
uintptr_t A = reinterpret_cast<uintptr_t>(P);
for (unsigned i = 0, n = Blocks.size(); i != n; ++i) {
uintptr_t B = reinterpret_cast<uintptr_t>(Blocks[i]);
- if (A < B || A >= B + NodesPerBlock*NodeMemSize)
+ if (A < B || A >= B + NodesPerBlock * NodeMemSize)
continue;
- uint32_t Idx = (A-B)/NodeMemSize;
+ uint32_t Idx = (A - B) / NodeMemSize;
return makeId(i, Idx);
}
llvm_unreachable("Invalid node address");
@@ -391,7 +387,7 @@ void NodeAllocator::clear() {
}
// Insert node NA after "this" in the circular chain.
-void NodeBase::append(NodeAddr<NodeBase*> NA) {
+void NodeBase::append(Node NA) {
NodeId Nx = Next;
// If NA is already "next", do nothing.
if (Next != NA.Id) {
@@ -406,9 +402,9 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) {
RegisterRef RefNode::getRegRef(const DataFlowGraph &G) const {
assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)
- return G.unpack(Ref.PR);
- assert(Ref.Op != nullptr);
- return G.makeRegRef(*Ref.Op);
+ return G.unpack(RefData.PR);
+ assert(RefData.Op != nullptr);
+ return G.makeRegRef(*RefData.Op);
}
// Set the register reference in the reference node directly (for references
@@ -416,7 +412,7 @@ RegisterRef RefNode::getRegRef(const DataFlowGraph &G) const {
void RefNode::setRegRef(RegisterRef RR, DataFlowGraph &G) {
assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
assert(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef);
- Ref.PR = G.pack(RR);
+ RefData.PR = G.pack(RR);
}
// Set the register reference in the reference node based on a machine
@@ -425,83 +421,82 @@ void RefNode::setRegRef(MachineOperand *Op, DataFlowGraph &G) {
assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
assert(!(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef));
(void)G;
- Ref.Op = Op;
+ RefData.Op = Op;
}
// Get the owner of a given reference node.
-NodeAddr<NodeBase*> RefNode::getOwner(const DataFlowGraph &G) {
- NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
+Node RefNode::getOwner(const DataFlowGraph &G) {
+ Node NA = G.addr<NodeBase *>(getNext());
while (NA.Addr != this) {
if (NA.Addr->getType() == NodeAttrs::Code)
return NA;
- NA = G.addr<NodeBase*>(NA.Addr->getNext());
+ NA = G.addr<NodeBase *>(NA.Addr->getNext());
}
llvm_unreachable("No owner in circular list");
}
// Connect the def node to the reaching def node.
-void DefNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
- Ref.RD = DA.Id;
- Ref.Sib = DA.Addr->getReachedDef();
+void DefNode::linkToDef(NodeId Self, Def DA) {
+ RefData.RD = DA.Id;
+ RefData.Sib = DA.Addr->getReachedDef();
DA.Addr->setReachedDef(Self);
}
// Connect the use node to the reaching def node.
-void UseNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
- Ref.RD = DA.Id;
- Ref.Sib = DA.Addr->getReachedUse();
+void UseNode::linkToDef(NodeId Self, Def DA) {
+ RefData.RD = DA.Id;
+ RefData.Sib = DA.Addr->getReachedUse();
DA.Addr->setReachedUse(Self);
}
// Get the first member of the code node.
-NodeAddr<NodeBase*> CodeNode::getFirstMember(const DataFlowGraph &G) const {
- if (Code.FirstM == 0)
- return NodeAddr<NodeBase*>();
- return G.addr<NodeBase*>(Code.FirstM);
+Node CodeNode::getFirstMember(const DataFlowGraph &G) const {
+ if (CodeData.FirstM == 0)
+ return Node();
+ return G.addr<NodeBase *>(CodeData.FirstM);
}
// Get the last member of the code node.
-NodeAddr<NodeBase*> CodeNode::getLastMember(const DataFlowGraph &G) const {
- if (Code.LastM == 0)
- return NodeAddr<NodeBase*>();
- return G.addr<NodeBase*>(Code.LastM);
+Node CodeNode::getLastMember(const DataFlowGraph &G) const {
+ if (CodeData.LastM == 0)
+ return Node();
+ return G.addr<NodeBase *>(CodeData.LastM);
}
// Add node NA at the end of the member list of the given code node.
-void CodeNode::addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
- NodeAddr<NodeBase*> ML = getLastMember(G);
+void CodeNode::addMember(Node NA, const DataFlowGraph &G) {
+ Node ML = getLastMember(G);
if (ML.Id != 0) {
ML.Addr->append(NA);
} else {
- Code.FirstM = NA.Id;
+ CodeData.FirstM = NA.Id;
NodeId Self = G.id(this);
NA.Addr->setNext(Self);
}
- Code.LastM = NA.Id;
+ CodeData.LastM = NA.Id;
}
// Add node NA after member node MA in the given code node.
-void CodeNode::addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
- const DataFlowGraph &G) {
+void CodeNode::addMemberAfter(Node MA, Node NA, const DataFlowGraph &G) {
MA.Addr->append(NA);
- if (Code.LastM == MA.Id)
- Code.LastM = NA.Id;
+ if (CodeData.LastM == MA.Id)
+ CodeData.LastM = NA.Id;
}
// Remove member node NA from the given code node.
-void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
- NodeAddr<NodeBase*> MA = getFirstMember(G);
+void CodeNode::removeMember(Node NA, const DataFlowGraph &G) {
+ Node MA = getFirstMember(G);
assert(MA.Id != 0);
// Special handling if the member to remove is the first member.
if (MA.Id == NA.Id) {
- if (Code.LastM == MA.Id) {
+ if (CodeData.LastM == MA.Id) {
// If it is the only member, set both first and last to 0.
- Code.FirstM = Code.LastM = 0;
+ CodeData.FirstM = CodeData.LastM = 0;
} else {
// Otherwise, advance the first member.
- Code.FirstM = MA.Addr->getNext();
+ CodeData.FirstM = MA.Addr->getNext();
}
return;
}
@@ -512,37 +507,37 @@ void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
MA.Addr->setNext(NA.Addr->getNext());
// If the member to remove happens to be the last one, update the
// LastM indicator.
- if (Code.LastM == NA.Id)
- Code.LastM = MA.Id;
+ if (CodeData.LastM == NA.Id)
+ CodeData.LastM = MA.Id;
return;
}
- MA = G.addr<NodeBase*>(MX);
+ MA = G.addr<NodeBase *>(MX);
}
llvm_unreachable("No such member");
}
// Return the list of all members of the code node.
NodeList CodeNode::members(const DataFlowGraph &G) const {
- static auto True = [] (NodeAddr<NodeBase*>) -> bool { return true; };
+ static auto True = [](Node) -> bool { return true; };
return members_if(True, G);
}
// Return the owner of the given instr node.
-NodeAddr<NodeBase*> InstrNode::getOwner(const DataFlowGraph &G) {
- NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
+Node InstrNode::getOwner(const DataFlowGraph &G) {
+ Node NA = G.addr<NodeBase *>(getNext());
while (NA.Addr != this) {
assert(NA.Addr->getType() == NodeAttrs::Code);
if (NA.Addr->getKind() == NodeAttrs::Block)
return NA;
- NA = G.addr<NodeBase*>(NA.Addr->getNext());
+ NA = G.addr<NodeBase *>(NA.Addr->getNext());
}
llvm_unreachable("No owner in circular list");
}
// Add the phi node PA to the given block node.
-void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) {
- NodeAddr<NodeBase*> M = getFirstMember(G);
+void BlockNode::addPhi(Phi PA, const DataFlowGraph &G) {
+ Node M = getFirstMember(G);
if (M.Id == 0) {
addMember(PA, G);
return;
@@ -552,15 +547,15 @@ void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) {
if (M.Addr->getKind() == NodeAttrs::Stmt) {
// If the first member of the block is a statement, insert the phi as
// the first member.
- Code.FirstM = PA.Id;
+ CodeData.FirstM = PA.Id;
PA.Addr->setNext(M.Id);
} else {
// If the first member is a phi, find the last phi, and append PA to it.
assert(M.Addr->getKind() == NodeAttrs::Phi);
- NodeAddr<NodeBase*> MN = M;
+ Node MN = M;
do {
M = MN;
- MN = G.addr<NodeBase*>(M.Addr->getNext());
+ MN = G.addr<NodeBase *>(M.Addr->getNext());
assert(MN.Addr->getType() == NodeAttrs::Code);
} while (MN.Addr->getKind() == NodeAttrs::Phi);
@@ -571,19 +566,17 @@ void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) {
// Find the block node corresponding to the machine basic block BB in the
// given func node.
-NodeAddr<BlockNode*> FuncNode::findBlock(const MachineBasicBlock *BB,
- const DataFlowGraph &G) const {
- auto EqBB = [BB] (NodeAddr<NodeBase*> NA) -> bool {
- return NodeAddr<BlockNode*>(NA).Addr->getCode() == BB;
- };
+Block FuncNode::findBlock(const MachineBasicBlock *BB,
+ const DataFlowGraph &G) const {
+ auto EqBB = [BB](Node NA) -> bool { return Block(NA).Addr->getCode() == BB; };
NodeList Ms = members_if(EqBB, G);
if (!Ms.empty())
return Ms[0];
- return NodeAddr<BlockNode*>();
+ return Block();
}
// Get the block node for the entry block in the given function.
-NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
+Block FuncNode::getEntryBlock(const DataFlowGraph &G) {
MachineBasicBlock *EntryB = &getCode()->front();
return findBlock(EntryB, G);
}
@@ -593,14 +586,14 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
// For a given instruction, check if there are any bits of RR that can remain
// unchanged across this def.
-bool TargetOperandInfo::isPreserving(const MachineInstr &In, unsigned OpNum)
- const {
+bool TargetOperandInfo::isPreserving(const MachineInstr &In,
+ unsigned OpNum) const {
return TII.isPredicated(In);
}
// Check if the definition of RR produces an unspecified value.
-bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum)
- const {
+bool TargetOperandInfo::isClobbering(const MachineInstr &In,
+ unsigned OpNum) const {
const MachineOperand &Op = In.getOperand(OpNum);
if (Op.isRegMask())
return true;
@@ -612,8 +605,8 @@ bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum)
}
// Check if the given instruction specifically requires a fixed register.
-bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
- const {
+bool TargetOperandInfo::isFixedReg(const MachineInstr &In,
+ unsigned OpNum) const {
if (In.isCall() || In.isReturn() || In.isInlineAsm())
return true;
// Check for a tail call.
@@ -642,19 +635,20 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
//
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
- const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
- const MachineDominanceFrontier &mdf)
+ const TargetRegisterInfo &tri,
+ const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf)
: DefaultTOI(std::make_unique<TargetOperandInfo>(tii)), MF(mf), TII(tii),
TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(*DefaultTOI),
- LiveIns(PRI) {
-}
+ LiveIns(PRI) {}
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
- const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
- const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
+ const TargetRegisterInfo &tri,
+ const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf,
+ const TargetOperandInfo &toi)
: MF(mf), TII(tii), TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(toi),
- LiveIns(PRI) {
-}
+ LiveIns(PRI) {}
// The implementation of the definition stack.
// Each register reference has its own definition stack. In particular,
@@ -663,7 +657,8 @@ DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
// Construct a stack iterator.
DataFlowGraph::DefStack::Iterator::Iterator(const DataFlowGraph::DefStack &S,
- bool Top) : DS(S) {
+ bool Top)
+ : DS(S) {
if (!Top) {
// Initialize to bottom.
Pos = 0;
@@ -671,7 +666,7 @@ DataFlowGraph::DefStack::Iterator::Iterator(const DataFlowGraph::DefStack &S,
}
// Initialize to the top, i.e. top-most non-delimiter (or 0, if empty).
Pos = DS.Stack.size();
- while (Pos > 0 && DS.isDelimiter(DS.Stack[Pos-1]))
+ while (Pos > 0 && DS.isDelimiter(DS.Stack[Pos - 1]))
Pos--;
}
@@ -695,7 +690,7 @@ void DataFlowGraph::DefStack::pop() {
// Push a delimiter for block node N on the stack.
void DataFlowGraph::DefStack::start_block(NodeId N) {
assert(N != 0);
- Stack.push_back(NodeAddr<DefNode*>(nullptr, N));
+ Stack.push_back(Def(nullptr, N));
}
// Remove all nodes from the top of the stack, until the delimiter for
@@ -705,7 +700,7 @@ void DataFlowGraph::DefStack::clear_block(NodeId N) {
assert(N != 0);
unsigned P = Stack.size();
while (P > 0) {
- bool Found = isDelimiter(Stack[P-1], N);
+ bool Found = isDelimiter(Stack[P - 1], N);
P--;
if (Found)
break;
@@ -723,7 +718,7 @@ unsigned DataFlowGraph::DefStack::nextUp(unsigned P) const {
assert(P < SS);
do {
P++;
- IsDelim = isDelimiter(Stack[P-1]);
+ IsDelim = isDelimiter(Stack[P - 1]);
} while (P < SS && IsDelim);
assert(!IsDelim);
return P;
@@ -734,11 +729,11 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
// Get the preceding valid position before P (skipping all delimiters).
// The input position P does not have to point to a non-delimiter.
assert(P > 0 && P <= Stack.size());
- bool IsDelim = isDelimiter(Stack[P-1]);
+ bool IsDelim = isDelimiter(Stack[P - 1]);
do {
if (--P == 0)
break;
- IsDelim = isDelimiter(Stack[P-1]);
+ IsDelim = isDelimiter(Stack[P - 1]);
} while (P > 0 && IsDelim);
assert(!IsDelim);
return P;
@@ -746,11 +741,10 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
// Register information.
-RegisterSet DataFlowGraph::getLandingPadLiveIns() const {
- RegisterSet LR;
+RegisterAggr DataFlowGraph::getLandingPadLiveIns() const {
+ RegisterAggr LR(getPRI());
const Function &F = MF.getFunction();
- const Constant *PF = F.hasPersonalityFn() ? F.getPersonalityFn()
- : nullptr;
+ const Constant *PF = F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr;
const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
if (RegisterId R = TLI.getExceptionPointerRegister(PF))
LR.insert(RegisterRef(R));
@@ -778,8 +772,8 @@ NodeId DataFlowGraph::id(const NodeBase *P) const {
}
// Allocate a new node and set the attributes to Attrs.
-NodeAddr<NodeBase*> DataFlowGraph::newNode(uint16_t Attrs) {
- NodeAddr<NodeBase*> P = Memory.New();
+Node DataFlowGraph::newNode(uint16_t Attrs) {
+ Node P = Memory.New();
P.Addr->init();
P.Addr->setAttrs(Attrs);
return P;
@@ -787,16 +781,16 @@ NodeAddr<NodeBase*> DataFlowGraph::newNode(uint16_t Attrs) {
// Make a copy of the given node B, except for the data-flow links, which
// are set to 0.
-NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
- NodeAddr<NodeBase*> NA = newNode(0);
+Node DataFlowGraph::cloneNode(const Node B) {
+ Node NA = newNode(0);
memcpy(NA.Addr, B.Addr, sizeof(NodeBase));
// Ref nodes need to have the data-flow links reset.
if (NA.Addr->getType() == NodeAttrs::Ref) {
- NodeAddr<RefNode*> RA = NA;
+ Ref RA = NA;
RA.Addr->setReachingDef(0);
RA.Addr->setSibling(0);
if (NA.Addr->getKind() == NodeAttrs::Def) {
- NodeAddr<DefNode*> DA = NA;
+ Def DA = NA;
DA.Addr->setReachedDef(0);
DA.Addr->setReachedUse(0);
}
@@ -806,75 +800,105 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
// Allocation routines for specific node types/kinds.
-NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags) {
- NodeAddr<UseNode*> UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
+Use DataFlowGraph::newUse(Instr Owner, MachineOperand &Op, uint16_t Flags) {
+ Use UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
UA.Addr->setRegRef(&Op, *this);
return UA;
}
-NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner,
- RegisterRef RR, NodeAddr<BlockNode*> PredB, uint16_t Flags) {
- NodeAddr<PhiUseNode*> PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
+PhiUse DataFlowGraph::newPhiUse(Phi Owner, RegisterRef RR, Block PredB,
+ uint16_t Flags) {
+ PhiUse PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
assert(Flags & NodeAttrs::PhiRef);
PUA.Addr->setRegRef(RR, *this);
PUA.Addr->setPredecessor(PredB.Id);
return PUA;
}
-NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags) {
- NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
+Def DataFlowGraph::newDef(Instr Owner, MachineOperand &Op, uint16_t Flags) {
+ Def DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
DA.Addr->setRegRef(&Op, *this);
return DA;
}
-NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
- RegisterRef RR, uint16_t Flags) {
- NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
+Def DataFlowGraph::newDef(Instr Owner, RegisterRef RR, uint16_t Flags) {
+ Def DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
assert(Flags & NodeAttrs::PhiRef);
DA.Addr->setRegRef(RR, *this);
return DA;
}
-NodeAddr<PhiNode*> DataFlowGraph::newPhi(NodeAddr<BlockNode*> Owner) {
- NodeAddr<PhiNode*> PA = newNode(NodeAttrs::Code | NodeAttrs::Phi);
+Phi DataFlowGraph::newPhi(Block Owner) {
+ Phi PA = newNode(NodeAttrs::Code | NodeAttrs::Phi);
Owner.Addr->addPhi(PA, *this);
return PA;
}
-NodeAddr<StmtNode*> DataFlowGraph::newStmt(NodeAddr<BlockNode*> Owner,
- MachineInstr *MI) {
- NodeAddr<StmtNode*> SA = newNode(NodeAttrs::Code | NodeAttrs::Stmt);
+Stmt DataFlowGraph::newStmt(Block Owner, MachineInstr *MI) {
+ Stmt SA = newNode(NodeAttrs::Code | NodeAttrs::Stmt);
SA.Addr->setCode(MI);
Owner.Addr->addMember(SA, *this);
return SA;
}
-NodeAddr<BlockNode*> DataFlowGraph::newBlock(NodeAddr<FuncNode*> Owner,
- MachineBasicBlock *BB) {
- NodeAddr<BlockNode*> BA = newNode(NodeAttrs::Code | NodeAttrs::Block);
+Block DataFlowGraph::newBlock(Func Owner, MachineBasicBlock *BB) {
+ Block BA = newNode(NodeAttrs::Code | NodeAttrs::Block);
BA.Addr->setCode(BB);
Owner.Addr->addMember(BA, *this);
return BA;
}
-NodeAddr<FuncNode*> DataFlowGraph::newFunc(MachineFunction *MF) {
- NodeAddr<FuncNode*> FA = newNode(NodeAttrs::Code | NodeAttrs::Func);
+Func DataFlowGraph::newFunc(MachineFunction *MF) {
+ Func FA = newNode(NodeAttrs::Code | NodeAttrs::Func);
FA.Addr->setCode(MF);
return FA;
}
// Build the data flow graph.
-void DataFlowGraph::build(unsigned Options) {
+void DataFlowGraph::build(const Config &config) {
reset();
- Func = newFunc(&MF);
+ BuildCfg = config;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ ReservedRegs = MRI.getReservedRegs();
+ bool SkipReserved = BuildCfg.Options & BuildOptions::OmitReserved;
+
+ auto Insert = [](auto &Set, auto &&Range) {
+ Set.insert(Range.begin(), Range.end());
+ };
+
+ if (BuildCfg.TrackRegs.empty()) {
+ std::set<RegisterId> BaseSet;
+ if (BuildCfg.Classes.empty()) {
+ // Insert every register.
+ for (unsigned R = 0, E = getPRI().getTRI().getNumRegs(); R != E; ++R)
+ BaseSet.insert(R);
+ } else {
+ for (const TargetRegisterClass *RC : BuildCfg.Classes) {
+ for (MCPhysReg R : *RC)
+ BaseSet.insert(R);
+ }
+ }
+ for (RegisterId R : BaseSet) {
+ if (SkipReserved && ReservedRegs[R])
+ continue;
+ Insert(TrackedUnits, getPRI().getUnits(RegisterRef(R)));
+ }
+ } else {
+ // Track set in Config overrides everything.
+ for (unsigned R : BuildCfg.TrackRegs) {
+ if (SkipReserved && ReservedRegs[R])
+ continue;
+ Insert(TrackedUnits, getPRI().getUnits(RegisterRef(R)));
+ }
+ }
+
+ TheFunc = newFunc(&MF);
if (MF.empty())
return;
for (MachineBasicBlock &B : MF) {
- NodeAddr<BlockNode*> BA = newBlock(Func, &B);
+ Block BA = newBlock(TheFunc, &B);
BlockNodes.insert(std::make_pair(&B, BA));
for (MachineInstr &I : B) {
if (I.isDebugInstr())
@@ -883,21 +907,13 @@ void DataFlowGraph::build(unsigned Options) {
}
}
- NodeAddr<BlockNode*> EA = Func.Addr->getEntryBlock(*this);
- NodeList Blocks = Func.Addr->members(*this);
-
- // Collect information about block references.
- RegisterSet AllRefs;
- for (NodeAddr<BlockNode*> BA : Blocks)
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
- for (NodeAddr<RefNode*> RA : IA.Addr->members(*this))
- AllRefs.insert(RA.Addr->getRegRef(*this));
+ Block EA = TheFunc.Addr->getEntryBlock(*this);
+ NodeList Blocks = TheFunc.Addr->members(*this);
// Collect function live-ins and entry block live-ins.
- MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock &EntryB = *EA.Addr->getCode();
assert(EntryB.pred_empty() && "Function entry block has predecessors");
- for (std::pair<unsigned,unsigned> P : MRI.liveins())
+ for (std::pair<unsigned, unsigned> P : MRI.liveins())
LiveIns.insert(RegisterRef(P.first));
if (MRI.tracksLiveness()) {
for (auto I : EntryB.liveins())
@@ -905,12 +921,12 @@ void DataFlowGraph::build(unsigned Options) {
}
// Add function-entry phi nodes for the live-in registers.
- //for (std::pair<RegisterId,LaneBitmask> P : LiveIns) {
- for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) {
- RegisterRef RR = *I;
- NodeAddr<PhiNode*> PA = newPhi(EA);
+ for (RegisterRef RR : LiveIns.refs()) {
+ if (RR.isReg() && !isTracked(RR)) // isReg is likely guaranteed
+ continue;
+ Phi PA = newPhi(EA);
uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
- NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+ Def DA = newDef(PA, RR, PhiFlags);
PA.Addr->addMember(DA, *this);
}
@@ -919,9 +935,9 @@ void DataFlowGraph::build(unsigned Options) {
// branches in the program, or fall-throughs from other blocks. They
// are entered from the exception handling runtime and target's ABI
// may define certain registers as defined on entry to such a block.
- RegisterSet EHRegs = getLandingPadLiveIns();
+ RegisterAggr EHRegs = getLandingPadLiveIns();
if (!EHRegs.empty()) {
- for (NodeAddr<BlockNode*> BA : Blocks) {
+ for (Block BA : Blocks) {
const MachineBasicBlock &B = *BA.Addr->getCode();
if (!B.isEHPad())
continue;
@@ -932,15 +948,17 @@ void DataFlowGraph::build(unsigned Options) {
Preds.push_back(findBlock(PB));
// Build phi nodes for each live-in.
- for (RegisterRef RR : EHRegs) {
- NodeAddr<PhiNode*> PA = newPhi(BA);
+ for (RegisterRef RR : EHRegs.refs()) {
+ if (RR.isReg() && !isTracked(RR))
+ continue;
+ Phi PA = newPhi(BA);
uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
// Add def:
- NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+ Def DA = newDef(PA, RR, PhiFlags);
PA.Addr->addMember(DA, *this);
// Add uses (no reaching defs for phi uses):
- for (NodeAddr<BlockNode*> PBA : Preds) {
- NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
+ for (Block PBA : Preds) {
+ PhiUse PUA = newPhiUse(PA, RR, PBA);
PA.Addr->addMember(PUA, *this);
}
}
@@ -949,24 +967,23 @@ void DataFlowGraph::build(unsigned Options) {
// Build a map "PhiM" which will contain, for each block, the set
// of references that will require phi definitions in that block.
- BlockRefsMap PhiM;
- for (NodeAddr<BlockNode*> BA : Blocks)
+ BlockRefsMap PhiM(getPRI());
+ for (Block BA : Blocks)
recordDefsForDF(PhiM, BA);
- for (NodeAddr<BlockNode*> BA : Blocks)
- buildPhis(PhiM, AllRefs, BA);
+ for (Block BA : Blocks)
+ buildPhis(PhiM, BA);
// Link all the refs. This will recursively traverse the dominator tree.
DefStackMap DM;
linkBlockRefs(DM, EA);
// Finally, remove all unused phi nodes.
- if (!(Options & BuildOptions::KeepDeadPhis))
+ if (!(BuildCfg.Options & BuildOptions::KeepDeadPhis))
removeUnusedPhis();
}
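// A minimal caller-side sketch of the Config-based build() (assumed usage;
// the member names Options, Classes and TrackRegs appear in the code above,
// everything else here is hypothetical):
//   DataFlowGraph G(MF, TII, TRI, MDT, MDF);
//   Config Cfg;
//   Cfg.Options |= BuildOptions::OmitReserved;  // drop reserved registers
//   Cfg.Classes.push_back(&SomeRegClass);       // hypothetical class to track
//   G.build(Cfg);
// With both TrackRegs and Classes left empty, every physical register is
// tracked.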
RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const {
- assert(PhysicalRegisterInfo::isRegMaskId(Reg) ||
- Register::isPhysicalRegister(Reg));
+ assert(RegisterRef::isRegId(Reg) || RegisterRef::isMaskId(Reg));
assert(Reg != 0);
if (Sub != 0)
Reg = TRI.getSubReg(Reg, Sub);
@@ -977,7 +994,8 @@ RegisterRef DataFlowGraph::makeRegRef(const MachineOperand &Op) const {
assert(Op.isReg() || Op.isRegMask());
if (Op.isReg())
return makeRegRef(Op.getReg(), Op.getSubReg());
- return RegisterRef(PRI.getRegMaskId(Op.getRegMask()), LaneBitmask::getAll());
+ return RegisterRef(getPRI().getRegMaskId(Op.getRegMask()),
+ LaneBitmask::getAll());
}
// For each stack in the map DefM, push the delimiter for block B on it.
@@ -1006,14 +1024,14 @@ void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) {
// Push all definitions from the instruction node IA to an appropriate
// stack in DefM.
-void DataFlowGraph::pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+void DataFlowGraph::pushAllDefs(Instr IA, DefStackMap &DefM) {
pushClobbers(IA, DefM);
pushDefs(IA, DefM);
}
// Push all definitions from the instruction node IA to an appropriate
// stack in DefM.
-void DataFlowGraph::pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+void DataFlowGraph::pushClobbers(Instr IA, DefStackMap &DefM) {
NodeSet Visited;
std::set<RegisterId> Defined;
@@ -1029,35 +1047,37 @@ void DataFlowGraph::pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
// unspecified order), but the order does not matter from the data-
// -flow perspective.
- for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) {
+ for (Def DA : IA.Addr->members_if(IsDef, *this)) {
if (Visited.count(DA.Id))
continue;
if (!(DA.Addr->getFlags() & NodeAttrs::Clobbering))
continue;
NodeList Rel = getRelatedRefs(IA, DA);
- NodeAddr<DefNode*> PDA = Rel.front();
+ Def PDA = Rel.front();
RegisterRef RR = PDA.Addr->getRegRef(*this);
// Push the definition on the stack for the register and all aliases.
// The def stack traversal in linkNodeUp will check the exact aliasing.
DefM[RR.Reg].push(DA);
Defined.insert(RR.Reg);
- for (RegisterId A : PRI.getAliasSet(RR.Reg)) {
+ for (RegisterId A : getPRI().getAliasSet(RR.Reg)) {
+ if (RegisterRef::isRegId(A) && !isTracked(RegisterRef(A)))
+ continue;
// Check that we don't push the same def twice.
assert(A != RR.Reg);
if (!Defined.count(A))
DefM[A].push(DA);
}
// Mark all the related defs as visited.
- for (NodeAddr<NodeBase*> T : Rel)
+ for (Node T : Rel)
Visited.insert(T.Id);
}
}
// Push all definitions from the instruction node IA to an appropriate
// stack in DefM.
-void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+void DataFlowGraph::pushDefs(Instr IA, DefStackMap &DefM) {
NodeSet Visited;
#ifndef NDEBUG
std::set<RegisterId> Defined;
@@ -1075,44 +1095,45 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
// unspecified order), but the order does not matter from the data-
// -flow perspective.
- for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) {
+ for (Def DA : IA.Addr->members_if(IsDef, *this)) {
if (Visited.count(DA.Id))
continue;
if (DA.Addr->getFlags() & NodeAttrs::Clobbering)
continue;
NodeList Rel = getRelatedRefs(IA, DA);
- NodeAddr<DefNode*> PDA = Rel.front();
+ Def PDA = Rel.front();
RegisterRef RR = PDA.Addr->getRegRef(*this);
#ifndef NDEBUG
// Assert if the register is defined in two or more unrelated defs.
// This could happen if there are two or more def operands defining it.
if (!Defined.insert(RR.Reg).second) {
- MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
- dbgs() << "Multiple definitions of register: "
- << Print(RR, *this) << " in\n " << *MI << "in "
- << printMBBReference(*MI->getParent()) << '\n';
+ MachineInstr *MI = Stmt(IA).Addr->getCode();
+ dbgs() << "Multiple definitions of register: " << Print(RR, *this)
+ << " in\n " << *MI << "in " << printMBBReference(*MI->getParent())
+ << '\n';
llvm_unreachable(nullptr);
}
#endif
// Push the definition on the stack for the register and all aliases.
// The def stack traversal in linkNodeUp will check the exact aliasing.
DefM[RR.Reg].push(DA);
- for (RegisterId A : PRI.getAliasSet(RR.Reg)) {
+ for (RegisterId A : getPRI().getAliasSet(RR.Reg)) {
+ if (RegisterRef::isRegId(A) && !isTracked(RegisterRef(A)))
+ continue;
// Check that we don't push the same def twice.
assert(A != RR.Reg);
DefM[A].push(DA);
}
// Mark all the related defs as visited.
- for (NodeAddr<NodeBase*> T : Rel)
+ for (Node T : Rel)
Visited.insert(T.Id);
}
}
// Return the list of all reference nodes related to RA, including RA itself.
// See "getNextRelated" for the meaning of a "related reference".
-NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const {
+NodeList DataFlowGraph::getRelatedRefs(Instr IA, Ref RA) const {
assert(IA.Id != 0 && RA.Id != 0);
NodeList Refs;
@@ -1128,7 +1149,9 @@ NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA,
void DataFlowGraph::reset() {
Memory.clear();
BlockNodes.clear();
- Func = NodeAddr<FuncNode*>();
+ TrackedUnits.clear();
+ ReservedRegs.clear();
+ TheFunc = Func();
}
// Return the next reference node in the instruction node IA that is related
@@ -1137,36 +1160,38 @@ void DataFlowGraph::reset() {
// characteristics. Specific examples of related nodes are shadow reference
// nodes.
// Return the equivalent of nullptr if there are no more related references.
-NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const {
+Ref DataFlowGraph::getNextRelated(Instr IA, Ref RA) const {
assert(IA.Id != 0 && RA.Id != 0);
- auto Related = [this,RA](NodeAddr<RefNode*> TA) -> bool {
+ auto IsRelated = [this, RA](Ref TA) -> bool {
if (TA.Addr->getKind() != RA.Addr->getKind())
return false;
- if (TA.Addr->getRegRef(*this) != RA.Addr->getRegRef(*this))
+ if (!getPRI().equal_to(TA.Addr->getRegRef(*this),
+ RA.Addr->getRegRef(*this))) {
return false;
+ }
return true;
};
- auto RelatedStmt = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
- return Related(TA) &&
- &RA.Addr->getOp() == &TA.Addr->getOp();
- };
- auto RelatedPhi = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
- if (!Related(TA))
+
+ RegisterRef RR = RA.Addr->getRegRef(*this);
+ if (IA.Addr->getKind() == NodeAttrs::Stmt) {
+ auto Cond = [&IsRelated, RA](Ref TA) -> bool {
+ return IsRelated(TA) && &RA.Addr->getOp() == &TA.Addr->getOp();
+ };
+ return RA.Addr->getNextRef(RR, Cond, true, *this);
+ }
+
+ assert(IA.Addr->getKind() == NodeAttrs::Phi);
+ auto Cond = [&IsRelated, RA](Ref TA) -> bool {
+ if (!IsRelated(TA))
return false;
if (TA.Addr->getKind() != NodeAttrs::Use)
return true;
// For phi uses, compare predecessor blocks.
- const NodeAddr<const PhiUseNode*> TUA = TA;
- const NodeAddr<const PhiUseNode*> RUA = RA;
- return TUA.Addr->getPredecessor() == RUA.Addr->getPredecessor();
+ return PhiUse(TA).Addr->getPredecessor() ==
+ PhiUse(RA).Addr->getPredecessor();
};
-
- RegisterRef RR = RA.Addr->getRegRef(*this);
- if (IA.Addr->getKind() == NodeAttrs::Stmt)
- return RA.Addr->getNextRef(RR, RelatedStmt, true, *this);
- return RA.Addr->getNextRef(RR, RelatedPhi, true, *this);
+ return RA.Addr->getNextRef(RR, Cond, true, *this);
}
// Find the next node related to RA in IA that satisfies condition P.
@@ -1175,12 +1200,11 @@ NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
// first element is the element after which such a node should be inserted,
// and the second element is a null-address.
template <typename Predicate>
-std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
-DataFlowGraph::locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
- Predicate P) const {
+std::pair<Ref, Ref> DataFlowGraph::locateNextRef(Instr IA, Ref RA,
+ Predicate P) const {
assert(IA.Id != 0 && RA.Id != 0);
- NodeAddr<RefNode*> NA;
+ Ref NA;
NodeId Start = RA.Id;
while (true) {
NA = getNextRelated(IA, RA);
@@ -1193,17 +1217,16 @@ DataFlowGraph::locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
if (NA.Id != 0 && NA.Id != Start)
return std::make_pair(RA, NA);
- return std::make_pair(RA, NodeAddr<RefNode*>());
+ return std::make_pair(RA, Ref());
}
// Get the next shadow node in IA corresponding to RA, and optionally create
// such a node if it does not exist.
-NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA, bool Create) {
+Ref DataFlowGraph::getNextShadow(Instr IA, Ref RA, bool Create) {
assert(IA.Id != 0 && RA.Id != 0);
uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
- auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
+ auto IsShadow = [Flags](Ref TA) -> bool {
return TA.Addr->getFlags() == Flags;
};
auto Loc = locateNextRef(IA, RA, IsShadow);
@@ -1211,30 +1234,18 @@ NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
return Loc.second;
// Create a copy of RA and mark it as shadow.
- NodeAddr<RefNode*> NA = cloneNode(RA);
+ Ref NA = cloneNode(RA);
NA.Addr->setFlags(Flags | NodeAttrs::Shadow);
IA.Addr->addMemberAfter(Loc.first, NA, *this);
return NA;
}
-// Get the next shadow node in IA corresponding to RA. Return null-address
-// if such a node does not exist.
-NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const {
- assert(IA.Id != 0 && RA.Id != 0);
- uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
- auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
- return TA.Addr->getFlags() == Flags;
- };
- return locateNextRef(IA, RA, IsShadow).second;
-}
-
// Create a new statement node in the block node BA that corresponds to
// the machine instruction MI.
-void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
- NodeAddr<StmtNode*> SA = newStmt(BA, &In);
+void DataFlowGraph::buildStmt(Block BA, MachineInstr &In) {
+ Stmt SA = newStmt(BA, &In);
- auto isCall = [] (const MachineInstr &In) -> bool {
+ auto isCall = [](const MachineInstr &In) -> bool {
if (In.isCall())
return true;
// Is tail call?
@@ -1251,14 +1262,14 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
return false;
};
- auto isDefUndef = [this] (const MachineInstr &In, RegisterRef DR) -> bool {
+ auto isDefUndef = [this](const MachineInstr &In, RegisterRef DR) -> bool {
// This instruction defines DR. Check if there is a use operand that
// would make DR live on entry to the instruction.
- for (const MachineOperand &Op : In.operands()) {
- if (!Op.isReg() || Op.getReg() == 0 || !Op.isUse() || Op.isUndef())
+ for (const MachineOperand &Op : In.all_uses()) {
+ if (Op.getReg() == 0 || Op.isUndef())
continue;
RegisterRef UR = makeRegRef(Op);
- if (PRI.alias(DR, UR))
+ if (getPRI().alias(DR, UR))
return false;
}
return true;
@@ -1278,7 +1289,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
continue;
Register R = Op.getReg();
- if (!R || !R.isPhysical())
+ if (!R || !R.isPhysical() || !isTracked(RegisterRef(R)))
continue;
uint16_t Flags = NodeAttrs::None;
if (TOI.isPreserving(In, OpN)) {
@@ -1293,7 +1304,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
Flags |= NodeAttrs::Fixed;
if (IsCall && Op.isDead())
Flags |= NodeAttrs::Dead;
- NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ Def DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
assert(!DoneDefs.test(R));
DoneDefs.set(R);
@@ -1305,15 +1316,17 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isRegMask())
continue;
- uint16_t Flags = NodeAttrs::Clobbering | NodeAttrs::Fixed |
- NodeAttrs::Dead;
- NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ uint16_t Flags = NodeAttrs::Clobbering | NodeAttrs::Fixed | NodeAttrs::Dead;
+ Def DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
// Record all clobbered registers in DoneClobbers.
const uint32_t *RM = Op.getRegMask();
- for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i)
- if (!(RM[i/32] & (1u << (i%32))))
+ for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
+ if (!isTracked(RegisterRef(i)))
+ continue;
+ if (!(RM[i / 32] & (1u << (i % 32))))
DoneClobbers.set(i);
+ }
}
// Process implicit defs, skipping those that have already been added
@@ -1323,7 +1336,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
continue;
Register R = Op.getReg();
- if (!R || !R.isPhysical() || DoneDefs.test(R))
+ if (!R || !R.isPhysical() || !isTracked(RegisterRef(R)) || DoneDefs.test(R))
continue;
RegisterRef RR = makeRegRef(Op);
uint16_t Flags = NodeAttrs::None;
@@ -1342,7 +1355,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
continue;
Flags |= NodeAttrs::Dead;
}
- NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ Def DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
DoneDefs.set(R);
}
@@ -1352,22 +1365,21 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (!Op.isReg() || !Op.isUse())
continue;
Register R = Op.getReg();
- if (!R || !R.isPhysical())
+ if (!R || !R.isPhysical() || !isTracked(RegisterRef(R)))
continue;
uint16_t Flags = NodeAttrs::None;
if (Op.isUndef())
Flags |= NodeAttrs::Undef;
if (TOI.isFixedReg(In, OpN))
Flags |= NodeAttrs::Fixed;
- NodeAddr<UseNode*> UA = newUse(SA, Op, Flags);
+ Use UA = newUse(SA, Op, Flags);
SA.Addr->addMember(UA, *this);
}
}
// Scan all defs in the block node BA and record in PhiM the locations of
// phi nodes corresponding to these defs.
-void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM,
- NodeAddr<BlockNode*> BA) {
+void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, Block BA) {
// Check all defs from block BA and record them in each block in BA's
// iterated dominance frontier. This information will later be used to
// create phi nodes.
@@ -1382,14 +1394,18 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM,
// in the block's iterated dominance frontier.
// This is done to make sure that each defined reference gets only one
// phi node, even if it is defined multiple times.
- RegisterSet Defs;
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
- for (NodeAddr<RefNode*> RA : IA.Addr->members_if(IsDef, *this))
- Defs.insert(RA.Addr->getRegRef(*this));
+ RegisterAggr Defs(getPRI());
+ for (Instr IA : BA.Addr->members(*this)) {
+ for (Ref RA : IA.Addr->members_if(IsDef, *this)) {
+ RegisterRef RR = RA.Addr->getRegRef(*this);
+ if (RR.isReg() && isTracked(RR))
+ Defs.insert(RR);
+ }
+ }
// Calculate the iterated dominance frontier of BB.
const MachineDominanceFrontier::DomSetType &DF = DFLoc->second;
- SetVector<MachineBasicBlock*> IDF(DF.begin(), DF.end());
+ SetVector<MachineBasicBlock *> IDF(DF.begin(), DF.end());
for (unsigned i = 0; i < IDF.size(); ++i) {
auto F = MDF.find(IDF[i]);
if (F != MDF.end())
@@ -1399,98 +1415,37 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM,
// Finally, add the set of defs to each block in the iterated dominance
// frontier.
for (auto *DB : IDF) {
- NodeAddr<BlockNode*> DBA = findBlock(DB);
- PhiM[DBA.Id].insert(Defs.begin(), Defs.end());
+ Block DBA = findBlock(DB);
+ PhiM[DBA.Id].insert(Defs);
}
}
// Given the locations of phi nodes in the map PhiM, create the phi nodes
// that are located in the block node BA.
-void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
- NodeAddr<BlockNode*> BA) {
+void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, Block BA) {
  // Check if this block has any DF defs, i.e. if there are any defs
// that this block is in the iterated dominance frontier of.
auto HasDF = PhiM.find(BA.Id);
if (HasDF == PhiM.end() || HasDF->second.empty())
return;
- // First, remove all R in Refs in such that there exists T in Refs
- // such that T covers R. In other words, only leave those refs that
- // are not covered by another ref (i.e. maximal with respect to covering).
-
- auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef {
- for (RegisterRef I : RRs)
- if (I != RR && RegisterAggr::isCoverOf(I, RR, PRI))
- RR = I;
- return RR;
- };
-
- RegisterSet MaxDF;
- for (RegisterRef I : HasDF->second)
- MaxDF.insert(MaxCoverIn(I, HasDF->second));
-
- std::vector<RegisterRef> MaxRefs;
- for (RegisterRef I : MaxDF)
- MaxRefs.push_back(MaxCoverIn(I, AllRefs));
-
- // Now, for each R in MaxRefs, get the alias closure of R. If the closure
- // only has R in it, create a phi a def for R. Otherwise, create a phi,
- // and add a def for each S in the closure.
-
- // Sort the refs so that the phis will be created in a deterministic order.
- llvm::sort(MaxRefs);
- // Remove duplicates.
- auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end());
- MaxRefs.erase(NewEnd, MaxRefs.end());
-
- auto Aliased = [this,&MaxRefs](RegisterRef RR,
- std::vector<unsigned> &Closure) -> bool {
- for (unsigned I : Closure)
- if (PRI.alias(RR, MaxRefs[I]))
- return true;
- return false;
- };
-
// Prepare a list of NodeIds of the block's predecessors.
NodeList Preds;
const MachineBasicBlock *MBB = BA.Addr->getCode();
for (MachineBasicBlock *PB : MBB->predecessors())
Preds.push_back(findBlock(PB));
- while (!MaxRefs.empty()) {
- // Put the first element in the closure, and then add all subsequent
- // elements from MaxRefs to it, if they alias at least one element
- // already in the closure.
- // ClosureIdx: vector of indices in MaxRefs of members of the closure.
- std::vector<unsigned> ClosureIdx = { 0 };
- for (unsigned i = 1; i != MaxRefs.size(); ++i)
- if (Aliased(MaxRefs[i], ClosureIdx))
- ClosureIdx.push_back(i);
-
- // Build a phi for the closure.
- unsigned CS = ClosureIdx.size();
- NodeAddr<PhiNode*> PA = newPhi(BA);
-
- // Add defs.
- for (unsigned X = 0; X != CS; ++X) {
- RegisterRef RR = MaxRefs[ClosureIdx[X]];
- uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
- NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
- PA.Addr->addMember(DA, *this);
- }
+ const RegisterAggr &Defs = PhiM[BA.Id];
+ uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+
+ for (RegisterRef RR : Defs.refs()) {
+ Phi PA = newPhi(BA);
+ PA.Addr->addMember(newDef(PA, RR, PhiFlags), *this);
+
// Add phi uses.
- for (NodeAddr<BlockNode*> PBA : Preds) {
- for (unsigned X = 0; X != CS; ++X) {
- RegisterRef RR = MaxRefs[ClosureIdx[X]];
- NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
- PA.Addr->addMember(PUA, *this);
- }
+ for (Block PBA : Preds) {
+ PA.Addr->addMember(newPhiUse(PA, RR, PBA), *this);
}
-
- // Erase from MaxRefs all elements in the closure.
- auto Begin = MaxRefs.begin();
- for (unsigned Idx : llvm::reverse(ClosureIdx))
- MaxRefs.erase(Begin + Idx);
}
}
@@ -1503,16 +1458,16 @@ void DataFlowGraph::removeUnusedPhis() {
// that are easily determinable to be unnecessary.
SetVector<NodeId> PhiQ;
- for (NodeAddr<BlockNode*> BA : Func.Addr->members(*this)) {
+ for (Block BA : TheFunc.Addr->members(*this)) {
for (auto P : BA.Addr->members_if(IsPhi, *this))
PhiQ.insert(P.Id);
}
static auto HasUsedDef = [](NodeList &Ms) -> bool {
- for (NodeAddr<NodeBase*> M : Ms) {
+ for (Node M : Ms) {
if (M.Addr->getKind() != NodeAttrs::Def)
continue;
- NodeAddr<DefNode*> DA = M;
+ Def DA = M;
if (DA.Addr->getReachedDef() != 0 || DA.Addr->getReachedUse() != 0)
return true;
}
@@ -1523,15 +1478,15 @@ void DataFlowGraph::removeUnusedPhis() {
// For each removed phi, collect the potentially affected phis and add
// them back to the queue.
while (!PhiQ.empty()) {
- auto PA = addr<PhiNode*>(PhiQ[0]);
+ auto PA = addr<PhiNode *>(PhiQ[0]);
PhiQ.remove(PA.Id);
NodeList Refs = PA.Addr->members(*this);
if (HasUsedDef(Refs))
continue;
- for (NodeAddr<RefNode*> RA : Refs) {
+ for (Ref RA : Refs) {
if (NodeId RD = RA.Addr->getReachingDef()) {
- auto RDA = addr<DefNode*>(RD);
- NodeAddr<InstrNode*> OA = RDA.Addr->getOwner(*this);
+ auto RDA = addr<DefNode *>(RD);
+ Instr OA = RDA.Addr->getOwner(*this);
if (IsPhi(OA))
PhiQ.insert(OA.Id);
}
@@ -1540,7 +1495,7 @@ void DataFlowGraph::removeUnusedPhis() {
else
unlinkUse(RA, true);
}
- NodeAddr<BlockNode*> BA = PA.Addr->getOwner(*this);
+ Block BA = PA.Addr->getOwner(*this);
BA.Addr->removeMember(PA, *this);
}
}
@@ -1549,15 +1504,14 @@ void DataFlowGraph::removeUnusedPhis() {
// reaching def of TA to the appropriate def node. Create any shadow nodes
// as appropriate.
template <typename T>
-void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
- DefStack &DS) {
+void DataFlowGraph::linkRefUp(Instr IA, NodeAddr<T> TA, DefStack &DS) {
if (DS.empty())
return;
RegisterRef RR = TA.Addr->getRegRef(*this);
NodeAddr<T> TAP;
// References from the def stack that have been examined so far.
- RegisterAggr Defs(PRI);
+ RegisterAggr Defs(getPRI());
for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) {
RegisterRef QR = I->Addr->getRegRef(*this);
@@ -1573,7 +1527,7 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
}
// The reaching def.
- NodeAddr<DefNode*> RDA = *I;
+ Def RDA = *I;
// Pick the reached node.
if (TAP.Id == 0) {
@@ -1594,14 +1548,13 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
// Create data-flow links for all reference nodes in the statement node SA.
template <typename Predicate>
-void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA,
- Predicate P) {
+void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, Stmt SA, Predicate P) {
#ifndef NDEBUG
- RegisterSet Defs;
+ RegisterSet Defs(getPRI());
#endif
// Link all nodes (upwards in the data-flow) with their reaching defs.
- for (NodeAddr<RefNode*> RA : SA.Addr->members_if(P, *this)) {
+ for (Ref RA : SA.Addr->members_if(P, *this)) {
uint16_t Kind = RA.Addr->getKind();
assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use);
RegisterRef RR = RA.Addr->getRegRef(*this);
@@ -1616,9 +1569,9 @@ void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA,
continue;
DefStack &DS = F->second;
if (Kind == NodeAttrs::Use)
- linkRefUp<UseNode*>(SA, RA, DS);
+ linkRefUp<UseNode *>(SA, RA, DS);
else if (Kind == NodeAttrs::Def)
- linkRefUp<DefNode*>(SA, RA, DS);
+ linkRefUp<DefNode *>(SA, RA, DS);
else
llvm_unreachable("Unexpected node in instruction");
}
@@ -1626,14 +1579,14 @@ void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA,
// Create data-flow links for all instructions in the block node BA. This
// will include updating any phi nodes in BA.
-void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
+void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, Block BA) {
// Push block delimiters.
markBlock(BA.Id, DefM);
- auto IsClobber = [] (NodeAddr<RefNode*> RA) -> bool {
+ auto IsClobber = [](Ref RA) -> bool {
return IsDef(RA) && (RA.Addr->getFlags() & NodeAttrs::Clobbering);
};
- auto IsNoClobber = [] (NodeAddr<RefNode*> RA) -> bool {
+ auto IsNoClobber = [](Ref RA) -> bool {
return IsDef(RA) && !(RA.Addr->getFlags() & NodeAttrs::Clobbering);
};
@@ -1641,7 +1594,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
// For each non-phi instruction in the block, link all the defs and uses
// to their reaching defs. For any member of the block (including phis),
// push the defs on the corresponding stacks.
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) {
+ for (Instr IA : BA.Addr->members(*this)) {
// Ignore phi nodes here. They will be linked part by part from the
// predecessors.
if (IA.Addr->getKind() == NodeAttrs::Stmt) {
@@ -1662,39 +1615,38 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
for (auto *I : *N) {
MachineBasicBlock *SB = I->getBlock();
- NodeAddr<BlockNode*> SBA = findBlock(SB);
+ Block SBA = findBlock(SB);
linkBlockRefs(DefM, SBA);
}
// Link the phi uses from the successor blocks.
- auto IsUseForBA = [BA](NodeAddr<NodeBase*> NA) -> bool {
+ auto IsUseForBA = [BA](Node NA) -> bool {
if (NA.Addr->getKind() != NodeAttrs::Use)
return false;
assert(NA.Addr->getFlags() & NodeAttrs::PhiRef);
- NodeAddr<PhiUseNode*> PUA = NA;
- return PUA.Addr->getPredecessor() == BA.Id;
+ return PhiUse(NA).Addr->getPredecessor() == BA.Id;
};
- RegisterSet EHLiveIns = getLandingPadLiveIns();
+ RegisterAggr EHLiveIns = getLandingPadLiveIns();
MachineBasicBlock *MBB = BA.Addr->getCode();
for (MachineBasicBlock *SB : MBB->successors()) {
bool IsEHPad = SB->isEHPad();
- NodeAddr<BlockNode*> SBA = findBlock(SB);
- for (NodeAddr<InstrNode*> IA : SBA.Addr->members_if(IsPhi, *this)) {
+ Block SBA = findBlock(SB);
+ for (Instr IA : SBA.Addr->members_if(IsPhi, *this)) {
// Do not link phi uses for landing pad live-ins.
if (IsEHPad) {
// Find what register this phi is for.
- NodeAddr<RefNode*> RA = IA.Addr->getFirstMember(*this);
+ Ref RA = IA.Addr->getFirstMember(*this);
assert(RA.Id != 0);
- if (EHLiveIns.count(RA.Addr->getRegRef(*this)))
+ if (EHLiveIns.hasCoverOf(RA.Addr->getRegRef(*this)))
continue;
}
// Go over each phi use associated with MBB, and link it.
for (auto U : IA.Addr->members_if(IsUseForBA, *this)) {
- NodeAddr<PhiUseNode*> PUA = U;
+ PhiUse PUA = U;
RegisterRef RR = PUA.Addr->getRegRef(*this);
- linkRefUp<UseNode*>(IA, PUA, DefM[RR.Reg]);
+ linkRefUp<UseNode *>(IA, PUA, DefM[RR.Reg]);
}
}
}
@@ -1704,7 +1656,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
}
// Remove the use node UA from any data-flow and structural links.
-void DataFlowGraph::unlinkUseDF(NodeAddr<UseNode*> UA) {
+void DataFlowGraph::unlinkUseDF(Use UA) {
NodeId RD = UA.Addr->getReachingDef();
NodeId Sib = UA.Addr->getSibling();
@@ -1713,8 +1665,8 @@ void DataFlowGraph::unlinkUseDF(NodeAddr<UseNode*> UA) {
return;
}
- auto RDA = addr<DefNode*>(RD);
- auto TA = addr<UseNode*>(RDA.Addr->getReachedUse());
+ auto RDA = addr<DefNode *>(RD);
+ auto TA = addr<UseNode *>(RDA.Addr->getReachedUse());
if (TA.Id == UA.Id) {
RDA.Addr->setReachedUse(Sib);
return;
@@ -1726,12 +1678,12 @@ void DataFlowGraph::unlinkUseDF(NodeAddr<UseNode*> UA) {
TA.Addr->setSibling(UA.Addr->getSibling());
return;
}
- TA = addr<UseNode*>(S);
+ TA = addr<UseNode *>(S);
}
}
// Remove the def node DA from any data-flow and structural links.
-void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) {
+void DataFlowGraph::unlinkDefDF(Def DA) {
//
// RD
// | reached
@@ -1756,10 +1708,10 @@ void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) {
// Also, defs reached by DA are now "promoted" to being reached by RD,
// so all of them will need to be spliced into the sibling chain where
// DA belongs.
- auto getAllNodes = [this] (NodeId N) -> NodeList {
+ auto getAllNodes = [this](NodeId N) -> NodeList {
NodeList Res;
while (N) {
- auto RA = addr<RefNode*>(N);
+ auto RA = addr<RefNode *>(N);
// Keep the nodes in the exact sibling order.
Res.push_back(RA);
N = RA.Addr->getSibling();
@@ -1770,14 +1722,14 @@ void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) {
NodeList ReachedUses = getAllNodes(DA.Addr->getReachedUse());
if (RD == 0) {
- for (NodeAddr<RefNode*> I : ReachedDefs)
+ for (Ref I : ReachedDefs)
I.Addr->setSibling(0);
- for (NodeAddr<RefNode*> I : ReachedUses)
+ for (Ref I : ReachedUses)
I.Addr->setSibling(0);
}
- for (NodeAddr<DefNode*> I : ReachedDefs)
+ for (Def I : ReachedDefs)
I.Addr->setReachingDef(RD);
- for (NodeAddr<UseNode*> I : ReachedUses)
+ for (Use I : ReachedUses)
I.Addr->setReachingDef(RD);
NodeId Sib = DA.Addr->getSibling();
@@ -1787,8 +1739,8 @@ void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) {
}
// Update the reaching def node and remove DA from the sibling list.
- auto RDA = addr<DefNode*>(RD);
- auto TA = addr<DefNode*>(RDA.Addr->getReachedDef());
+ auto RDA = addr<DefNode *>(RD);
+ auto TA = addr<DefNode *>(RDA.Addr->getReachedDef());
if (TA.Id == DA.Id) {
// If DA is the first reached def, just update the RD's reached def
// to the DA's sibling.
@@ -1802,20 +1754,46 @@ void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) {
TA.Addr->setSibling(Sib);
break;
}
- TA = addr<DefNode*>(S);
+ TA = addr<DefNode *>(S);
}
}
// Splice the DA's reached defs into the RDA's reached def chain.
if (!ReachedDefs.empty()) {
- auto Last = NodeAddr<DefNode*>(ReachedDefs.back());
+ auto Last = Def(ReachedDefs.back());
Last.Addr->setSibling(RDA.Addr->getReachedDef());
RDA.Addr->setReachedDef(ReachedDefs.front().Id);
}
// Splice the DA's reached uses into the RDA's reached use chain.
if (!ReachedUses.empty()) {
- auto Last = NodeAddr<UseNode*>(ReachedUses.back());
+ auto Last = Use(ReachedUses.back());
Last.Addr->setSibling(RDA.Addr->getReachedUse());
RDA.Addr->setReachedUse(ReachedUses.front().Id);
}
}
+
+bool DataFlowGraph::isTracked(RegisterRef RR) const {
+ return !disjoint(getPRI().getUnits(RR), TrackedUnits);
+}
+
+bool DataFlowGraph::hasUntrackedRef(Stmt S, bool IgnoreReserved) const {
+ SmallVector<MachineOperand *> Ops;
+
+ for (Ref R : S.Addr->members(*this)) {
+ Ops.push_back(&R.Addr->getOp());
+ RegisterRef RR = R.Addr->getRegRef(*this);
+ if (IgnoreReserved && RR.isReg() && ReservedRegs[RR.idx()])
+ continue;
+ if (!isTracked(RR))
+ return true;
+ }
+ for (const MachineOperand &Op : S.Addr->getCode()->operands()) {
+ if (!Op.isReg() && !Op.isRegMask())
+ continue;
+ if (llvm::find(Ops, &Op) == Ops.end())
+ return true;
+ }
+ return false;
+}
+
+} // end namespace llvm::rdf
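
The RDFGraph changes above reduce "is this reference relevant" to a register-unit overlap test: a RegisterRef is tracked iff disjoint(getUnits(RR), TrackedUnits) is false. A minimal standalone sketch of that check, written with plain sorted vectors rather than the RDF types (the Unit alias and the sample sets here are purely illustrative, not part of the patch):

// Simplified model of unit-based tracking: a register reference is
// "tracked" iff its set of register units intersects the tracked set.
#include <cassert>
#include <vector>

using Unit = unsigned;

// True if the two sorted unit sets share no element, mirroring the
// disjoint(getUnits(RR), TrackedUnits) test used by isTracked().
static bool disjoint(const std::vector<Unit> &A, const std::vector<Unit> &B) {
  auto I = A.begin(), J = B.begin();
  while (I != A.end() && J != B.end()) {
    if (*I == *J)
      return false;
    (*I < *J) ? ++I : ++J;
  }
  return true;
}

static bool isTracked(const std::vector<Unit> &RegUnits,
                      const std::vector<Unit> &TrackedUnits) {
  return !disjoint(RegUnits, TrackedUnits);
}

int main() {
  std::vector<Unit> TrackedUnits = {2, 5, 7}; // hypothetical tracked set
  assert(isTracked({5, 6}, TrackedUnits));    // shares unit 5
  assert(!isTracked({1, 3}, TrackedUnits));   // no common unit
  return 0;
}

The same overlap test also explains why untracked operands can simply be skipped in buildStmt: a def or use whose units never intersect the tracked set cannot affect any tracked data-flow edge.
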
diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp
index 902b29d41ce1..11f3fedaa5f9 100644
--- a/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -22,7 +22,6 @@
// and Embedded Architectures and Compilers", 8 (4),
// <10.1145/2086696.2086706>. <hal-00647369>
//
-#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -34,6 +33,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
@@ -51,31 +51,27 @@
#include <vector>
using namespace llvm;
-using namespace rdf;
static cl::opt<unsigned> MaxRecNest("rdf-liveness-max-rec", cl::init(25),
- cl::Hidden, cl::desc("Maximum recursion level"));
-
-namespace llvm {
-namespace rdf {
-
- raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) {
- OS << '{';
- for (const auto &I : P.Obj) {
- OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{';
- for (auto J = I.second.begin(), E = I.second.end(); J != E; ) {
- OS << Print(J->first, P.G) << PrintLaneMaskOpt(J->second);
- if (++J != E)
- OS << ',';
- }
- OS << '}';
+ cl::Hidden,
+ cl::desc("Maximum recursion level"));
+
+namespace llvm::rdf {
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P) {
+ OS << '{';
+ for (const auto &I : P.Obj) {
+ OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{';
+ for (auto J = I.second.begin(), E = I.second.end(); J != E;) {
+ OS << Print(J->first, P.G) << PrintLaneMaskShort(J->second);
+ if (++J != E)
+ OS << ',';
}
- OS << " }";
- return OS;
+ OS << '}';
}
-
-} // end namespace rdf
-} // end namespace llvm
+ OS << " }";
+ return OS;
+}
// The order in the returned sequence is the order of reaching defs in the
// upward traversal: the first def is the closest to the given reference RefA,
@@ -106,11 +102,12 @@ namespace rdf {
// the data-flow.
NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, bool TopShadows, bool FullChain,
- const RegisterAggr &DefRRs) {
+ NodeAddr<RefNode *> RefA, bool TopShadows,
+ bool FullChain,
+ const RegisterAggr &DefRRs) {
NodeList RDefs; // Return value.
SetVector<NodeId> DefQ;
- DenseMap<MachineInstr*, uint32_t> OrdMap;
+ DenseMap<MachineInstr *, uint32_t> OrdMap;
// Dead defs will be treated as if they were live, since they are actually
// on the data-flow path. They cannot be ignored because even though they
@@ -124,12 +121,12 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// whole point of a shadow is that it will have a reaching def that
// is not aliased to the reaching defs of the related shadows.
NodeId Start = RefA.Id;
- auto SNA = DFG.addr<RefNode*>(Start);
+ auto SNA = DFG.addr<RefNode *>(Start);
if (NodeId RD = SNA.Addr->getReachingDef())
DefQ.insert(RD);
if (TopShadows) {
for (auto S : DFG.getRelatedRefs(RefA.Addr->getOwner(DFG), RefA))
- if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+ if (NodeId RD = NodeAddr<RefNode *>(S).Addr->getReachingDef())
DefQ.insert(RD);
}
@@ -140,7 +137,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// It is possible that a collection of non-covering (individually) defs
// will be sufficient, but keep going until a covering one is found.
for (unsigned i = 0; i < DefQ.size(); ++i) {
- auto TA = DFG.addr<DefNode*>(DefQ[i]);
+ auto TA = DFG.addr<DefNode *>(DefQ[i]);
if (TA.Addr->getFlags() & NodeAttrs::PhiRef)
continue;
// Stop at the covering/overwriting def of the initial register reference.
@@ -151,7 +148,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// Get the next level of reaching defs. This will include multiple
// reaching defs for shadows.
for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
- if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+ if (NodeId RD = NodeAddr<RefNode *>(S).Addr->getReachingDef())
DefQ.insert(RD);
// Don't visit sibling defs. They share the same reaching def (which
// will be visited anyway), but they define something not aliased to
@@ -159,42 +156,42 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
}
// Return the MachineBasicBlock containing a given instruction.
- auto Block = [this] (NodeAddr<InstrNode*> IA) -> MachineBasicBlock* {
+ auto Block = [this](NodeAddr<InstrNode *> IA) -> MachineBasicBlock * {
if (IA.Addr->getKind() == NodeAttrs::Stmt)
- return NodeAddr<StmtNode*>(IA).Addr->getCode()->getParent();
+ return NodeAddr<StmtNode *>(IA).Addr->getCode()->getParent();
assert(IA.Addr->getKind() == NodeAttrs::Phi);
- NodeAddr<PhiNode*> PA = IA;
- NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG);
+ NodeAddr<PhiNode *> PA = IA;
+ NodeAddr<BlockNode *> BA = PA.Addr->getOwner(DFG);
return BA.Addr->getCode();
};
- SmallSet<NodeId,32> Defs;
+ SmallSet<NodeId, 32> Defs;
// Remove all non-phi defs that are not aliased to RefRR, and separate
  // the remaining defs into buckets for containing blocks.
- std::map<NodeId, NodeAddr<InstrNode*>> Owners;
- std::map<MachineBasicBlock*, SmallVector<NodeId,32>> Blocks;
+ std::map<NodeId, NodeAddr<InstrNode *>> Owners;
+ std::map<MachineBasicBlock *, SmallVector<NodeId, 32>> Blocks;
for (NodeId N : DefQ) {
- auto TA = DFG.addr<DefNode*>(N);
+ auto TA = DFG.addr<DefNode *>(N);
bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG)))
continue;
Defs.insert(TA.Id);
- NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG);
+ NodeAddr<InstrNode *> IA = TA.Addr->getOwner(DFG);
Owners[TA.Id] = IA;
Blocks[Block(IA)].push_back(IA.Id);
}
- auto Precedes = [this,&OrdMap] (NodeId A, NodeId B) {
+ auto Precedes = [this, &OrdMap](NodeId A, NodeId B) {
if (A == B)
return false;
- NodeAddr<InstrNode*> OA = DFG.addr<InstrNode*>(A);
- NodeAddr<InstrNode*> OB = DFG.addr<InstrNode*>(B);
+ NodeAddr<InstrNode *> OA = DFG.addr<InstrNode *>(A);
+ NodeAddr<InstrNode *> OB = DFG.addr<InstrNode *>(B);
bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
if (StmtA && StmtB) {
- const MachineInstr *InA = NodeAddr<StmtNode*>(OA).Addr->getCode();
- const MachineInstr *InB = NodeAddr<StmtNode*>(OB).Addr->getCode();
+ const MachineInstr *InA = NodeAddr<StmtNode *>(OA).Addr->getCode();
+ const MachineInstr *InB = NodeAddr<StmtNode *>(OB).Addr->getCode();
assert(InA->getParent() == InB->getParent());
auto FA = OrdMap.find(InA);
if (FA != OrdMap.end())
@@ -217,14 +214,14 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
return !StmtA;
};
- auto GetOrder = [&OrdMap] (MachineBasicBlock &B) {
+ auto GetOrder = [&OrdMap](MachineBasicBlock &B) {
uint32_t Pos = 0;
for (MachineInstr &In : B)
OrdMap.insert({&In, ++Pos});
};
// For each block, sort the nodes in it.
- std::vector<MachineBasicBlock*> TmpBB;
+ std::vector<MachineBasicBlock *> TmpBB;
for (auto &Bucket : Blocks) {
TmpBB.push_back(Bucket.first);
if (Bucket.second.size() > 2)
@@ -261,18 +258,17 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
RegisterAggr RRs(DefRRs);
- auto DefInSet = [&Defs] (NodeAddr<RefNode*> TA) -> bool {
- return TA.Addr->getKind() == NodeAttrs::Def &&
- Defs.count(TA.Id);
+ auto DefInSet = [&Defs](NodeAddr<RefNode *> TA) -> bool {
+ return TA.Addr->getKind() == NodeAttrs::Def && Defs.count(TA.Id);
};
for (NodeId T : TmpInst) {
if (!FullChain && RRs.hasCoverOf(RefRR))
break;
- auto TA = DFG.addr<InstrNode*>(T);
+ auto TA = DFG.addr<InstrNode *>(T);
bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA);
NodeList Ds;
- for (NodeAddr<DefNode*> DA : TA.Addr->members_if(DefInSet, DFG)) {
+ for (NodeAddr<DefNode *> DA : TA.Addr->members_if(DefInSet, DFG)) {
RegisterRef QR = DA.Addr->getRegRef(DFG);
// Add phi defs even if they are covered by subsequent defs. This is
// for cases where the reached use is not covered by any of the defs
@@ -286,7 +282,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
Ds.push_back(DA);
}
llvm::append_range(RDefs, Ds);
- for (NodeAddr<DefNode*> DA : Ds) {
+ for (NodeAddr<DefNode *> DA : Ds) {
// When collecting a full chain of definitions, do not consider phi
// defs to actually define a register.
uint16_t Flags = DA.Addr->getFlags();
@@ -296,7 +292,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
}
}
- auto DeadP = [](const NodeAddr<DefNode*> DA) -> bool {
+ auto DeadP = [](const NodeAddr<DefNode *> DA) -> bool {
return DA.Addr->getFlags() & NodeAttrs::Dead;
};
llvm::erase_if(RDefs, DeadP);
@@ -304,81 +300,82 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
return RDefs;
}
-std::pair<NodeSet,bool>
-Liveness::getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- NodeSet &Visited, const NodeSet &Defs) {
+std::pair<NodeSet, bool>
+Liveness::getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode *> RefA,
+ NodeSet &Visited, const NodeSet &Defs) {
return getAllReachingDefsRecImpl(RefRR, RefA, Visited, Defs, 0, MaxRecNest);
}
-std::pair<NodeSet,bool>
-Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- NodeSet &Visited, const NodeSet &Defs, unsigned Nest, unsigned MaxNest) {
+std::pair<NodeSet, bool>
+Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode *> RefA,
+ NodeSet &Visited, const NodeSet &Defs,
+ unsigned Nest, unsigned MaxNest) {
if (Nest > MaxNest)
- return { NodeSet(), false };
+ return {NodeSet(), false};
// Collect all defined registers. Do not consider phis to be defining
// anything, only collect "real" definitions.
RegisterAggr DefRRs(PRI);
for (NodeId D : Defs) {
- const auto DA = DFG.addr<const DefNode*>(D);
+ const auto DA = DFG.addr<const DefNode *>(D);
if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
DefRRs.insert(DA.Addr->getRegRef(DFG));
}
NodeList RDs = getAllReachingDefs(RefRR, RefA, false, true, DefRRs);
if (RDs.empty())
- return { Defs, true };
+ return {Defs, true};
// Make a copy of the preexisting definitions and add the newly found ones.
NodeSet TmpDefs = Defs;
- for (NodeAddr<NodeBase*> R : RDs)
+ for (NodeAddr<NodeBase *> R : RDs)
TmpDefs.insert(R.Id);
NodeSet Result = Defs;
- for (NodeAddr<DefNode*> DA : RDs) {
+ for (NodeAddr<DefNode *> DA : RDs) {
Result.insert(DA.Id);
if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
continue;
- NodeAddr<PhiNode*> PA = DA.Addr->getOwner(DFG);
+ NodeAddr<PhiNode *> PA = DA.Addr->getOwner(DFG);
if (!Visited.insert(PA.Id).second)
continue;
// Go over all phi uses and get the reaching defs for each use.
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
const auto &T = getAllReachingDefsRecImpl(RefRR, U, Visited, TmpDefs,
- Nest+1, MaxNest);
+ Nest + 1, MaxNest);
if (!T.second)
- return { T.first, false };
+ return {T.first, false};
Result.insert(T.first.begin(), T.first.end());
}
}
- return { Result, true };
+ return {Result, true};
}
/// Find the nearest ref node aliased to RefRR, going upwards in the data
/// flow, starting from the instruction immediately preceding Inst.
-NodeAddr<RefNode*> Liveness::getNearestAliasedRef(RegisterRef RefRR,
- NodeAddr<InstrNode*> IA) {
- NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+NodeAddr<RefNode *> Liveness::getNearestAliasedRef(RegisterRef RefRR,
+ NodeAddr<InstrNode *> IA) {
+ NodeAddr<BlockNode *> BA = IA.Addr->getOwner(DFG);
NodeList Ins = BA.Addr->members(DFG);
NodeId FindId = IA.Id;
auto E = Ins.rend();
- auto B = std::find_if(Ins.rbegin(), E,
- [FindId] (const NodeAddr<InstrNode*> T) {
- return T.Id == FindId;
- });
+ auto B =
+ std::find_if(Ins.rbegin(), E, [FindId](const NodeAddr<InstrNode *> T) {
+ return T.Id == FindId;
+ });
// Do not scan IA (which is what B would point to).
if (B != E)
++B;
do {
// Process the range of instructions from B to E.
- for (NodeAddr<InstrNode*> I : make_range(B, E)) {
+ for (NodeAddr<InstrNode *> I : make_range(B, E)) {
NodeList Refs = I.Addr->members(DFG);
- NodeAddr<RefNode*> Clob, Use;
+ NodeAddr<RefNode *> Clob, Use;
// Scan all the refs in I aliased to RefRR, and return the one that
// is the closest to the output of I, i.e. def > clobber > use.
- for (NodeAddr<RefNode*> R : Refs) {
+ for (NodeAddr<RefNode *> R : Refs) {
if (!PRI.alias(R.Addr->getRegRef(DFG), RefRR))
continue;
if (DFG.IsDef(R)) {
@@ -398,7 +395,7 @@ NodeAddr<RefNode*> Liveness::getNearestAliasedRef(RegisterRef RefRR,
// Go up to the immediate dominator, if any.
MachineBasicBlock *BB = BA.Addr->getCode();
- BA = NodeAddr<BlockNode*>();
+ BA = NodeAddr<BlockNode *>();
if (MachineDomTreeNode *N = MDT.getNode(BB)) {
if ((N = N->getIDom()))
BA = DFG.findBlock(N->getBlock());
@@ -411,11 +408,11 @@ NodeAddr<RefNode*> Liveness::getNearestAliasedRef(RegisterRef RefRR,
E = Ins.rend();
} while (true);
- return NodeAddr<RefNode*>();
+ return NodeAddr<RefNode *>();
}
-NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
- NodeAddr<DefNode*> DefA, const RegisterAggr &DefRRs) {
+NodeSet Liveness::getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode *> DefA,
+ const RegisterAggr &DefRRs) {
NodeSet Uses;
// If the original register is already covered by all the intervening
@@ -428,7 +425,7 @@ NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
bool IsDead = DefA.Addr->getFlags() & NodeAttrs::Dead;
NodeId U = !IsDead ? DefA.Addr->getReachedUse() : 0;
while (U != 0) {
- auto UA = DFG.addr<UseNode*>(U);
+ auto UA = DFG.addr<UseNode *>(U);
if (!(UA.Addr->getFlags() & NodeAttrs::Undef)) {
RegisterRef UR = UA.Addr->getRegRef(DFG);
if (PRI.alias(RefRR, UR) && !DefRRs.hasCoverOf(UR))
@@ -439,7 +436,7 @@ NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
// Traverse all reached defs. This time dead defs cannot be ignored.
for (NodeId D = DefA.Addr->getReachedDef(), NextD; D != 0; D = NextD) {
- auto DA = DFG.addr<DefNode*>(D);
+ auto DA = DFG.addr<DefNode *>(D);
NextD = DA.Addr->getSibling();
RegisterRef DR = DA.Addr->getRegRef(DFG);
// If this def is already covered, it cannot reach anything new.
@@ -464,20 +461,21 @@ void Liveness::computePhiInfo() {
RealUseMap.clear();
NodeList Phis;
- NodeAddr<FuncNode*> FA = DFG.getFunc();
+ NodeAddr<FuncNode *> FA = DFG.getFunc();
NodeList Blocks = FA.Addr->members(DFG);
- for (NodeAddr<BlockNode*> BA : Blocks) {
+ for (NodeAddr<BlockNode *> BA : Blocks) {
auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
llvm::append_range(Phis, Ps);
}
// phi use -> (map: reaching phi -> set of registers defined in between)
- std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp;
- std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
- std::unordered_map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it.
+ std::map<NodeId, std::map<NodeId, RegisterAggr>> PhiUp;
+ std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
+ std::unordered_map<NodeId, RegisterAggr>
+ PhiDRs; // Phi -> registers defined by it.
// Go over all phis.
- for (NodeAddr<PhiNode*> PhiA : Phis) {
+ for (NodeAddr<PhiNode *> PhiA : Phis) {
// Go over all defs and collect the reached uses that are non-phi uses
// (i.e. the "real uses").
RefMap &RealUses = RealUseMap[PhiA.Id];
@@ -488,7 +486,7 @@ void Liveness::computePhiInfo() {
SetVector<NodeId> DefQ;
NodeSet PhiDefs;
RegisterAggr DRs(PRI);
- for (NodeAddr<RefNode*> R : PhiRefs) {
+ for (NodeAddr<RefNode *> R : PhiRefs) {
if (!DFG.IsRef<NodeAttrs::Def>(R))
continue;
DRs.insert(R.Addr->getRegRef(DFG));
@@ -503,17 +501,17 @@ void Liveness::computePhiInfo() {
// This set of uses will later be trimmed to only contain these uses that
// are actually reached by the phi defs.
for (unsigned i = 0; i < DefQ.size(); ++i) {
- NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]);
+ NodeAddr<DefNode *> DA = DFG.addr<DefNode *>(DefQ[i]);
// Visit all reached uses. Phi defs should not really have the "dead"
// flag set, but check it anyway for consistency.
bool IsDead = DA.Addr->getFlags() & NodeAttrs::Dead;
NodeId UN = !IsDead ? DA.Addr->getReachedUse() : 0;
while (UN != 0) {
- NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
+ NodeAddr<UseNode *> A = DFG.addr<UseNode *>(UN);
uint16_t F = A.Addr->getFlags();
if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) {
RegisterRef R = A.Addr->getRegRef(DFG);
- RealUses[R.Reg].insert({A.Id,R.Mask});
+ RealUses[R.Reg].insert({A.Id, R.Mask});
}
UN = A.Addr->getSibling();
}
@@ -522,9 +520,9 @@ void Liveness::computePhiInfo() {
// later.
NodeId DN = DA.Addr->getReachedDef();
while (DN != 0) {
- NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN);
+ NodeAddr<DefNode *> A = DFG.addr<DefNode *>(DN);
for (auto T : DFG.getRelatedRefs(A.Addr->getOwner(DFG), A)) {
- uint16_t Flags = NodeAddr<DefNode*>(T).Addr->getFlags();
+ uint16_t Flags = NodeAddr<DefNode *>(T).Addr->getFlags();
// Must traverse the reached-def chain. Consider:
// def(D0) -> def(R0) -> def(R0) -> use(D0)
// The reachable use of D0 passes through a def of R0.
@@ -546,21 +544,25 @@ void Liveness::computePhiInfo() {
// = R1:0 u6 Not reached by d1 (covered collectively
// by d3 and d5), but following reached
// defs and uses from d1 will lead here.
- for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) {
+ for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE;) {
// For each reached register UI->first, there is a set UI->second, of
// uses of it. For each such use, check if it is reached by this phi,
// i.e. check if the set of its reaching uses intersects the set of
// this phi's defs.
NodeRefSet Uses = UI->second;
UI->second.clear();
- for (std::pair<NodeId,LaneBitmask> I : Uses) {
- auto UA = DFG.addr<UseNode*>(I.first);
+ for (std::pair<NodeId, LaneBitmask> I : Uses) {
+ auto UA = DFG.addr<UseNode *>(I.first);
// Undef flag is checked above.
assert((UA.Addr->getFlags() & NodeAttrs::Undef) == 0);
- RegisterRef R(UI->first, I.second);
+ RegisterRef UseR(UI->first, I.second); // Ref from Uses
+ // R = intersection of the ref from the phi and the ref from Uses
+ RegisterRef R = PhiDRs.at(PhiA.Id).intersectWith(UseR);
+ if (!R)
+ continue;
// Calculate the exposed part of the reached use.
RegisterAggr Covered(PRI);
- for (NodeAddr<DefNode*> DA : getAllReachingDefs(R, UA)) {
+ for (NodeAddr<DefNode *> DA : getAllReachingDefs(R, UA)) {
if (PhiDefs.count(DA.Id))
break;
Covered.insert(DA.Addr->getRegRef(DFG));
@@ -590,7 +592,7 @@ void Liveness::computePhiInfo() {
for (auto I : PhiRefs) {
if (!DFG.IsRef<NodeAttrs::Use>(I) || SeenUses.count(I.Id))
continue;
- NodeAddr<PhiUseNode*> PUA = I;
+ NodeAddr<PhiUseNode *> PUA = I;
if (PUA.Addr->getReachingDef() == 0)
continue;
@@ -598,10 +600,10 @@ void Liveness::computePhiInfo() {
NodeList Ds = getAllReachingDefs(UR, PUA, true, false, NoRegs);
RegisterAggr DefRRs(PRI);
- for (NodeAddr<DefNode*> D : Ds) {
+ for (NodeAddr<DefNode *> D : Ds) {
if (D.Addr->getFlags() & NodeAttrs::PhiRef) {
NodeId RP = D.Addr->getOwner(DFG).Id;
- std::map<NodeId,RegisterAggr> &M = PhiUp[PUA.Id];
+ std::map<NodeId, RegisterAggr> &M = PhiUp[PUA.Id];
auto F = M.find(RP);
if (F == M.end())
M.insert(std::make_pair(RP, DefRRs));
@@ -611,7 +613,7 @@ void Liveness::computePhiInfo() {
DefRRs.insert(D.Addr->getRegRef(DFG));
}
- for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PhiA, PUA))
+ for (NodeAddr<PhiUseNode *> T : DFG.getRelatedRefs(PhiA, PUA))
SeenUses.insert(T.Id);
}
}
@@ -652,9 +654,11 @@ void Liveness::computePhiInfo() {
// The operation "clearIn" can be expensive. For a given set of intervening
// defs, cache the result of subtracting these defs from a given register
// ref.
+ using RefHash = std::hash<RegisterRef>;
+ using RefEqual = std::equal_to<RegisterRef>;
using SubMap = std::unordered_map<RegisterRef, RegisterRef>;
std::unordered_map<RegisterAggr, SubMap> Subs;
- auto ClearIn = [] (RegisterRef RR, const RegisterAggr &Mid, SubMap &SM) {
+ auto ClearIn = [](RegisterRef RR, const RegisterAggr &Mid, SubMap &SM) {
if (Mid.empty())
return RR;
auto F = SM.find(RR);
@@ -667,12 +671,12 @@ void Liveness::computePhiInfo() {
// Go over all phis.
for (unsigned i = 0; i < PhiUQ.size(); ++i) {
- auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
+ auto PA = DFG.addr<PhiNode *>(PhiUQ[i]);
NodeList PUs = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG);
RefMap &RUM = RealUseMap[PA.Id];
- for (NodeAddr<UseNode*> UA : PUs) {
- std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id];
+ for (NodeAddr<UseNode *> UA : PUs) {
+ std::map<NodeId, RegisterAggr> &PUM = PhiUp[UA.Id];
RegisterRef UR = UA.Addr->getRegRef(DFG);
for (const std::pair<const NodeId, RegisterAggr> &P : PUM) {
bool Changed = false;
@@ -683,7 +687,10 @@ void Liveness::computePhiInfo() {
if (MidDefs.hasCoverOf(UR))
continue;
- SubMap &SM = Subs[MidDefs];
+ if (Subs.find(MidDefs) == Subs.end()) {
+ Subs.insert({MidDefs, SubMap(1, RefHash(), RefEqual(PRI))});
+ }
+ SubMap &SM = Subs.at(MidDefs);
// General algorithm:
// for each (R,U) : U is use node of R, U is reached by PA
@@ -699,13 +706,13 @@ void Liveness::computePhiInfo() {
if (!DRs.hasAliasOf(R))
continue;
R = PRI.mapTo(DRs.intersectWith(R), T.first);
- for (std::pair<NodeId,LaneBitmask> V : T.second) {
+ for (std::pair<NodeId, LaneBitmask> V : T.second) {
LaneBitmask M = R.Mask & V.second;
if (M.none())
continue;
if (RegisterRef SS = ClearIn(RegisterRef(R.Reg, M), MidDefs, SM)) {
NodeRefSet &RS = RealUseMap[P.first][SS.Reg];
- Changed |= RS.insert({V.first,SS.Mask}).second;
+ Changed |= RS.insert({V.first, SS.Mask}).second;
}
}
}
@@ -720,10 +727,10 @@ void Liveness::computePhiInfo() {
dbgs() << "Real use map:\n";
for (auto I : RealUseMap) {
dbgs() << "phi " << Print(I.first, DFG);
- NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first);
+ NodeAddr<PhiNode *> PA = DFG.addr<PhiNode *>(I.first);
NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG);
if (!Ds.empty()) {
- RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(DFG);
+ RegisterRef RR = NodeAddr<DefNode *>(Ds[0]).Addr->getRegRef(DFG);
dbgs() << '<' << Print(RR, DFG) << '>';
} else {
dbgs() << "<noreg>";
@@ -737,10 +744,10 @@ void Liveness::computeLiveIns() {
// Populate the node-to-block map. This speeds up the calculations
// significantly.
NBMap.clear();
- for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) {
+ for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) {
MachineBasicBlock *BB = BA.Addr->getCode();
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
- for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
+ for (NodeAddr<InstrNode *> IA : BA.Addr->members(DFG)) {
+ for (NodeAddr<RefNode *> RA : IA.Addr->members(DFG))
NBMap.insert(std::make_pair(RA.Id, BB));
NBMap.insert(std::make_pair(IA.Id, BB));
}
@@ -754,7 +761,7 @@ void Liveness::computeLiveIns() {
auto F1 = MDF.find(&B);
if (F1 == MDF.end())
continue;
- SetVector<MachineBasicBlock*> IDFB(F1->second.begin(), F1->second.end());
+ SetVector<MachineBasicBlock *> IDFB(F1->second.begin(), F1->second.end());
for (unsigned i = 0; i < IDFB.size(); ++i) {
auto F2 = MDF.find(IDFB[i]);
if (F2 != MDF.end())
@@ -771,16 +778,17 @@ void Liveness::computeLiveIns() {
computePhiInfo();
- NodeAddr<FuncNode*> FA = DFG.getFunc();
+ NodeAddr<FuncNode *> FA = DFG.getFunc();
NodeList Blocks = FA.Addr->members(DFG);
// Build the phi live-on-entry map.
- for (NodeAddr<BlockNode*> BA : Blocks) {
+ for (NodeAddr<BlockNode *> BA : Blocks) {
MachineBasicBlock *MB = BA.Addr->getCode();
RefMap &LON = PhiLON[MB];
- for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG))
+ for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG)) {
for (const RefMap::value_type &S : RealUseMap[P.Id])
LON[S.first].insert(S.second.begin(), S.second.end());
+ }
}
if (Trace) {
@@ -793,9 +801,9 @@ void Liveness::computeLiveIns() {
// Build the phi live-on-exit map. Each phi node has some set of reached
// "real" uses. Propagate this set backwards into the block predecessors
// through the reaching defs of the corresponding phi uses.
- for (NodeAddr<BlockNode*> BA : Blocks) {
+ for (NodeAddr<BlockNode *> BA : Blocks) {
NodeList Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
- for (NodeAddr<PhiNode*> PA : Phis) {
+ for (NodeAddr<PhiNode *> PA : Phis) {
RefMap &RUs = RealUseMap[PA.Id];
if (RUs.empty())
continue;
@@ -804,7 +812,7 @@ void Liveness::computeLiveIns() {
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
if (!SeenUses.insert(U.Id).second)
continue;
- NodeAddr<PhiUseNode*> PUA = U;
+ NodeAddr<PhiUseNode *> PUA = U;
if (PUA.Addr->getReachingDef() == 0)
continue;
@@ -819,18 +827,18 @@ void Liveness::computeLiveIns() {
// For each reached "real" use, identify the set of reaching defs
// coming from each predecessor P, and add them to PhiLOX[P].
//
- auto PrA = DFG.addr<BlockNode*>(PUA.Addr->getPredecessor());
+ auto PrA = DFG.addr<BlockNode *>(PUA.Addr->getPredecessor());
RefMap &LOX = PhiLOX[PrA.Addr->getCode()];
for (const std::pair<const RegisterId, NodeRefSet> &RS : RUs) {
// We need to visit each individual use.
- for (std::pair<NodeId,LaneBitmask> P : RS.second) {
+ for (std::pair<NodeId, LaneBitmask> P : RS.second) {
// Create a register ref corresponding to the use, and find
// all reaching defs starting from the phi use, and treating
// all related shadows as a single use cluster.
RegisterRef S(RS.first, P.second);
NodeList Ds = getAllReachingDefs(S, PUA, true, false, NoRegs);
- for (NodeAddr<DefNode*> D : Ds) {
+ for (NodeAddr<DefNode *> D : Ds) {
// Calculate the mask corresponding to the visited def.
RegisterAggr TA(PRI);
TA.insert(D.Addr->getRegRef(DFG)).intersect(S);
@@ -840,11 +848,11 @@ void Liveness::computeLiveIns() {
}
}
- for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PA, PUA))
+ for (NodeAddr<PhiUseNode *> T : DFG.getRelatedRefs(PA, PUA))
SeenUses.insert(T.Id);
- } // for U : phi uses
- } // for P : Phis
- } // for B : Blocks
+ } // for U : phi uses
+ } // for P : Phis
+ } // for B : Blocks
if (Trace) {
dbgs() << "Phi live-on-exit map:\n";
@@ -865,23 +873,21 @@ void Liveness::computeLiveIns() {
std::vector<RegisterRef> LV;
for (const MachineBasicBlock::RegisterMaskPair &LI : B.liveins())
LV.push_back(RegisterRef(LI.PhysReg, LI.LaneMask));
- llvm::sort(LV);
+ llvm::sort(LV, std::less<RegisterRef>(PRI));
dbgs() << printMBBReference(B) << "\t rec = {";
for (auto I : LV)
dbgs() << ' ' << Print(I, DFG);
dbgs() << " }\n";
- //dbgs() << "\tcomp = " << Print(LiveMap[&B], DFG) << '\n';
+ // dbgs() << "\tcomp = " << Print(LiveMap[&B], DFG) << '\n';
LV.clear();
- const RegisterAggr &LG = LiveMap[&B];
- for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I)
- LV.push_back(*I);
- llvm::sort(LV);
+ for (RegisterRef RR : LiveMap[&B].refs())
+ LV.push_back(RR);
+ llvm::sort(LV, std::less<RegisterRef>(PRI));
dbgs() << "\tcomp = {";
for (auto I : LV)
dbgs() << ' ' << Print(I, DFG);
dbgs() << " }\n";
-
}
}
}
@@ -896,7 +902,7 @@ void Liveness::resetLiveIns() {
B.removeLiveIn(I);
// Add the newly computed live-ins.
const RegisterAggr &LiveIns = LiveMap[&B];
- for (const RegisterRef R : make_range(LiveIns.rr_begin(), LiveIns.rr_end()))
+ for (RegisterRef R : LiveIns.refs())
B.addLiveIn({MCPhysReg(R.Reg), R.Mask});
}
}
@@ -907,7 +913,7 @@ void Liveness::resetKills() {
}
void Liveness::resetKills(MachineBasicBlock *B) {
- auto CopyLiveIns = [this] (MachineBasicBlock *B, BitVector &LV) -> void {
+ auto CopyLiveIns = [this](MachineBasicBlock *B, BitVector &LV) -> void {
for (auto I : B->liveins()) {
MCSubRegIndexIterator S(I.PhysReg, &TRI);
if (!S.isValid()) {
@@ -933,21 +939,21 @@ void Liveness::resetKills(MachineBasicBlock *B) {
continue;
MI.clearKillInfo();
- for (auto &Op : MI.operands()) {
+ for (auto &Op : MI.all_defs()) {
// An implicit def of a super-register may not necessarily start a
// live range of it, since an implicit use could be used to keep parts
// of it live. Instead of analyzing the implicit operands, ignore
// implicit defs.
- if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+ if (Op.isImplicit())
continue;
Register R = Op.getReg();
if (!R.isPhysical())
continue;
- for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
- Live.reset(*SR);
+ for (MCPhysReg SR : TRI.subregs_inclusive(R))
+ Live.reset(SR);
}
- for (auto &Op : MI.operands()) {
- if (!Op.isReg() || !Op.isUse() || Op.isUndef())
+ for (auto &Op : MI.all_uses()) {
+ if (Op.isUndef())
continue;
Register R = Op.getReg();
if (!R.isPhysical())
@@ -961,8 +967,8 @@ void Liveness::resetKills(MachineBasicBlock *B) {
}
if (!IsLive)
Op.setIsKill(true);
- for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
- Live.set(*SR);
+ for (MCPhysReg SR : TRI.subregs_inclusive(R))
+ Live.set(SR);
}
}
}
@@ -1048,9 +1054,9 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
const NodeRefSet &OldDefs = LE.second;
for (NodeRef OR : OldDefs) {
// R is a def node that was live-on-exit
- auto DA = DFG.addr<DefNode*>(OR.first);
- NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG);
- NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+ auto DA = DFG.addr<DefNode *>(OR.first);
+ NodeAddr<InstrNode *> IA = DA.Addr->getOwner(DFG);
+ NodeAddr<BlockNode *> BA = IA.Addr->getOwner(DFG);
if (B != BA.Addr->getCode()) {
// Defs from a different block need to be preserved. Defs from this
// block will need to be processed further, except for phi defs, the
@@ -1081,10 +1087,10 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
// There could be other defs in this block that are a part of that
// chain. Check that now: accumulate the registers from these defs,
// and if they all together cover LRef, it is not live-on-entry.
- for (NodeAddr<DefNode*> TA : getAllReachingDefs(DA)) {
+ for (NodeAddr<DefNode *> TA : getAllReachingDefs(DA)) {
// DefNode -> InstrNode -> BlockNode.
- NodeAddr<InstrNode*> ITA = TA.Addr->getOwner(DFG);
- NodeAddr<BlockNode*> BTA = ITA.Addr->getOwner(DFG);
+ NodeAddr<InstrNode *> ITA = TA.Addr->getOwner(DFG);
+ NodeAddr<BlockNode *> BTA = ITA.Addr->getOwner(DFG);
// Reaching defs are ordered in the upward direction.
if (BTA.Addr->getCode() != B) {
// We have reached past the beginning of B, and the accumulated
@@ -1093,7 +1099,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
// Subtract all accumulated defs (RRs) from LRef.
RegisterRef T = RRs.clearIn(LRef);
assert(T);
- NewDefs.insert({TA.Id,T.Mask});
+ NewDefs.insert({TA.Id, T.Mask});
break;
}
@@ -1118,16 +1124,16 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
// Scan the block for upward-exposed uses and add them to the tracking set.
for (auto I : DFG.getFunc().Addr->findBlock(B, DFG).Addr->members(DFG)) {
- NodeAddr<InstrNode*> IA = I;
+ NodeAddr<InstrNode *> IA = I;
if (IA.Addr->getKind() != NodeAttrs::Stmt)
continue;
- for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
+ for (NodeAddr<UseNode *> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
if (UA.Addr->getFlags() & NodeAttrs::Undef)
continue;
RegisterRef RR = UA.Addr->getRegRef(DFG);
- for (NodeAddr<DefNode*> D : getAllReachingDefs(UA))
+ for (NodeAddr<DefNode *> D : getAllReachingDefs(UA))
if (getBlockWithRef(D.Id) != B)
- LiveIn[RR.Reg].insert({D.Id,RR.Mask});
+ LiveIn[RR.Reg].insert({D.Id, RR.Mask});
}
}
@@ -1145,7 +1151,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
LaneBitmask M;
for (auto P : R.second)
M |= P.second;
- Local.insert(RegisterRef(R.first,M));
+ Local.insert(RegisterRef(R.first, M));
}
if (Trace) {
@@ -1164,6 +1170,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
}
void Liveness::emptify(RefMap &M) {
- for (auto I = M.begin(), E = M.end(); I != E; )
+ for (auto I = M.begin(), E = M.end(); I != E;)
I = I->second.empty() ? M.erase(I) : std::next(I);
}
+
+} // namespace llvm::rdf
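
The resetKills hunk above keeps the same bottom-up scan while switching to MI.all_defs()/all_uses() and TRI.subregs_inclusive(). A rough standalone model of that scan, with toy Operand/Instr structures standing in for the MachineInstr API (the 8-register file and the sample block are invented for illustration):

// Toy backward scan that recomputes kill flags the way resetKills does:
// walk instructions bottom-up; a def clears liveness for its register,
// and a use of a register that is not live below it becomes a kill.
#include <bitset>
#include <cstdio>
#include <vector>

constexpr unsigned NumRegs = 8; // toy register file

struct Operand { unsigned Reg; bool IsDef; bool Kill = false; };
struct Instr { std::vector<Operand> Ops; };

static void resetKills(std::vector<Instr> &Block,
                       std::bitset<NumRegs> LiveOut) {
  std::bitset<NumRegs> Live = LiveOut;
  for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
    for (Operand &Op : It->Ops)
      if (Op.IsDef)
        Live.reset(Op.Reg); // a def ends the live range seen from below
    for (Operand &Op : It->Ops)
      if (!Op.IsDef) {
        Op.Kill = !Live.test(Op.Reg); // last use seen from below
        Live.set(Op.Reg);
      }
  }
}

int main() {
  // r1 = ...; ... = r1; r1 = ...; ... = r1, with r1 live-out.
  std::vector<Instr> B = {
      {{{1, true}}}, {{{1, false}}}, {{{1, true}}}, {{{1, false}}}};
  resetKills(B, std::bitset<NumRegs>().set(1));
  std::printf("use@1 kill=%d use@3 kill=%d\n", B[1].Ops[0].Kill,
              B[3].Ops[0].Kill); // prints: use@1 kill=1 use@3 kill=0
  return 0;
}

The real pass additionally expands each physical register to its inclusive sub-registers before setting or clearing liveness, which is what the subregs_inclusive() loops in the patch take care of.
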
diff --git a/llvm/lib/CodeGen/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp
index 8760ba118934..90520c4c3c71 100644
--- a/llvm/lib/CodeGen/RDFRegisters.cpp
+++ b/llvm/lib/CodeGen/RDFRegisters.cpp
@@ -15,17 +15,18 @@
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <set>
#include <utility>
-using namespace llvm;
-using namespace rdf;
+namespace llvm::rdf {
PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
- const MachineFunction &mf)
+ const MachineFunction &mf)
: TRI(tri) {
RegInfos.resize(TRI.getNumRegs());
@@ -57,7 +58,7 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
UnitInfos[U].Reg = F;
} else {
for (MCRegUnitMaskIterator I(F, &TRI); I.isValid(); ++I) {
- std::pair<uint32_t,LaneBitmask> P = *I;
+ std::pair<uint32_t, LaneBitmask> P = *I;
UnitInfo &UI = UnitInfos[P.first];
UI.Reg = F;
if (P.second.any()) {
@@ -80,15 +81,15 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
if (Op.isRegMask())
RegMasks.insert(Op.getRegMask());
- MaskInfos.resize(RegMasks.size()+1);
+ MaskInfos.resize(RegMasks.size() + 1);
for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) {
BitVector PU(TRI.getNumRegUnits());
const uint32_t *MB = RegMasks.get(M);
for (unsigned I = 1, E = TRI.getNumRegs(); I != E; ++I) {
if (!(MB[I / 32] & (1u << (I % 32))))
continue;
- for (MCRegUnitIterator U(MCRegister::from(I), &TRI); U.isValid(); ++U)
- PU.set(*U);
+ for (MCRegUnit Unit : TRI.regunits(MCRegister::from(I)))
+ PU.set(Unit);
}
MaskInfos[M].Units = PU.flip();
}
@@ -97,134 +98,75 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) {
BitVector AS(TRI.getNumRegs());
for (MCRegUnitRootIterator R(U, &TRI); R.isValid(); ++R)
- for (MCSuperRegIterator S(*R, &TRI, true); S.isValid(); ++S)
- AS.set(*S);
+ for (MCPhysReg S : TRI.superregs_inclusive(*R))
+ AS.set(S);
AliasInfos[U].Regs = AS;
}
}
+bool PhysicalRegisterInfo::alias(RegisterRef RA, RegisterRef RB) const {
+ return !disjoint(getUnits(RA), getUnits(RB));
+}
+
std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const {
- // Do not include RR in the alias set.
+ // Do not include Reg in the alias set.
std::set<RegisterId> AS;
- assert(isRegMaskId(Reg) || Register::isPhysicalRegister(Reg));
- if (isRegMaskId(Reg)) {
+ assert(!RegisterRef::isUnitId(Reg) && "No units allowed");
+ if (RegisterRef::isMaskId(Reg)) {
// XXX SLOW
const uint32_t *MB = getRegMaskBits(Reg);
for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
- if (MB[i/32] & (1u << (i%32)))
+ if (MB[i / 32] & (1u << (i % 32)))
continue;
AS.insert(i);
}
- for (const uint32_t *RM : RegMasks) {
- RegisterId MI = getRegMaskId(RM);
- if (MI != Reg && aliasMM(RegisterRef(Reg), RegisterRef(MI)))
- AS.insert(MI);
- }
return AS;
}
+ assert(RegisterRef::isRegId(Reg));
for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI)
AS.insert(*AI);
- for (const uint32_t *RM : RegMasks) {
- RegisterId MI = getRegMaskId(RM);
- if (aliasRM(RegisterRef(Reg), RegisterRef(MI)))
- AS.insert(MI);
- }
+
return AS;
}
-bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const {
- assert(Register::isPhysicalRegister(RA.Reg));
- assert(Register::isPhysicalRegister(RB.Reg));
-
- MCRegUnitMaskIterator UMA(RA.Reg, &TRI);
- MCRegUnitMaskIterator UMB(RB.Reg, &TRI);
- // Reg units are returned in the numerical order.
- while (UMA.isValid() && UMB.isValid()) {
- // Skip units that are masked off in RA.
- std::pair<RegisterId,LaneBitmask> PA = *UMA;
- if (PA.second.any() && (PA.second & RA.Mask).none()) {
- ++UMA;
- continue;
- }
- // Skip units that are masked off in RB.
- std::pair<RegisterId,LaneBitmask> PB = *UMB;
- if (PB.second.any() && (PB.second & RB.Mask).none()) {
- ++UMB;
- continue;
- }
+std::set<RegisterId> PhysicalRegisterInfo::getUnits(RegisterRef RR) const {
+ std::set<RegisterId> Units;
- if (PA.first == PB.first)
- return true;
- if (PA.first < PB.first)
- ++UMA;
- else if (PB.first < PA.first)
- ++UMB;
- }
- return false;
-}
+ if (RR.Reg == 0)
+ return Units; // Empty
-bool PhysicalRegisterInfo::aliasRM(RegisterRef RR, RegisterRef RM) const {
- assert(Register::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg));
- const uint32_t *MB = getRegMaskBits(RM.Reg);
- bool Preserved = MB[RR.Reg/32] & (1u << (RR.Reg%32));
- // If the lane mask information is "full", e.g. when the given lane mask
- // is a superset of the lane mask from the register class, check the regmask
- // bit directly.
- if (RR.Mask == LaneBitmask::getAll())
- return !Preserved;
- const TargetRegisterClass *RC = RegInfos[RR.Reg].RegClass;
- if (RC != nullptr && (RR.Mask & RC->LaneMask) == RC->LaneMask)
- return !Preserved;
-
- // Otherwise, check all subregisters whose lane mask overlaps the given
- // mask. For each such register, if it is preserved by the regmask, then
- // clear the corresponding bits in the given mask. If at the end, all
- // bits have been cleared, the register does not alias the regmask (i.e.
- // is it preserved by it).
- LaneBitmask M = RR.Mask;
- for (MCSubRegIndexIterator SI(RR.Reg, &TRI); SI.isValid(); ++SI) {
- LaneBitmask SM = TRI.getSubRegIndexLaneMask(SI.getSubRegIndex());
- if ((SM & RR.Mask).none())
- continue;
- unsigned SR = SI.getSubReg();
- if (!(MB[SR/32] & (1u << (SR%32))))
- continue;
- // The subregister SR is preserved.
- M &= ~SM;
- if (M.none())
- return false;
+ if (RR.isReg()) {
+ if (RR.Mask.none())
+ return Units; // Empty
+ for (MCRegUnitMaskIterator UM(RR.idx(), &TRI); UM.isValid(); ++UM) {
+ auto [U, M] = *UM;
+ if (M.none() || (M & RR.Mask).any())
+ Units.insert(U);
+ }
+ return Units;
}
- return true;
-}
-
-bool PhysicalRegisterInfo::aliasMM(RegisterRef RM, RegisterRef RN) const {
- assert(isRegMaskId(RM.Reg) && isRegMaskId(RN.Reg));
+ assert(RR.isMask());
unsigned NumRegs = TRI.getNumRegs();
- const uint32_t *BM = getRegMaskBits(RM.Reg);
- const uint32_t *BN = getRegMaskBits(RN.Reg);
-
- for (unsigned w = 0, nw = NumRegs/32; w != nw; ++w) {
- // Intersect the negations of both words. Disregard reg=0,
- // i.e. 0th bit in the 0th word.
- uint32_t C = ~BM[w] & ~BN[w];
- if (w == 0)
- C &= ~1;
- if (C)
- return true;
+ const uint32_t *MB = getRegMaskBits(RR.idx());
+ for (unsigned I = 0, E = (NumRegs + 31) / 32; I != E; ++I) {
+ uint32_t C = ~MB[I]; // Clobbered regs
+ if (I == 0) // Reg 0 should be ignored
+ C &= maskLeadingOnes<unsigned>(31);
+ if (I + 1 == E && NumRegs % 32 != 0) // Last word may be partial
+ C &= maskTrailingOnes<unsigned>(NumRegs % 32);
+ if (C == 0)
+ continue;
+ while (C != 0) {
+ unsigned T = llvm::countr_zero(C);
+ unsigned CR = 32 * I + T; // Clobbered reg
+ for (MCRegUnit U : TRI.regunits(CR))
+ Units.insert(U);
+ C &= ~(1u << T);
+ }
}
-
- // Check the remaining registers in the last word.
- unsigned TailRegs = NumRegs % 32;
- if (TailRegs == 0)
- return false;
- unsigned TW = NumRegs / 32;
- uint32_t TailMask = (1u << TailRegs) - 1;
- if (~BM[TW] & ~BN[TW] & TailMask)
- return true;
-
- return false;
+ return Units;
}
RegisterRef PhysicalRegisterInfo::mapTo(RegisterRef RR, unsigned R) const {
@@ -234,20 +176,133 @@ RegisterRef PhysicalRegisterInfo::mapTo(RegisterRef RR, unsigned R) const {
return RegisterRef(R, TRI.composeSubRegIndexLaneMask(Idx, RR.Mask));
if (unsigned Idx = TRI.getSubRegIndex(RR.Reg, R)) {
const RegInfo &RI = RegInfos[R];
- LaneBitmask RCM = RI.RegClass ? RI.RegClass->LaneMask
- : LaneBitmask::getAll();
+ LaneBitmask RCM =
+ RI.RegClass ? RI.RegClass->LaneMask : LaneBitmask::getAll();
LaneBitmask M = TRI.reverseComposeSubRegIndexLaneMask(Idx, RR.Mask);
return RegisterRef(R, M & RCM);
}
llvm_unreachable("Invalid arguments: unrelated registers?");
}
+bool PhysicalRegisterInfo::equal_to(RegisterRef A, RegisterRef B) const {
+ if (!A.isReg() || !B.isReg()) {
+ // For non-regs, or comparing reg and non-reg, use only the Reg member.
+ return A.Reg == B.Reg;
+ }
+
+ if (A.Reg == B.Reg)
+ return A.Mask == B.Mask;
+
+ // Compare reg units lexicographically.
+ MCRegUnitMaskIterator AI(A.Reg, &getTRI());
+ MCRegUnitMaskIterator BI(B.Reg, &getTRI());
+ while (AI.isValid() && BI.isValid()) {
+ auto [AReg, AMask] = *AI;
+ auto [BReg, BMask] = *BI;
+
+ // Lane masks are "none" for units that don't correspond to subregs
+ // e.g. a single unit in a leaf register, or aliased unit.
+ if (AMask.none())
+ AMask = LaneBitmask::getAll();
+ if (BMask.none())
+ BMask = LaneBitmask::getAll();
+
+ // If both iterators point to a unit contained in both A and B, then
+ // compare the units.
+ if ((AMask & A.Mask).any() && (BMask & B.Mask).any()) {
+ if (AReg != BReg)
+ return false;
+ // Units are equal, move on to the next ones.
+ ++AI;
+ ++BI;
+ continue;
+ }
+
+ if ((AMask & A.Mask).none())
+ ++AI;
+ if ((BMask & B.Mask).none())
+ ++BI;
+ }
+ // One or both have reached the end.
+ return static_cast<int>(AI.isValid()) == static_cast<int>(BI.isValid());
+}
+
+bool PhysicalRegisterInfo::less(RegisterRef A, RegisterRef B) const {
+ if (!A.isReg() || !B.isReg()) {
+ // For non-regs, or comparing reg and non-reg, use only the Reg member.
+ return A.Reg < B.Reg;
+ }
+
+ if (A.Reg == B.Reg)
+ return A.Mask < B.Mask;
+ if (A.Mask == B.Mask)
+ return A.Reg < B.Reg;
+
+ // Compare reg units lexicographically.
+ llvm::MCRegUnitMaskIterator AI(A.Reg, &getTRI());
+ llvm::MCRegUnitMaskIterator BI(B.Reg, &getTRI());
+ while (AI.isValid() && BI.isValid()) {
+ auto [AReg, AMask] = *AI;
+ auto [BReg, BMask] = *BI;
+
+ // Lane masks are "none" for units that don't correspond to subregs
+ // e.g. a single unit in a leaf register, or aliased unit.
+ if (AMask.none())
+ AMask = LaneBitmask::getAll();
+ if (BMask.none())
+ BMask = LaneBitmask::getAll();
+
+ // If both iterators point to a unit contained in both A and B, then
+ // compare the units.
+ if ((AMask & A.Mask).any() && (BMask & B.Mask).any()) {
+ if (AReg != BReg)
+ return AReg < BReg;
+ // Units are equal, move on to the next ones.
+ ++AI;
+ ++BI;
+ continue;
+ }
+
+ if ((AMask & A.Mask).none())
+ ++AI;
+ if ((BMask & B.Mask).none())
+ ++BI;
+ }
+ // One or both have reached the end: assume invalid < valid.
+ return static_cast<int>(AI.isValid()) < static_cast<int>(BI.isValid());
+}
+
+void PhysicalRegisterInfo::print(raw_ostream &OS, RegisterRef A) const {
+ if (A.Reg == 0 || A.isReg()) {
+ if (0 < A.idx() && A.idx() < TRI.getNumRegs())
+ OS << TRI.getName(A.idx());
+ else
+ OS << printReg(A.idx(), &TRI);
+ OS << PrintLaneMaskShort(A.Mask);
+ } else if (A.isUnit()) {
+ OS << printRegUnit(A.idx(), &TRI);
+ } else {
+ assert(A.isMask());
+ // RegMask SS flag is preserved by idx().
+ unsigned Idx = Register::stackSlot2Index(A.idx());
+ const char *Fmt = Idx < 0x10000 ? "%04x" : "%08x";
+ OS << "M#" << format(Fmt, Idx);
+ }
+}
+
+void PhysicalRegisterInfo::print(raw_ostream &OS, const RegisterAggr &A) const {
+ OS << '{';
+ for (unsigned U : A.units())
+ OS << ' ' << printRegUnit(U, &TRI);
+ OS << " }";
+}
+
bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
- if (PhysicalRegisterInfo::isRegMaskId(RR.Reg))
+ if (RR.isMask())
return Units.anyCommon(PRI.getMaskUnits(RR.Reg));
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
- std::pair<uint32_t,LaneBitmask> P = *U;
+ std::pair<uint32_t, LaneBitmask> P = *U;
if (P.second.none() || (P.second & RR.Mask).any())
if (Units.test(P.first))
return true;
@@ -256,13 +311,13 @@ bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
}
bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
- if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
+ if (RR.isMask()) {
BitVector T(PRI.getMaskUnits(RR.Reg));
return T.reset(Units).none();
}
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
- std::pair<uint32_t,LaneBitmask> P = *U;
+ std::pair<uint32_t, LaneBitmask> P = *U;
if (P.second.none() || (P.second & RR.Mask).any())
if (!Units.test(P.first))
return false;
@@ -271,13 +326,13 @@ bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
}
RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
- if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
+ if (RR.isMask()) {
Units |= PRI.getMaskUnits(RR.Reg);
return *this;
}
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
- std::pair<uint32_t,LaneBitmask> P = *U;
+ std::pair<uint32_t, LaneBitmask> P = *U;
if (P.second.none() || (P.second & RR.Mask).any())
Units.set(P.first);
}
@@ -350,22 +405,14 @@ RegisterRef RegisterAggr::makeRegRef() const {
LaneBitmask M;
for (MCRegUnitMaskIterator I(F, &PRI.getTRI()); I.isValid(); ++I) {
- std::pair<uint32_t,LaneBitmask> P = *I;
+ std::pair<uint32_t, LaneBitmask> P = *I;
if (Units.test(P.first))
M |= P.second.none() ? LaneBitmask::getAll() : P.second;
}
return RegisterRef(F, M);
}
-void RegisterAggr::print(raw_ostream &OS) const {
- OS << '{';
- for (int U = Units.find_first(); U >= 0; U = Units.find_next(U))
- OS << ' ' << printRegUnit(U, &PRI.getTRI());
- OS << " }";
-}
-
-RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG,
- bool End)
+RegisterAggr::ref_iterator::ref_iterator(const RegisterAggr &RG, bool End)
: Owner(&RG) {
for (int U = RG.Units.find_first(); U >= 0; U = RG.Units.find_next(U)) {
RegisterRef R = RG.PRI.getRefForUnit(U);
@@ -375,7 +422,23 @@ RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG,
Index = End ? Masks.size() : 0;
}
-raw_ostream &rdf::operator<<(raw_ostream &OS, const RegisterAggr &A) {
- A.print(OS);
+raw_ostream &operator<<(raw_ostream &OS, const RegisterAggr &A) {
+ A.getPRI().print(OS, A);
return OS;
}
+
+raw_ostream &operator<<(raw_ostream &OS, const PrintLaneMaskShort &P) {
+ if (P.Mask.all())
+ return OS;
+ if (P.Mask.none())
+ return OS << ":*none*";
+
+ LaneBitmask::Type Val = P.Mask.getAsInteger();
+ if ((Val & 0xffff) == Val)
+ return OS << ':' << format("%04llX", Val);
+ if ((Val & 0xffffffff) == Val)
+ return OS << ':' << format("%08llX", Val);
+ return OS << ':' << PrintLaneMask(P.Mask);
+}
+
+} // namespace llvm::rdf
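// Illustrative sketch, not part of the patch: the new equal_to()/less() above
// compare RegisterRefs by the set of register units their lane masks select,
// so refs naming different registers can compare equal when they cover the
// same units. Hypothetical target where S0 is the low half of D0 (register
// names, LowIdx and the lane masks are assumptions for illustration only):
//
//   RegisterRef Lo(D0, TRI.getSubRegIndexLaneMask(LowIdx)); // low lanes of D0
//   RegisterRef Full(S0, LaneBitmask::getAll());            // all of S0
//   PRI.equal_to(Lo, Full); // expected true: both select exactly D0's low unit
//   PRI.less(Lo, Full) || PRI.less(Full, Lo); // expected false for equal refs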
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index d9ced9191fae..75fbc8ba35b1 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -65,13 +65,13 @@ void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
// This is the entry block.
if (MBB->pred_empty()) {
for (const auto &LI : MBB->liveins()) {
- for (MCRegUnitIterator Unit(LI.PhysReg, TRI); Unit.isValid(); ++Unit) {
+ for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) {
// Treat function live-ins as if they were defined just before the first
// instruction. Usually, function arguments are set up immediately
// before the call.
- if (LiveRegs[*Unit] != -1) {
- LiveRegs[*Unit] = -1;
- MBBReachingDefs[MBBNumber][*Unit].push_back(-1);
+ if (LiveRegs[Unit] != -1) {
+ LiveRegs[Unit] = -1;
+ MBBReachingDefs[MBBNumber][Unit].push_back(-1);
}
}
}
@@ -128,16 +128,15 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
for (auto &MO : MI->operands()) {
if (!isValidRegDef(MO))
continue;
- for (MCRegUnitIterator Unit(MO.getReg().asMCReg(), TRI); Unit.isValid();
- ++Unit) {
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg().asMCReg())) {
// This instruction explicitly defines the current reg unit.
- LLVM_DEBUG(dbgs() << printRegUnit(*Unit, TRI) << ":\t" << CurInstr
- << '\t' << *MI);
+ LLVM_DEBUG(dbgs() << printRegUnit(Unit, TRI) << ":\t" << CurInstr << '\t'
+ << *MI);
// How many instructions since this reg unit was last written?
- if (LiveRegs[*Unit] != CurInstr) {
- LiveRegs[*Unit] = CurInstr;
- MBBReachingDefs[MBBNumber][*Unit].push_back(CurInstr);
+ if (LiveRegs[Unit] != CurInstr) {
+ LiveRegs[Unit] = CurInstr;
+ MBBReachingDefs[MBBNumber][Unit].push_back(CurInstr);
}
}
}
@@ -269,8 +268,8 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI,
assert(MBBNumber < MBBReachingDefs.size() &&
"Unexpected basic block number.");
int LatestDef = ReachingDefDefaultVal;
- for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
- for (int Def : MBBReachingDefs[MBBNumber][*Unit]) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ for (int Def : MBBReachingDefs[MBBNumber][Unit]) {
if (Def >= InstId)
break;
DefRes = Def;
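// Illustrative sketch, not part of the patch: the mechanical change repeated
// throughout these files replaces explicit MCRegUnitIterator loops with the
// range-based TargetRegisterInfo::regunits() accessor; both forms visit the
// same MCRegUnit values of a physical register.
//
//   // Before:
//   for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit)
//     LiveRegs[*Unit] = CurInstr;
//
//   // After:
//   for (MCRegUnit Unit : TRI->regunits(PhysReg))
//     LiveRegs[Unit] = CurInstr;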
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index 91795f3d27fe..666199139630 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -58,7 +58,7 @@ class RABasic : public MachineFunctionPass,
public RegAllocBase,
private LiveRangeEdit::Delegate {
// context
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
// state
std::unique_ptr<Spiller> SpillerInstance;
@@ -213,8 +213,8 @@ bool RABasic::spillInterferences(const LiveInterval &VirtReg,
SmallVector<const LiveInterval *, 8> Intfs;
// Collect interferences assigned to any alias of the physical register.
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
for (const auto *Intf : reverse(Q.interferingVRegs())) {
if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight())
return false;
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index b1743d3f987d..81f3d2c8099f 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -43,6 +43,7 @@ static cl::opt<bool> EnableLocalReassignment(
"may be compile time intensive"),
cl::init(false));
+namespace llvm {
cl::opt<unsigned> EvictInterferenceCutoff(
"regalloc-eviction-max-interference-cutoff", cl::Hidden,
cl::desc("Number of interferences after which we declare "
@@ -50,6 +51,7 @@ cl::opt<unsigned> EvictInterferenceCutoff(
"is a compilation cost-saving consideration. To "
"disable, pass a very large number."),
cl::init(10));
+}
#define DEBUG_TYPE "regalloc"
#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL
@@ -100,9 +102,7 @@ template <> Pass *llvm::callDefaultCtor<RegAllocEvictionAdvisorAnalysis>() {
#endif
break;
case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release:
-#if defined(LLVM_HAVE_TF_AOT)
Ret = createReleaseModeAdvisor();
-#endif
break;
}
if (Ret)
@@ -201,8 +201,8 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
EvictionCost Cost;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
    // If there are 10 or more interferences, chances are one is heavier.
const auto &Interferences = Q.interferingVRegs(EvictInterferenceCutoff);
if (Interferences.size() >= EvictInterferenceCutoff)
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index 46838570a2fc..52dd946a6854 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -121,7 +121,7 @@ public:
protected:
RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA);
- Register canReassign(const LiveInterval &VirtReg, Register PrevReg) const;
+ bool canReassign(const LiveInterval &VirtReg, MCRegister FromReg) const;
  // Get the upper limit of elements in the given Order we need to analyze.
  // TODO: this is a heuristic; we could consider learning it.
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 775e66e48406..864beb8720f4 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -75,15 +75,15 @@ namespace {
}
private:
- MachineFrameInfo *MFI;
- MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
+ MachineFrameInfo *MFI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
RegisterClassInfo RegClassInfo;
const RegClassFilterFunc ShouldAllocateClass;
/// Basic block currently being allocated.
- MachineBasicBlock *MBB;
+ MachineBasicBlock *MBB = nullptr;
/// Maps virtual regs to the frame index where these values are spilled.
IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
@@ -106,7 +106,7 @@ namespace {
}
};
- using LiveRegMap = SparseSet<LiveReg>;
+ using LiveRegMap = SparseSet<LiveReg, identity<unsigned>, uint16_t>;
/// This map contains entries for each virtual register that is currently
/// available in a physical register.
LiveRegMap LiveVirtRegs;
@@ -161,8 +161,8 @@ namespace {
/// Mark a physreg as used in this instruction.
void markRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- UsedInInstr.insert(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ UsedInInstr.insert(Unit);
}
// Check if physreg is clobbered by instruction's regmask(s).
@@ -176,10 +176,10 @@ namespace {
bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg))
return true;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- if (UsedInInstr.count(*Units))
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (UsedInInstr.count(Unit))
return true;
- if (LookAtPhysRegUses && PhysRegUses.count(*Units))
+ if (LookAtPhysRegUses && PhysRegUses.count(Unit))
return true;
}
return false;
@@ -188,14 +188,14 @@ namespace {
/// Mark physical register as being used in a register use operand.
/// This is only used by the special livethrough handling code.
void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- PhysRegUses.insert(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ PhysRegUses.insert(Unit);
}
/// Remove mark of physical register being used in the instruction.
void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- UsedInInstr.erase(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ UsedInInstr.erase(Unit);
}
enum : unsigned {
@@ -240,6 +240,8 @@ namespace {
void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
Register Reg) const;
+ void findAndSortDefOperandIndexes(const MachineInstr &MI);
+
void allocateInstruction(MachineInstr &MI);
void handleDebugValue(MachineInstr &MI);
void handleBundle(MachineInstr &MI);
@@ -265,18 +267,18 @@ namespace {
void allocVirtRegUndef(MachineOperand &MO);
void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg,
MCPhysReg Reg);
- void defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+ bool defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
Register VirtReg);
- void defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ bool defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
bool LookAtPhysRegUses = false);
- void useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg);
+ bool useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg);
MachineBasicBlock::iterator
getMBBBeginInsertionPoint(MachineBasicBlock &MBB,
SmallSet<Register, 2> &PrologLiveIns) const;
void reloadAtBegin(MachineBasicBlock &MBB);
- void setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+ bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
Register traceCopies(Register VirtReg) const;
Register traceCopyChain(Register Reg) const;
@@ -308,13 +310,13 @@ bool RegAllocFast::shouldAllocateRegister(const Register Reg) const {
}
void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
- RegUnitStates[*UI] = NewState;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ RegUnitStates[Unit] = NewState;
}
bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const {
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- if (RegUnitStates[*UI] != regFree)
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (RegUnitStates[Unit] != regFree)
return false;
}
return true;
@@ -552,7 +554,7 @@ void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) {
if (PhysReg == 0)
continue;
- MCRegister FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
+ MCRegister FirstUnit = *TRI->regunits(PhysReg).begin();
if (RegUnitStates[FirstUnit] == regLiveIn)
continue;
@@ -593,8 +595,7 @@ bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) {
bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
bool displacedAny = false;
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- unsigned Unit = *UI;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
switch (unsigned VirtReg = RegUnitStates[Unit]) {
default: {
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
@@ -623,7 +624,7 @@ bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':');
- MCRegister FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
+ MCRegister FirstUnit = *TRI->regunits(PhysReg).begin();
switch (unsigned VirtReg = RegUnitStates[FirstUnit]) {
case regFree:
LLVM_DEBUG(dbgs() << '\n');
@@ -648,8 +649,8 @@ void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
/// disabled - it can be allocated directly.
/// \returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- switch (unsigned VirtReg = RegUnitStates[*UI]) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ switch (unsigned VirtReg = RegUnitStates[Unit]) {
case regFree:
break;
case regPreAssigned:
@@ -875,10 +876,11 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
/// Variation of defineVirtReg() with special handling for livethrough regs
/// (tied or earlyclobber) that may interfere with preassigned uses.
-void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
Register VirtReg) {
if (!shouldAllocateRegister(VirtReg))
- return;
+ return false;
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
if (LRI != LiveVirtRegs.end()) {
MCPhysReg PrevReg = LRI->PhysReg;
@@ -909,11 +911,13 @@ void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
/// perform an allocation if:
/// - It is a dead definition without any uses.
/// - The value is live out and all uses are in different basic blocks.
-void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+///
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
Register VirtReg, bool LookAtPhysRegUses) {
assert(VirtReg.isVirtual() && "Not a virtual register");
if (!shouldAllocateRegister(VirtReg))
- return;
+ return false;
MachineOperand &MO = MI.getOperand(OpNum);
LiveRegMap::iterator LRI;
bool New;
@@ -948,6 +952,23 @@ void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
<< LRI->Reloaded << '\n');
bool Kill = LRI->LastUse == nullptr;
spill(SpillBefore, VirtReg, PhysReg, Kill, LRI->LiveOut);
+
+ // We need to place additional spills for each indirect destination of an
+ // INLINEASM_BR.
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) {
+ int FI = StackSlotForVirtReg[VirtReg];
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isMBB()) {
+ MachineBasicBlock *Succ = MO.getMBB();
+ TII->storeRegToStackSlot(*Succ, Succ->begin(), PhysReg, Kill,
+ FI, &RC, TRI, VirtReg);
+ ++NumStores;
+ Succ->addLiveIn(PhysReg);
+ }
+ }
+ }
+
LRI->LastUse = nullptr;
}
LRI->LiveOut = false;
@@ -957,15 +978,16 @@ void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
BundleVirtRegsMap[VirtReg] = PhysReg;
}
markRegUsedInInstr(PhysReg);
- setPhysReg(MI, MO, PhysReg);
+ return setPhysReg(MI, MO, PhysReg);
}
/// Allocates a register for a VirtReg use.
-void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
Register VirtReg) {
assert(VirtReg.isVirtual() && "Not a virtual register");
if (!shouldAllocateRegister(VirtReg))
- return;
+ return false;
MachineOperand &MO = MI.getOperand(OpNum);
LiveRegMap::iterator LRI;
bool New;
@@ -1002,8 +1024,7 @@ void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
if (LRI->Error) {
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
- setPhysReg(MI, MO, *AllocationOrder.begin());
- return;
+ return setPhysReg(MI, MO, *AllocationOrder.begin());
}
}
@@ -1013,18 +1034,17 @@ void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
BundleVirtRegsMap[VirtReg] = LRI->PhysReg;
}
markRegUsedInInstr(LRI->PhysReg);
- setPhysReg(MI, MO, LRI->PhysReg);
+ return setPhysReg(MI, MO, LRI->PhysReg);
}
-/// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This
-/// may invalidate any operand pointers. Return true if the operand kills its
-/// register.
-void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
+/// Changes operand OpNum in MI to refer to the PhysReg, considering subregs.
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
MCPhysReg PhysReg) {
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
MO.setIsRenamable(true);
- return;
+ return false;
}
// Handle subregister index.
@@ -1040,7 +1060,8 @@ void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
// register kill.
if (MO.isKill()) {
MI.addRegisterKilled(PhysReg, TRI, true);
- return;
+ // Conservatively assume implicit MOs were re-arranged
+ return true;
}
// A <def,read-undef> of a sub-register requires an implicit def of the full
@@ -1050,7 +1071,10 @@ void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
MI.addRegisterDead(PhysReg, TRI, true);
else
MI.addRegisterDefined(PhysReg, TRI);
+ // Conservatively assume implicit MOs were re-arranged
+ return true;
}
+ return false;
}
#ifndef NDEBUG
@@ -1090,8 +1114,8 @@ void RegAllocFast::dumpState() const {
if (PhysReg != 0) {
assert(Register::isPhysicalRegister(PhysReg) &&
"mapped to physreg");
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- assert(RegUnitStates[*UI] == VirtReg && "inverse map valid");
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ assert(RegUnitStates[Unit] == VirtReg && "inverse map valid");
}
}
}
@@ -1130,6 +1154,72 @@ void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts
}
}
+/// Compute \ref DefOperandIndexes so it contains the indices of "def" operands
+/// that are to be allocated. Those are ordered in a way that small classes,
+/// early clobbers and livethroughs are allocated first.
+void RegAllocFast::findAndSortDefOperandIndexes(const MachineInstr &MI) {
+ DefOperandIndexes.clear();
+
+ // Track number of defs which may consume a register from the class.
+ std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
+ assert(RegClassDefCounts[0] == 0);
+
+ LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n");
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (MO.readsReg()) {
+ if (Reg.isPhysical()) {
+ LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI) << '\n');
+ markPhysRegUsedInInstr(Reg);
+ }
+ }
+
+ if (MO.isDef()) {
+ if (Reg.isVirtual() && shouldAllocateRegister(Reg))
+ DefOperandIndexes.push_back(I);
+
+ addRegClassDefCounts(RegClassDefCounts, Reg);
+ }
+ }
+
+ llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) {
+ const MachineOperand &MO0 = MI.getOperand(I0);
+ const MachineOperand &MO1 = MI.getOperand(I1);
+ Register Reg0 = MO0.getReg();
+ Register Reg1 = MO1.getReg();
+ const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0);
+ const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1);
+
+    // Identify register classes that are easy to use up completely just in
+ // instruction.
+ unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size();
+ unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size();
+
+ bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()];
+ bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()];
+ if (SmallClass0 > SmallClass1)
+ return true;
+ if (SmallClass0 < SmallClass1)
+ return false;
+
+ // Allocate early clobbers and livethrough operands first.
+ bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() ||
+ (MO0.getSubReg() == 0 && !MO0.isUndef());
+ bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() ||
+ (MO1.getSubReg() == 0 && !MO1.isUndef());
+ if (Livethrough0 > Livethrough1)
+ return true;
+ if (Livethrough0 < Livethrough1)
+ return false;
+
+ // Tie-break rule: operand index.
+ return I0 < I1;
+ });
+}
+
void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// The basic algorithm here is:
// 1. Mark registers of def operands as free
@@ -1201,6 +1291,10 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Allocate virtreg defs.
if (HasDef) {
if (HasVRegDef) {
+ // Note that Implicit MOs can get re-arranged by defineVirtReg(), so loop
+ // multiple times to ensure no operand is missed.
+ bool ReArrangedImplicitOps = true;
+
// Special handling for early clobbers, tied operands or subregister defs:
// Compared to "normal" defs these:
// - Must not use a register that is pre-assigned for a use operand.
@@ -1208,90 +1302,45 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// heuristic to figure out a good operand order before doing
// assignments.
if (NeedToAssignLiveThroughs) {
- DefOperandIndexes.clear();
PhysRegUses.clear();
- // Track number of defs which may consume a register from the class.
- std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
- assert(RegClassDefCounts[0] == 0);
-
- LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n");
- for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg())
- continue;
- Register Reg = MO.getReg();
- if (MO.readsReg()) {
- if (Reg.isPhysical()) {
- LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI)
- << '\n');
- markPhysRegUsedInInstr(Reg);
+ while (ReArrangedImplicitOps) {
+ ReArrangedImplicitOps = false;
+ findAndSortDefOperandIndexes(MI);
+ for (uint16_t OpIdx : DefOperandIndexes) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
+ unsigned Reg = MO.getReg();
+ if (MO.isEarlyClobber() ||
+ (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) ||
+ (MO.getSubReg() && !MO.isUndef())) {
+ ReArrangedImplicitOps = defineLiveThroughVirtReg(MI, OpIdx, Reg);
+ } else {
+ ReArrangedImplicitOps = defineVirtReg(MI, OpIdx, Reg);
+ }
+ if (ReArrangedImplicitOps) {
+ // Implicit operands of MI were re-arranged,
+ // re-compute DefOperandIndexes.
+ break;
}
- }
-
- if (MO.isDef()) {
- if (Reg.isVirtual() && shouldAllocateRegister(Reg))
- DefOperandIndexes.push_back(I);
-
- addRegClassDefCounts(RegClassDefCounts, Reg);
- }
- }
-
- llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) {
- const MachineOperand &MO0 = MI.getOperand(I0);
- const MachineOperand &MO1 = MI.getOperand(I1);
- Register Reg0 = MO0.getReg();
- Register Reg1 = MO1.getReg();
- const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0);
- const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1);
-
- // Identify regclass that are easy to use up completely just in this
- // instruction.
- unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size();
- unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size();
-
- bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()];
- bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()];
- if (SmallClass0 > SmallClass1)
- return true;
- if (SmallClass0 < SmallClass1)
- return false;
-
- // Allocate early clobbers and livethrough operands first.
- bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() ||
- (MO0.getSubReg() == 0 && !MO0.isUndef());
- bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() ||
- (MO1.getSubReg() == 0 && !MO1.isUndef());
- if (Livethrough0 > Livethrough1)
- return true;
- if (Livethrough0 < Livethrough1)
- return false;
-
- // Tie-break rule: operand index.
- return I0 < I1;
- });
-
- for (uint16_t OpIdx : DefOperandIndexes) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
- unsigned Reg = MO.getReg();
- if (MO.isEarlyClobber() ||
- (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) ||
- (MO.getSubReg() && !MO.isUndef())) {
- defineLiveThroughVirtReg(MI, OpIdx, Reg);
- } else {
- defineVirtReg(MI, OpIdx, Reg);
}
}
} else {
// Assign virtual register defs.
- for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
- MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef())
- continue;
- Register Reg = MO.getReg();
- if (Reg.isVirtual())
- defineVirtReg(MI, I, Reg);
+ while (ReArrangedImplicitOps) {
+ ReArrangedImplicitOps = false;
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual()) {
+ ReArrangedImplicitOps = defineVirtReg(MI, I, Reg);
+ if (ReArrangedImplicitOps) {
+ break;
+ }
+ }
+ }
}
}
}
@@ -1304,9 +1353,11 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
if (!MO.isReg() || !MO.isDef())
continue;
+ Register Reg = MO.getReg();
+
// subreg defs don't free the full register. We left the subreg number
// around as a marker in setPhysReg() to recognize this case here.
- if (MO.getSubReg() != 0) {
+ if (Reg.isPhysical() && MO.getSubReg() != 0) {
MO.setSubReg(0);
continue;
}
@@ -1317,7 +1368,6 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Do not free tied operands and early clobbers.
if ((MO.isTied() && !TiedOpIsUndef(MO, I)) || MO.isEarlyClobber())
continue;
- Register Reg = MO.getReg();
if (!Reg)
continue;
if (Reg.isVirtual()) {
@@ -1364,38 +1414,42 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
// Allocate virtreg uses and insert reloads as necessary.
+ // Implicit MOs can get moved/removed by useVirtReg(), so loop multiple
+ // times to ensure no operand is missed.
bool HasUndefUse = false;
- for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
- MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isUse())
- continue;
- Register Reg = MO.getReg();
- if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
- continue;
-
- if (MO.isUndef()) {
- HasUndefUse = true;
- continue;
- }
-
+ bool ReArrangedImplicitMOs = true;
+ while (ReArrangedImplicitMOs) {
+ ReArrangedImplicitMOs = false;
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
+ continue;
- // Populate MayLiveAcrossBlocks in case the use block is allocated before
- // the def block (removing the vreg uses).
- mayLiveIn(Reg);
+ if (MO.isUndef()) {
+ HasUndefUse = true;
+ continue;
+ }
+ // Populate MayLiveAcrossBlocks in case the use block is allocated before
+ // the def block (removing the vreg uses).
+ mayLiveIn(Reg);
- assert(!MO.isInternalRead() && "Bundles not supported");
- assert(MO.readsReg() && "reading use");
- useVirtReg(MI, I, Reg);
+ assert(!MO.isInternalRead() && "Bundles not supported");
+ assert(MO.readsReg() && "reading use");
+ ReArrangedImplicitMOs = useVirtReg(MI, I, Reg);
+ if (ReArrangedImplicitMOs)
+ break;
+ }
}
// Allocate undef operands. This is a separate step because in a situation
// like ` = OP undef %X, %X` both operands need the same register assign
// so we should perform the normal assignment first.
if (HasUndefUse) {
- for (MachineOperand &MO : MI.uses()) {
- if (!MO.isReg() || !MO.isUse())
- continue;
+ for (MachineOperand &MO : MI.all_uses()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
continue;
@@ -1407,8 +1461,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free early clobbers.
if (HasEarlyClobber) {
- for (MachineOperand &MO : llvm::reverse(MI.operands())) {
- if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber())
+ for (MachineOperand &MO : llvm::reverse(MI.all_defs())) {
+ if (!MO.isEarlyClobber())
continue;
assert(!MO.getSubReg() && "should be already handled in def processing");
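// Illustrative sketch, not part of the patch: defineVirtReg(), useVirtReg()
// and setPhysReg() now return true when they add or re-arrange MI's implicit
// operands, which invalidates any saved operand indices. The callers in
// allocateInstruction() therefore restart their operand scan, roughly:
//
//   bool ReArrangedImplicitOps = true;
//   while (ReArrangedImplicitOps) {
//     ReArrangedImplicitOps = false;
//     for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
//       ...
//       ReArrangedImplicitOps = defineVirtReg(MI, I, Reg);
//       if (ReArrangedImplicitOps)
//         break; // operand list changed; rescan from the first operand
//     }
//   }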
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index b43a4d2a4b85..68f6ea3268a9 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -444,31 +444,27 @@ MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
// Interference eviction
//===----------------------------------------------------------------------===//
-Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
- Register PrevReg) const {
- auto Order =
- AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
- MCRegister PhysReg;
- for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
- if ((*I).id() == PrevReg.id())
- continue;
+bool RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
+ MCRegister FromReg) const {
+ auto HasRegUnitInterference = [&](MCRegUnit Unit) {
+ // Instantiate a "subquery", not to be confused with the Queries array.
+ LiveIntervalUnion::Query SubQ(VirtReg, Matrix->getLiveUnions()[Unit]);
+ return SubQ.checkInterference();
+ };
- MCRegUnitIterator Units(*I, TRI);
- for (; Units.isValid(); ++Units) {
- // Instantiate a "subquery", not to be confused with the Queries array.
- LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
- if (subQ.checkInterference())
- break;
+ for (MCRegister Reg :
+ AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix)) {
+ if (Reg == FromReg)
+ continue;
+ // If no units have interference, reassignment is possible.
+ if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) {
+ LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
+ << printReg(FromReg, TRI) << " to "
+ << printReg(Reg, TRI) << '\n');
+ return true;
}
- // If no units have interference, break out with the current PhysReg.
- if (!Units.isValid())
- PhysReg = *I;
}
- if (PhysReg)
- LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
- << printReg(PrevReg, TRI) << " to "
- << printReg(PhysReg, TRI) << '\n');
- return PhysReg;
+ return false;
}
/// evictInterference - Evict any interferring registers that prevent VirtReg
@@ -487,8 +483,8 @@ void RAGreedy::evictInterference(const LiveInterval &VirtReg,
// Collect all interfering virtregs first.
SmallVector<const LiveInterval *, 8> Intfs;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
// We usually have the interfering VRegs cached so collectInterferingVRegs()
    // should be fast; we may need to recalculate when different physregs
// overlap the same register unit so we had different SubRanges queried
@@ -1286,10 +1282,12 @@ static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
/// VirtReg.
static bool readsLaneSubset(const MachineRegisterInfo &MRI,
const MachineInstr *MI, const LiveInterval &VirtReg,
- const TargetRegisterInfo *TRI, SlotIndex Use) {
+ const TargetRegisterInfo *TRI, SlotIndex Use,
+ const TargetInstrInfo *TII) {
// Early check the common case.
- if (MI->isCopy() &&
- MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg())
+ auto DestSrc = TII->isCopyInstr(*MI);
+ if (DestSrc &&
+ DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
return false;
  // FIXME: We're only considering uses, but should we consider defs too?
@@ -1348,14 +1346,14 @@ unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
// the allocation.
for (const SlotIndex Use : Uses) {
if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
- if (MI->isFullCopy() ||
+ if (TII->isFullCopyInstr(*MI) ||
(SplitSubClass &&
SuperRCNumAllocatableRegs ==
getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
TII, TRI, RegClassInfo)) ||
// TODO: Handle split for subranges with subclass constraints?
(!SplitSubClass && VirtReg.hasSubRanges() &&
- !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) {
+ !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
continue;
}
@@ -1404,9 +1402,9 @@ void RAGreedy::calcGapWeights(MCRegister PhysReg,
GapWeight.assign(NumGaps, 0.0f);
// Add interference from each overlapping register.
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
- .checkInterference())
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit)
+ .checkInterference())
continue;
// We know that VirtReg is a continuous interval from FirstInstr to
@@ -1417,7 +1415,7 @@ void RAGreedy::calcGapWeights(MCRegister PhysReg,
// StartIdx and after StopIdx.
//
LiveIntervalUnion::SegmentIter IntI =
- Matrix->getLiveUnions()[*Units] .find(StartIdx);
+ Matrix->getLiveUnions()[Unit].find(StartIdx);
for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
// Skip the gaps before IntI.
while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
@@ -1439,8 +1437,8 @@ void RAGreedy::calcGapWeights(MCRegister PhysReg,
}
// Add fixed interference.
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- const LiveRange &LR = LIS->getRegUnit(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ const LiveRange &LR = LIS->getRegUnit(Unit);
LiveRange::const_iterator I = LR.find(StartIdx);
LiveRange::const_iterator E = LR.end();
@@ -1771,8 +1769,8 @@ bool RAGreedy::mayRecolorAllInterferences(
SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
    // If there are LastChanceRecoloringMaxInterference or more interferences,
// chances are one would not be recolorable.
if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
@@ -1960,7 +1958,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
// don't add it to NewVRegs because its physical register will be restored
// below. Other vregs in CurrentNewVRegs are created by calling
// selectOrSplit and should be added into NewVRegs.
- for (Register &R : CurrentNewVRegs) {
+ for (Register R : CurrentNewVRegs) {
if (RecoloringCandidates.count(&LIS->getInterval(R)))
continue;
NewVRegs.push_back(R);
@@ -2142,7 +2140,7 @@ void RAGreedy::initializeCSRCost() {
/// \p Out is not cleared before being populated.
void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
- if (!Instr.isFullCopy())
+ if (!TII->isFullCopyInstr(Instr))
continue;
// Look for the other end of the copy.
Register OtherReg = Instr.getOperand(0).getReg();
@@ -2457,21 +2455,22 @@ RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
MI.getOpcode() == TargetOpcode::STATEPOINT;
};
for (MachineInstr &MI : MBB) {
- if (MI.isCopy()) {
- const MachineOperand &Dest = MI.getOperand(0);
- const MachineOperand &Src = MI.getOperand(1);
+ auto DestSrc = TII->isCopyInstr(MI);
+ if (DestSrc) {
+ const MachineOperand &Dest = *DestSrc->Destination;
+ const MachineOperand &Src = *DestSrc->Source;
Register SrcReg = Src.getReg();
Register DestReg = Dest.getReg();
// Only count `COPY`s with a virtual register as source or destination.
if (SrcReg.isVirtual() || DestReg.isVirtual()) {
if (SrcReg.isVirtual()) {
SrcReg = VRM->getPhys(SrcReg);
- if (Src.getSubReg())
+ if (SrcReg && Src.getSubReg())
SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
}
if (DestReg.isVirtual()) {
DestReg = VRM->getPhys(DestReg);
- if (Dest.getSubReg())
+ if (DestReg && Dest.getSubReg())
DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
}
if (SrcReg != DestReg)
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h
index e0ac88c0aeb9..0f8f9a7d5811 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -166,20 +166,20 @@ private:
SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
// context
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
// Shortcuts to some useful interface.
- const TargetInstrInfo *TII;
+ const TargetInstrInfo *TII = nullptr;
// analyses
- SlotIndexes *Indexes;
- MachineBlockFrequencyInfo *MBFI;
- MachineDominatorTree *DomTree;
- MachineLoopInfo *Loops;
- MachineOptimizationRemarkEmitter *ORE;
- EdgeBundles *Bundles;
- SpillPlacement *SpillPlacer;
- LiveDebugVariables *DebugVars;
+ SlotIndexes *Indexes = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+ MachineDominatorTree *DomTree = nullptr;
+ MachineLoopInfo *Loops = nullptr;
+ MachineOptimizationRemarkEmitter *ORE = nullptr;
+ EdgeBundles *Bundles = nullptr;
+ SpillPlacement *SpillPlacer = nullptr;
+ LiveDebugVariables *DebugVars = nullptr;
// state
std::unique_ptr<Spiller> SpillerInstance;
@@ -204,7 +204,7 @@ private:
CO_Interf = 2
};
- uint8_t CutOffInfo;
+ uint8_t CutOffInfo = CutOffStage::CO_None;
#ifndef NDEBUG
static const char *const StageName[];
@@ -278,9 +278,9 @@ private:
/// Flags for the live range priority calculation, determined once per
/// machine function.
- bool RegClassPriorityTrumpsGlobalness;
+ bool RegClassPriorityTrumpsGlobalness = false;
- bool ReverseLocalAssignment;
+ bool ReverseLocalAssignment = false;
public:
RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp
index b3d926eeb552..925a0f085c4b 100644
--- a/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -634,8 +634,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
// vregLI overlaps fixed regunit interference.
bool Interference = false;
- for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) {
- if (VRegLI.overlaps(LIS.getRegUnit(*Units))) {
+ for (MCRegUnit Unit : TRI.regunits(PReg)) {
+ if (VRegLI.overlaps(LIS.getRegUnit(Unit))) {
Interference = true;
break;
}
diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
index b3a13cc92316..e031019a4c91 100644
--- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
@@ -81,9 +81,7 @@ template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() {
#endif
break;
case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release:
-#if defined(LLVM_HAVE_TF_AOT_REGALLOCPRIORITYMODEL)
Ret = createReleaseModePriorityAdvisor();
-#endif
break;
}
if (Ret)
diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index 16afd15e29e4..6657cf3c1ef4 100644
--- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -208,8 +208,8 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
MCPhysReg Reg = CSRegs[i];
if (SavedRegs.test(Reg)) {
// Save subregisters
- for (MCSubRegIterator SR(Reg, &TRI); SR.isValid(); ++SR)
- SavedRegs.set(*SR);
+ for (MCPhysReg SR : TRI.subregs(Reg))
+ SavedRegs.set(SR);
}
}
}
diff --git a/llvm/lib/CodeGen/RegisterBank.cpp b/llvm/lib/CodeGen/RegisterBank.cpp
index 512b21aeacaf..8e0a0b0dc282 100644
--- a/llvm/lib/CodeGen/RegisterBank.cpp
+++ b/llvm/lib/CodeGen/RegisterBank.cpp
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
@@ -21,15 +22,16 @@ using namespace llvm;
const unsigned RegisterBank::InvalidID = UINT_MAX;
-RegisterBank::RegisterBank(
- unsigned ID, const char *Name, unsigned Size,
- const uint32_t *CoveredClasses, unsigned NumRegClasses)
- : ID(ID), Name(Name), Size(Size) {
+RegisterBank::RegisterBank(unsigned ID, const char *Name,
+ const uint32_t *CoveredClasses,
+ unsigned NumRegClasses)
+ : ID(ID), Name(Name) {
ContainedRegClasses.resize(NumRegClasses);
ContainedRegClasses.setBitsInMask(CoveredClasses);
}
-bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
+bool RegisterBank::verify(const RegisterBankInfo &RBI,
+ const TargetRegisterInfo &TRI) const {
assert(isValid() && "Invalid register bank");
for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) {
const TargetRegisterClass &RC = *TRI.getRegClass(RCId);
@@ -50,7 +52,7 @@ bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
// Verify that the Size of the register bank is big enough to cover
// all the register classes it covers.
- assert(getSize() >= TRI.getRegSizeInBits(SubRC) &&
+ assert(RBI.getMaximumSize(getID()) >= TRI.getRegSizeInBits(SubRC) &&
"Size is not big enough for all the subclasses!");
assert(covers(SubRC) && "Not all subclasses are covered");
}
@@ -64,7 +66,7 @@ bool RegisterBank::covers(const TargetRegisterClass &RC) const {
}
bool RegisterBank::isValid() const {
- return ID != InvalidID && Name != nullptr && Size != 0 &&
+ return ID != InvalidID && Name != nullptr &&
// A register bank that does not cover anything is useless.
!ContainedRegClasses.empty();
}
@@ -89,7 +91,7 @@ void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
OS << getName();
if (!IsForDebug)
return;
- OS << "(ID:" << getID() << ", Size:" << getSize() << ")\n"
+ OS << "(ID:" << getID() << ")\n"
<< "isValid:" << isValid() << '\n'
<< "Number of Covered register classes: " << ContainedRegClasses.count()
<< '\n';
diff --git a/llvm/lib/CodeGen/RegisterBankInfo.cpp b/llvm/lib/CodeGen/RegisterBankInfo.cpp
index 27ed17b9f4f6..658a09fd8700 100644
--- a/llvm/lib/CodeGen/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/RegisterBankInfo.cpp
@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/RegisterBankInfo.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -52,9 +53,11 @@ const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
//------------------------------------------------------------------------------
// RegisterBankInfo implementation.
//------------------------------------------------------------------------------
-RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks,
- unsigned NumRegBanks)
- : RegBanks(RegBanks), NumRegBanks(NumRegBanks) {
+RegisterBankInfo::RegisterBankInfo(const RegisterBank **RegBanks,
+ unsigned NumRegBanks, const unsigned *Sizes,
+ unsigned HwMode)
+ : RegBanks(RegBanks), NumRegBanks(NumRegBanks), Sizes(Sizes),
+ HwMode(HwMode) {
#ifndef NDEBUG
for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank");
@@ -70,7 +73,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
assert(Idx == RegBank.getID() &&
"ID does not match the index in the array");
LLVM_DEBUG(dbgs() << "Verify " << RegBank << '\n');
- assert(RegBank.verify(TRI) && "RegBank is invalid");
+ assert(RegBank.verify(*this, TRI) && "RegBank is invalid");
}
#endif // NDEBUG
return true;
@@ -79,31 +82,32 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
const RegisterBank *
RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
- if (Reg.isPhysical()) {
+ if (!Reg.isVirtual()) {
// FIXME: This was probably a copy to a virtual register that does have a
// type we could use.
- return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT());
+ const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg, TRI);
+ return RC ? &getRegBankFromRegClass(*RC, LLT()) : nullptr;
}
- assert(Reg && "NoRegister does not have a register bank");
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
- if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>())
+ if (auto *RB = dyn_cast_if_present<const RegisterBank *>(RegClassOrBank))
return RB;
- if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
+ if (auto *RC =
+ dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank))
return &getRegBankFromRegClass(*RC, MRI.getType(Reg));
return nullptr;
}
-const TargetRegisterClass &
+const TargetRegisterClass *
RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
const TargetRegisterInfo &TRI) const {
assert(Reg.isPhysical() && "Reg must be a physreg");
const auto &RegRCIt = PhysRegMinimalRCs.find(Reg);
if (RegRCIt != PhysRegMinimalRCs.end())
- return *RegRCIt->second;
- const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg);
+ return RegRCIt->second;
+ const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClassLLT(Reg, LLT());
PhysRegMinimalRCs[Reg] = PhysRC;
- return *PhysRC;
+ return PhysRC;
}
const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
@@ -131,10 +135,10 @@ const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
// If the register already has a class, fallback to MRI::constrainRegClass.
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
- if (RegClassOrBank.is<const TargetRegisterClass *>())
+ if (isa<const TargetRegisterClass *>(RegClassOrBank))
return MRI.constrainRegClass(Reg, &RC);
- const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+ const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
// Otherwise, all we can do is ensure the bank covers the class, and set it.
if (RB && !RB->covers(RC))
return nullptr;
@@ -498,7 +502,7 @@ unsigned RegisterBankInfo::getSizeInBits(Register Reg,
// Instead, we need to access a register class that contains Reg and
// get the size of that register class.
// Because this is expensive, we'll cache the register class by calling
- auto *RC = &getMinimalPhysRegClass(Reg, TRI);
+ auto *RC = getMinimalPhysRegClass(Reg, TRI);
assert(RC && "Expecting Register class");
return TRI.getRegSizeInBits(*RC);
}
@@ -515,12 +519,14 @@ LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const {
}
#endif
-bool RegisterBankInfo::PartialMapping::verify() const {
+bool RegisterBankInfo::PartialMapping::verify(
+ const RegisterBankInfo &RBI) const {
assert(RegBank && "Register bank not set");
assert(Length && "Empty mapping");
assert((StartIdx <= getHighBitIdx()) && "Overflow, switch to APInt?");
// Check if the minimum width fits into RegBank.
- assert(RegBank->getSize() >= Length && "Register bank too small for Mask");
+ assert(RBI.getMaximumSize(RegBank->getID()) >= Length &&
+ "Register bank too small for Mask");
return true;
}
@@ -545,13 +551,14 @@ bool RegisterBankInfo::ValueMapping::partsAllUniform() const {
return true;
}
-bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
+bool RegisterBankInfo::ValueMapping::verify(const RegisterBankInfo &RBI,
+ unsigned MeaningfulBitWidth) const {
assert(NumBreakDowns && "Value mapped nowhere?!");
unsigned OrigValueBitWidth = 0;
for (const RegisterBankInfo::PartialMapping &PartMap : *this) {
// Check that each register bank is big enough to hold the partial value:
// this check is done by PartialMapping::verify
- assert(PartMap.verify() && "Partial mapping is invalid");
+ assert(PartMap.verify(RBI) && "Partial mapping is invalid");
// The original value should completely be mapped.
// Thus the maximum accessed index + 1 is the size of the original value.
OrigValueBitWidth =
@@ -625,8 +632,9 @@ bool RegisterBankInfo::InstructionMapping::verify(
(void)MOMapping;
// Register size in bits.
// This size must match what the mapping expects.
- assert(MOMapping.verify(RBI->getSizeInBits(
- Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) &&
+ assert(MOMapping.verify(*RBI, RBI->getSizeInBits(
+ Reg, MF.getRegInfo(),
+ *MF.getSubtarget().getRegisterInfo())) &&
"Value mapping is invalid");
}
return true;
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index ab1215974fc5..e49885b6ad96 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -116,7 +116,7 @@ static cl::opt<unsigned> LargeIntervalFreqThreshold(
cl::desc("For a large interval, if it is coalesed with other live "
"intervals many times more than the threshold, stop its "
"coalescing to control the compile time. "),
- cl::init(100));
+ cl::init(256));
namespace {
@@ -153,12 +153,6 @@ namespace {
using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
DenseMap<Register, std::vector<DbgValueLoc>> DbgVRegToValues;
- /// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached.
- /// To avoid repeatedly merging sets of DbgValueLocs, instead record
- /// which vregs have been coalesced, and where to. This map is from
- /// vreg => {set of vregs merged in}.
- DenseMap<Register, SmallVector<Register, 4>> DbgMergedVRegNums;
-
/// A LaneMask to remember on which subregister live ranges we need to call
/// shrinkToUses() later.
LaneBitmask ShrinkMask;
@@ -404,14 +398,14 @@ char RegisterCoalescer::ID = 0;
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
-INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
- "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer",
+ "Register Coalescer", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
- "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer",
+ "Register Coalescer", false, false)
[[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri,
const MachineInstr *MI, Register &Src,
@@ -1257,8 +1251,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
static bool definesFullReg(const MachineInstr &MI, Register Reg) {
assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
- for (const MachineOperand &Op : MI.operands()) {
- if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ for (const MachineOperand &Op : MI.all_defs()) {
+ if (Op.getReg() != Reg)
continue;
// Return true if we define the full register or don't care about the value
// inside other subregisters.
@@ -1502,11 +1496,18 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
- // VNI is in ValNo - remove any segments in this SubRange that have this ValNo
+
if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
+ // VNI is in ValNo - remove any segments in this SubRange that have
+ // this ValNo
SR.removeValNo(RmValNo);
- UpdatedSubRanges = true;
}
+
+ // We may not have a defined value at this point, but still need to
+ // clear out any empty subranges tentatively created by
+ // updateRegDefUses. The original subrange def may have only undefed
+ // some lanes.
+ UpdatedSubRanges = true;
} else {
// We know that this lane is defined by this instruction,
// but at this point it may be empty because it is not used by
@@ -1545,9 +1546,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// no live-ranges would have been created for ECX.
// Fix that!
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
- for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
- Units.isValid(); ++Units)
- if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
+ for (MCRegUnit Unit : TRI->regunits(NewMI.getOperand(0).getReg()))
+ if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
@@ -1561,8 +1561,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
MCRegister Reg = NewMIImplDefs[i];
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
- if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
@@ -1713,8 +1713,8 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// is still part of the function (but about to be erased), mark all
// defs of DstReg in it as <undef>, so that shrinkToUses would
// ignore them.
- for (MachineOperand &MO : CopyMI->operands())
- if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
+ for (MachineOperand &MO : CopyMI->all_defs())
+ if (MO.getReg() == DstReg)
MO.setIsUndef(true);
LIS->shrinkToUses(&DstLI);
@@ -2164,14 +2164,14 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// Deny any overlapping intervals. This depends on all the reserved
// register live ranges to look like dead defs.
if (!MRI->isConstantPhysReg(DstReg)) {
- for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
+ for (MCRegUnit Unit : TRI->regunits(DstReg)) {
// Abort if not all the regunits are reserved.
- for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
+ for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) {
if (!MRI->isReserved(*RI))
return false;
}
- if (RHS.overlaps(LIS->getRegUnit(*UI))) {
- LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI)
+ if (RHS.overlaps(LIS->getRegUnit(Unit))) {
+ LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(Unit, TRI)
<< '\n');
return false;
}
@@ -2202,6 +2202,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// ...
// use %physreg_x
CopyMI = MRI->getVRegDef(SrcReg);
+ deleteInstr(CopyMI);
} else {
// VReg is copied into physreg:
// %y = def
@@ -2246,15 +2247,15 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
<< printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
+ deleteInstr(CopyMI);
+
// Create a new dead def at the new def location.
- for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
- LiveRange &LR = LIS->getRegUnit(*UI);
+ for (MCRegUnit Unit : TRI->regunits(DstReg)) {
+ LiveRange &LR = LIS->getRegUnit(Unit);
LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
}
}
- deleteInstr(CopyMI);
-
// We don't track kills for reserved registers.
MRI->clearKillFlags(CP.getSrcReg());
@@ -2569,8 +2570,8 @@ public:
LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
const {
LaneBitmask L;
- for (const MachineOperand &MO : DefMI->operands()) {
- if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
+ for (const MachineOperand &MO : DefMI->all_defs()) {
+ if (MO.getReg() != Reg)
continue;
L |= TRI->getSubRegIndexLaneMask(
TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
@@ -2786,13 +2787,22 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
//
// When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
// to erase the IMPLICIT_DEF instruction.
- if (DefMI &&
- DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ MachineBasicBlock *OtherMBB = Indexes->getMBBFromIndex(V.OtherVNI->def);
+ if (DefMI && DefMI->getParent() != OtherMBB) {
LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
<< " extends into "
<< printMBBReference(*DefMI->getParent())
<< ", keeping it.\n");
OtherV.ErasableImplicitDef = false;
+ } else if (OtherMBB->hasEHPadSuccessor()) {
+ // If OtherV is defined in a basic block that has EH pad successors then
+ // we get the same problem not just if OtherV is live beyond its basic
+ // block, but beyond the last call instruction in its basic block. Handle
+ // this case conservatively.
+ LLVM_DEBUG(
+ dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " may be live into EH pad successors, keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
} else {
// We deferred clearing these lanes in case we needed to save them
OtherV.ValidLanes &= ~OtherV.WriteLanes;
@@ -2952,7 +2962,7 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
// its lanes.
if (OtherV.ErasableImplicitDef &&
TrackSubRegLiveness &&
- (OtherV.WriteLanes & ~V.ValidLanes).any()) {
+ (OtherV.ValidLanes & ~V.ValidLanes).any()) {
LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
OtherV.ErasableImplicitDef = false;
@@ -3029,8 +3039,8 @@ bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
LaneBitmask Lanes) const {
if (MI.isDebugOrPseudoInstr())
return false;
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
+ for (const MachineOperand &MO : MI.all_uses()) {
+ if (MO.getReg() != Reg)
continue;
if (!MO.readsReg())
continue;
@@ -3759,18 +3769,9 @@ void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
};
- // Scan for potentially unsound DBG_VALUEs: examine first the register number
- // Reg, and then any other vregs that may have been merged into it.
- auto PerformScan = [this](Register Reg, std::function<void(Register)> Func) {
- Func(Reg);
- if (DbgMergedVRegNums.count(Reg))
- for (Register X : DbgMergedVRegNums[Reg])
- Func(X);
- };
-
// Scan for unsound updates of both the source and destination register.
- PerformScan(CP.getSrcReg(), ScanForSrcReg);
- PerformScan(CP.getDstReg(), ScanForDstReg);
+ ScanForSrcReg(CP.getSrcReg());
+ ScanForDstReg(CP.getDstReg());
}
void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
@@ -4099,7 +4100,7 @@ void RegisterCoalescer::releaseMemory() {
}
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
- LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ LLVM_DEBUG(dbgs() << "********** REGISTER COALESCER **********\n"
<< "********** Function: " << fn.getName() << '\n');
// Variables changed between a setjmp and a longjump can have undefined value
@@ -4151,7 +4152,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
MF->verify(this, "Before register coalescing");
DbgVRegToValues.clear();
- DbgMergedVRegNums.clear();
buildVRegToDbgValueMap(fn);
RegClassInfo.runOnMachineFunction(fn);
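Most of the mechanical churn in the hunks above comes from two API migrations that recur throughout this import: MCRegUnitIterator loops become range-based TRI->regunits() loops, and hand-rolled operand filtering becomes MI.all_defs() / MI.all_uses(). A small sketch of the before/after shape follows; it is not part of the patch, the helper name is invented, Reg is assumed to be a physical register, and Units is assumed to be sized to TRI->getNumRegUnits().

#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

static unsigned countDefsAndCollectUnits(const MachineInstr &MI, Register Reg,
                                         const TargetRegisterInfo *TRI,
                                         BitVector &Units) {
  // Old style removed by this import:
  //   for (MCRegUnitIterator RUI(Reg.asMCReg(), TRI); RUI.isValid(); ++RUI)
  //     Units.set(*RUI);
  // New style: regunits() returns an iterable range of MCRegUnit.
  for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
    Units.set(Unit);

  // Old style:
  //   for (const MachineOperand &MO : MI.operands())
  //     if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) ++NumDefs;
  // New style: all_defs() already filters to register def operands.
  unsigned NumDefs = 0;
  for (const MachineOperand &MO : MI.all_defs())
    if (MO.getReg() == Reg)
      ++NumDefs;
  return NumDefs;
}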
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index d4c29f96a4f9..f86aa3a16720 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -521,9 +521,8 @@ class RegisterOperandsCollector {
if (Reg.isVirtual()) {
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll()));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
- ++Units)
- addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
+ for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg()))
+ addRegLanes(RegUnits, RegisterMaskPair(Unit, LaneBitmask::getAll()));
}
}
@@ -557,9 +556,8 @@ class RegisterOperandsCollector {
: MRI.getMaxLaneMaskForVReg(Reg);
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
- ++Units)
- addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
+ for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg()))
+ addRegLanes(RegUnits, RegisterMaskPair(Unit, LaneBitmask::getAll()));
}
}
};
diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index 8d10a5558315..c00d3fde6426 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -96,13 +96,13 @@ void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
}
void RegScavenger::addRegUnits(BitVector &BV, MCRegister Reg) {
- for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
- BV.set(*RUI);
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ BV.set(Unit);
}
void RegScavenger::removeRegUnits(BitVector &BV, MCRegister Reg) {
- for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
- BV.reset(*RUI);
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ BV.reset(Unit);
}
void RegScavenger::determineKillsAndDefs() {
@@ -198,25 +198,13 @@ void RegScavenger::forward() {
// S1 can be freely clobbered.
// Ideally we would like a way to model this, but leaving the
// insert_subreg around causes both correctness and performance issues.
- bool SubUsed = false;
- for (const MCPhysReg &SubReg : TRI->subregs(Reg))
- if (isRegUsed(SubReg)) {
- SubUsed = true;
- break;
- }
- bool SuperUsed = false;
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
- if (isRegUsed(*SR)) {
- SuperUsed = true;
- break;
- }
- }
- if (!SubUsed && !SuperUsed) {
+ if (none_of(TRI->subregs(Reg),
+ [&](MCPhysReg SR) { return isRegUsed(SR); }) &&
+ none_of(TRI->superregs(Reg),
+ [&](MCPhysReg SR) { return isRegUsed(SR); })) {
MBB->getParent()->verify(nullptr, "In Register Scavenger");
llvm_unreachable("Using an undefined register!");
}
- (void)SubUsed;
- (void)SuperUsed;
}
} else {
assert(MO.isDef());
@@ -282,70 +270,6 @@ BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
return Mask;
}
-Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
- BitVector &Candidates,
- unsigned InstrLimit,
- MachineBasicBlock::iterator &UseMI) {
- int Survivor = Candidates.find_first();
- assert(Survivor > 0 && "No candidates for scavenging");
-
- MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
- assert(StartMI != ME && "MI already at terminator");
- MachineBasicBlock::iterator RestorePointMI = StartMI;
- MachineBasicBlock::iterator MI = StartMI;
-
- bool inVirtLiveRange = false;
- for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
- if (MI->isDebugOrPseudoInstr()) {
- ++InstrLimit; // Don't count debug instructions
- continue;
- }
- bool isVirtKillInsn = false;
- bool isVirtDefInsn = false;
- // Remove any candidates touched by instruction.
- for (const MachineOperand &MO : MI->operands()) {
- if (MO.isRegMask())
- Candidates.clearBitsNotInMask(MO.getRegMask());
- if (!MO.isReg() || MO.isUndef() || !MO.getReg())
- continue;
- if (MO.getReg().isVirtual()) {
- if (MO.isDef())
- isVirtDefInsn = true;
- else if (MO.isKill())
- isVirtKillInsn = true;
- continue;
- }
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
- Candidates.reset(*AI);
- }
- // If we're not in a virtual reg's live range, this is a valid
- // restore point.
- if (!inVirtLiveRange) RestorePointMI = MI;
-
- // Update whether we're in the live range of a virtual register
- if (isVirtKillInsn) inVirtLiveRange = false;
- if (isVirtDefInsn) inVirtLiveRange = true;
-
- // Was our survivor untouched by this instruction?
- if (Candidates.test(Survivor))
- continue;
-
- // All candidates gone?
- if (Candidates.none())
- break;
-
- Survivor = Candidates.find_first();
- }
- // If we ran off the end, that's where we want to restore.
- if (MI == ME) RestorePointMI = ME;
- assert(RestorePointMI != StartMI &&
- "No available scavenger restore location!");
-
- // We ran out of candidates, so stop the search.
- UseMI = RestorePointMI;
- return Survivor;
-}
-
/// Given the bitvector \p Available of free register units at position
/// \p From. Search backwards to find a register that is part of \p
/// Candidates and not used/clobbered until the point \p To. If there is
@@ -522,73 +446,6 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
return Scavenged[SI];
}
-Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
- MachineBasicBlock::iterator I,
- int SPAdj, bool AllowSpill) {
- MachineInstr &MI = *I;
- const MachineFunction &MF = *MI.getMF();
- // Consider all allocatable registers in the register class initially
- BitVector Candidates = TRI->getAllocatableSet(MF, RC);
-
- // Exclude all the registers being used by the instruction.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
- !MO.getReg().isVirtual())
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
- Candidates.reset(*AI);
- }
-
- // If we have already scavenged some registers, remove them from the
- // candidates. If we end up recursively calling eliminateFrameIndex, we don't
- // want to be clobbering previously scavenged registers or their associated
- // stack slots.
- for (ScavengedInfo &SI : Scavenged) {
- if (SI.Reg) {
- if (isRegUsed(SI.Reg)) {
- LLVM_DEBUG(
- dbgs() << "Removing " << printReg(SI.Reg, TRI) <<
- " from scavenging candidates since it was already scavenged\n");
- for (MCRegAliasIterator AI(SI.Reg, TRI, true); AI.isValid(); ++AI)
- Candidates.reset(*AI);
- }
- }
- }
-
- // Try to find a register that's unused if there is one, as then we won't
- // have to spill.
- BitVector Available = getRegsAvailable(RC);
- Available &= Candidates;
- if (Available.any())
- Candidates = Available;
-
- // Find the register whose use is furthest away.
- MachineBasicBlock::iterator UseMI;
- Register SReg = findSurvivorReg(I, Candidates, 25, UseMI);
-
- // If we found an unused register there is no reason to spill it.
- if (!isRegUsed(SReg)) {
- LLVM_DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n");
- return SReg;
- }
-
- if (!AllowSpill)
- return 0;
-
-#ifndef NDEBUG
- for (ScavengedInfo &SI : Scavenged) {
- assert(SI.Reg != SReg && "scavenged a previously scavenged register");
- }
-#endif
-
- ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
- Scavenged.Restore = &*std::prev(UseMI);
-
- LLVM_DEBUG(dbgs() << "Scavenged register (with spill): "
- << printReg(SReg, TRI) << "\n");
-
- return SReg;
-}
-
Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator To,
bool RestoreAfter, int SPAdj,
diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index 05bbd1a2d03b..bc3ef1c0329a 100644
--- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -101,9 +101,9 @@ private:
const SmallVectorImpl<LiveInterval*> &Intervals) const;
- LiveIntervals *LIS;
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
+ LiveIntervals *LIS = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
};
} // end anonymous namespace
@@ -249,7 +249,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
/// Undef use operands are not tracked in the equivalence class,
/// but need to be updated if they are tied; take care to only
/// update the tied operand.
- unsigned OperandNo = MI->getOperandNo(&MO);
+ unsigned OperandNo = MO.getOperandNo();
unsigned TiedIdx = MI->findTiedOperandIdx(OperandNo);
MI->getOperand(TiedIdx).setReg(VReg);
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 87b8ac59bdba..57cd1fcffb61 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -229,8 +229,6 @@ void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<TargetLibraryInfoWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<LoopAccessLegacyAnalysis>();
- AU.addPreserved<DemandedBitsWrapperPass>();
AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
diff --git a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
index 0ad6ef84220a..11bdf3bb2ba8 100644
--- a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "reset-machine-function"
@@ -68,6 +69,10 @@ namespace {
MF.reset();
MF.initTargetMachineFunctionInfo(MF.getSubtarget());
+ const LLVMTargetMachine &TM = MF.getTarget();
+ // MRI callback for target specific initializations.
+ TM.registerMachineRegisterInfoCallback(MF);
+
if (EmitFallbackDiag) {
const Function &F = MF.getFunction();
DiagnosticInfoISelFallback DiagFallback(F);
diff --git a/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp b/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
index dd70a2f23e45..cc29bdce1210 100644
--- a/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
+++ b/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
@@ -52,12 +52,13 @@ bool MachineSanitizerBinaryMetadata::runOnMachineFunction(MachineFunction &MF) {
if (!MD)
return false;
const auto &Section = *cast<MDString>(MD->getOperand(0));
- if (!Section.getString().equals(kSanitizerBinaryMetadataCoveredSection))
+ if (!Section.getString().startswith(kSanitizerBinaryMetadataCoveredSection))
return false;
auto &AuxMDs = *cast<MDTuple>(MD->getOperand(1));
// Assume it currently only has features.
assert(AuxMDs.getNumOperands() == 1);
- auto *Features = cast<ConstantAsMetadata>(AuxMDs.getOperand(0))->getValue();
+ Constant *Features =
+ cast<ConstantAsMetadata>(AuxMDs.getOperand(0))->getValue();
if (!Features->getUniqueInteger()[kSanitizerBinaryMetadataUARBit])
return false;
// Calculate size of stack args for the function.
@@ -69,12 +70,18 @@ bool MachineSanitizerBinaryMetadata::runOnMachineFunction(MachineFunction &MF) {
Align = std::max(Align, MFI.getObjectAlign(i).value());
}
Size = (Size + Align - 1) & ~(Align - 1);
+ if (!Size)
+ return false;
+ // Non-zero size, update metadata.
auto &F = MF.getFunction();
IRBuilder<> IRB(F.getContext());
MDBuilder MDB(F.getContext());
// Keep the features and append size of stack args to the metadata.
- F.setMetadata(LLVMContext::MD_pcsections,
- MDB.createPCSections(
- {{Section.getString(), {Features, IRB.getInt32(Size)}}}));
+ APInt NewFeatures = Features->getUniqueInteger();
+ NewFeatures.setBit(kSanitizerBinaryMetadataUARHasSizeBit);
+ F.setMetadata(
+ LLVMContext::MD_pcsections,
+ MDB.createPCSections({{Section.getString(),
+ {IRB.getInt(NewFeatures), IRB.getInt32(Size)}}}));
return false;
}
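The hunk above rounds the stack-argument size with the standard power-of-two align-up expression. A tiny standalone illustration of that arithmetic, using made-up sizes rather than anything taken from the pass:

#include <cassert>
#include <cstdint>

// Round Size up to a multiple of Align (Align must be a power of two); this is
// the same expression as "Size = (Size + Align - 1) & ~(Align - 1)" above.
static uint64_t alignUp(uint64_t Size, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two alignment");
  return (Size + Align - 1) & ~(Align - 1);
}

int main() {
  assert(alignUp(20, 16) == 32); // 20 bytes of stack args, 16-byte alignment
  assert(alignUp(32, 16) == 32); // already aligned, unchanged
  assert(alignUp(0, 16) == 0);   // zero size stays zero, so the pass bails out
  return 0;
}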
diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp
index 696b29018ae6..14ec41920e3e 100644
--- a/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -183,8 +183,6 @@ void SUnit::removePred(const SDep &D) {
SUnit *N = D.getSUnit();
SmallVectorImpl<SDep>::iterator Succ = llvm::find(N->Succs, P);
assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
- N->Succs.erase(Succ);
- Preds.erase(I);
// Update the bookkeeping.
if (P.getKind() == SDep::Data) {
assert(NumPreds > 0 && "NumPreds will underflow!");
@@ -193,21 +191,25 @@ void SUnit::removePred(const SDep &D) {
--N->NumSuccs;
}
if (!N->isScheduled) {
- if (D.isWeak())
+ if (D.isWeak()) {
+ assert(WeakPredsLeft > 0 && "WeakPredsLeft will underflow!");
--WeakPredsLeft;
- else {
+ } else {
assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
--NumPredsLeft;
}
}
if (!isScheduled) {
- if (D.isWeak())
+ if (D.isWeak()) {
+ assert(WeakSuccsLeft > 0 && "WeakSuccsLeft will underflow!");
--N->WeakSuccsLeft;
- else {
+ } else {
assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
--N->NumSuccsLeft;
}
}
+ N->Succs.erase(Succ);
+ Preds.erase(I);
if (P.getLatency() != 0) {
this->setDepthDirty();
N->setHeightDirty();
@@ -722,6 +724,8 @@ void ScheduleDAGTopologicalSort::AddSUnitWithoutPredecessors(const SUnit *SU) {
bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
const SUnit *TargetSU) {
+ assert(TargetSU != nullptr && "Invalid target SUnit");
+ assert(SU != nullptr && "Invalid SUnit");
FixOrder();
// If insertion of the edge SU->TargetSU would create a cycle
// then there is a path from TargetSU to SU.
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 1b213e87e75c..239b44857c28 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -208,13 +208,12 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
ExitSU.setInstr(ExitMI);
// Add dependencies on the defs and uses of the instruction.
if (ExitMI) {
- for (const MachineOperand &MO : ExitMI->operands()) {
- if (!MO.isReg() || MO.isDef()) continue;
+ for (const MachineOperand &MO : ExitMI->all_uses()) {
Register Reg = MO.getReg();
if (Reg.isPhysical()) {
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
} else if (Reg.isVirtual() && MO.readsReg()) {
- addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
+ addVRegUseDeps(&ExitSU, MO.getOperandNo());
}
}
}
@@ -334,11 +333,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
addPhysRegDataDeps(SU, OperIdx);
// Clear previous uses and defs of this register and its subregisters.
- for (MCSubRegIterator SubReg(Reg, TRI, true); SubReg.isValid(); ++SubReg) {
- if (Uses.contains(*SubReg))
- Uses.eraseAll(*SubReg);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) {
+ if (Uses.contains(SubReg))
+ Uses.eraseAll(SubReg);
if (!MO.isDead())
- Defs.eraseAll(*SubReg);
+ Defs.eraseAll(SubReg);
}
if (MO.isDead() && SU->isCall) {
// Calls will not be reordered because of chain dependencies (see
@@ -1026,15 +1025,14 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
void ScheduleDAGInstrs::Value2SUsMap::dump() {
for (const auto &[ValType, SUs] : *this) {
- if (ValType.is<const Value*>()) {
- const Value *V = ValType.get<const Value*>();
+ if (isa<const Value *>(ValType)) {
+ const Value *V = cast<const Value *>(ValType);
if (isa<UndefValue>(V))
dbgs() << "Unknown";
else
V->printAsOperand(dbgs());
- }
- else if (ValType.is<const PseudoSourceValue*>())
- dbgs() << ValType.get<const PseudoSourceValue*>();
+ } else if (isa<const PseudoSourceValue *>(ValType))
+ dbgs() << cast<const PseudoSourceValue *>(ValType);
else
llvm_unreachable("Unknown Value type.");
@@ -1522,7 +1520,7 @@ LLVM_DUMP_METHOD void ILPValue::dump() const {
namespace llvm {
-LLVM_DUMP_METHOD
+LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
Val.print(OS);
return OS;
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index 5fd78eccf732..30d959704745 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -98,15 +99,15 @@ namespace {
class SelectOptimize : public FunctionPass {
const TargetMachine *TM = nullptr;
- const TargetSubtargetInfo *TSI;
+ const TargetSubtargetInfo *TSI = nullptr;
const TargetLowering *TLI = nullptr;
const TargetTransformInfo *TTI = nullptr;
- const LoopInfo *LI;
- DominatorTree *DT;
+ const LoopInfo *LI = nullptr;
+ DominatorTree *DT = nullptr;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
- ProfileSummaryInfo *PSI;
- OptimizationRemarkEmitter *ORE;
+ ProfileSummaryInfo *PSI = nullptr;
+ OptimizationRemarkEmitter *ORE = nullptr;
TargetSchedModel TSchedModel;
public:
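SelectOptimize above (like RenameIndependentSubregs earlier in this import) only changes bare pointer members to in-class = nullptr initializers. A minimal standalone sketch of the failure mode that guards against; the class and member names here are invented for illustration:

#include <cassert>

struct Analysis { int Value = 0; };

// Without the "= nullptr" default, the member is indeterminate whenever the
// object is constructed but never fully initialized (for example, a pass that
// is created and then skipped), so a later null check would read garbage.
class ExamplePass {
  const Analysis *LI = nullptr; // mirrors "const LoopInfo *LI = nullptr;"
public:
  bool hasAnalysis() const { return LI != nullptr; }
  void setAnalysis(const Analysis *A) { LI = A; }
};

int main() {
  ExamplePass P;            // constructed, never handed an analysis
  assert(!P.hasAnalysis()); // well defined thanks to the default initializer
  Analysis A;
  P.setAnalysis(&A);
  assert(P.hasAnalysis());
  return 0;
}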
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0a3ebd73d272..de909cc10795 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -30,11 +30,14 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/ByteProvider.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
@@ -57,7 +60,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -169,7 +171,8 @@ namespace {
/// them) when they are deleted from the underlying DAG. It relies on
/// stable indices of nodes within the worklist.
DenseMap<SDNode *, unsigned> WorklistMap;
- /// This records all nodes attempted to add to the worklist since we
+
+ /// This records all nodes attempted to be added to the worklist since we
/// considered a new worklist entry. As we do not add duplicate nodes
/// in the worklist, this is different from the tail of the worklist.
SmallSetVector<SDNode *, 32> PruningList;
@@ -262,7 +265,7 @@ namespace {
/// Add to the worklist making sure its instance is at the back (next to be
/// processed.)
- void AddToWorklist(SDNode *N) {
+ void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Deleted Node added to Worklist");
@@ -271,7 +274,8 @@ namespace {
if (N->getOpcode() == ISD::HANDLENODE)
return;
- ConsiderForPruning(N);
+ if (IsCandidateForPruning)
+ ConsiderForPruning(N);
if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
Worklist.push_back(N);
@@ -362,6 +366,11 @@ namespace {
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
+ // Looks up the chain to find a unique (unaliased) store feeding the passed
+ // load. If no such store is found, returns nullptr.
+ // Note: This will look past a CALLSEQ_START if the load is chained to it
+ // so that it can find stack stores for byval params.
+ StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
// Scalars have size 0 to distinguish from singleton vectors.
SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
@@ -417,11 +426,12 @@ namespace {
SDValue visitSUBC(SDNode *N);
SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
- SDValue visitADDCARRY(SDNode *N);
+ SDValue visitUADDO_CARRY(SDNode *N);
SDValue visitSADDO_CARRY(SDNode *N);
- SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
+ SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
+ SDNode *N);
SDValue visitSUBE(SDNode *N);
- SDValue visitSUBCARRY(SDNode *N);
+ SDValue visitUSUBO_CARRY(SDNode *N);
SDValue visitSSUBO_CARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
@@ -434,6 +444,7 @@ namespace {
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
SDValue visitAVG(SDNode *N);
+ SDValue visitABD(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitMULO(SDNode *N);
@@ -476,10 +487,12 @@ namespace {
SDValue visitFREEZE(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
+ SDValue visitVP_FADD(SDNode *N);
+ SDValue visitVP_FSUB(SDNode *N);
SDValue visitSTRICT_FADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
- SDValue visitFMA(SDNode *N);
+ template <class MatchContextClass> SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
@@ -495,6 +508,7 @@ namespace {
SDValue visitFABS(SDNode *N);
SDValue visitFCEIL(SDNode *N);
SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFREXP(SDNode *N);
SDValue visitFFLOOR(SDNode *N);
SDValue visitFMinMax(SDNode *N);
SDValue visitBRCOND(SDNode *N);
@@ -503,6 +517,7 @@ namespace {
SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
+ SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
@@ -527,8 +542,12 @@ namespace {
SDValue visitFP_TO_BF16(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
SDValue visitVPOp(SDNode *N);
+ SDValue visitGET_FPENV_MEM(SDNode *N);
+ SDValue visitSET_FPENV_MEM(SDNode *N);
+ template <class MatchContextClass>
SDValue visitFADDForFMACombine(SDNode *N);
+ template <class MatchContextClass>
SDValue visitFSUBForFMACombine(SDNode *N);
SDValue visitFMULForFMADistributiveCombine(SDNode *N);
@@ -539,9 +558,12 @@ namespace {
SDValue N0,
SDValue N1);
SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
- SDValue N1);
+ SDValue N1, SDNodeFlags Flags);
SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
+ SDValue reassociateReduction(unsigned ResOpc, unsigned Opc, const SDLoc &DL,
+ EVT VT, SDValue N0, SDValue N1,
+ SDNodeFlags Flags = SDNodeFlags());
SDValue visitShiftByConstant(SDNode *N);
@@ -579,11 +601,15 @@ namespace {
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI);
+
SDValue CombineExtLoad(SDNode *N);
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
+ SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
@@ -713,6 +739,11 @@ namespace {
SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores);
+ /// Helper function for mergeConsecutiveStores which checks if all the store
+ /// nodes have the same underlying object. We can still reuse the first
+ /// store's pointer info if all the stores are from the same object.
+ bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
+
/// This is a helper function for mergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store introducing
@@ -841,6 +872,138 @@ public:
void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};
+class EmptyMatchContext {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+
+public:
+ EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
+ : DAG(DAG), TLI(TLI) {}
+
+ bool match(SDValue OpN, unsigned Opcode) const {
+ return Opcode == OpN->getOpcode();
+ }
+
+ // Same as SelectionDAG::getNode().
+ template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
+ return DAG.getNode(std::forward<ArgT>(Args)...);
+ }
+
+ bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly);
+ }
+};
+
+class VPMatchContext {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ SDValue RootMaskOp;
+ SDValue RootVectorLenOp;
+
+public:
+ VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
+ : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() {
+ assert(Root->isVPOpcode());
+ if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode()))
+ RootMaskOp = Root->getOperand(*RootMaskPos);
+
+ if (auto RootVLenPos =
+ ISD::getVPExplicitVectorLengthIdx(Root->getOpcode()))
+ RootVectorLenOp = Root->getOperand(*RootVLenPos);
+ }
+
+ /// Returns whether \p OpVal is a node that is functionally compatible with
+ /// the NodeType \p Opc.
+ bool match(SDValue OpVal, unsigned Opc) const {
+ if (!OpVal->isVPOpcode())
+ return OpVal->getOpcode() == Opc;
+
+ auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(),
+ !OpVal->getFlags().hasNoFPExcept());
+ if (BaseOpc != Opc)
+ return false;
+
+ // Make sure the mask of OpVal is true mask or is same as Root's.
+ unsigned VPOpcode = OpVal->getOpcode();
+ if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) {
+ SDValue MaskOp = OpVal.getOperand(*MaskPos);
+ if (RootMaskOp != MaskOp &&
+ !ISD::isConstantSplatVectorAllOnes(MaskOp.getNode()))
+ return false;
+ }
+
+ // Make sure the EVL of OpVal is same as Root's.
+ if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode))
+ if (RootVectorLenOp != OpVal.getOperand(*VLenPos))
+ return false;
+ return true;
+ }
+
+ // Specialize based on number of operands.
+ // TODO emit VP intrinsics where MaskOp/VectorLenOp != null
+ // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
+ // DAG.getNode(Opcode, DL, VT); }
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {Operand, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {N1, N2, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDValue N3) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {N1, N2, N3, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
+ SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
+ return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
+ Flags);
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
+ return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
+ Flags);
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDValue N3, SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
+ }
+
+ bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ unsigned VPOp = ISD::getVPForBaseOpcode(Op);
+ return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
+ }
+};
+
} // end anonymous namespace
//===----------------------------------------------------------------------===//
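The EmptyMatchContext / VPMatchContext classes added above let one combine body serve both plain SDNodes and VP nodes: the body is templated on the context and emits nodes only through it, so the VP instantiation picks up the root mask and EVL automatically. A standalone model of that pattern with invented names (strings stand in for SDValues; nothing below is an LLVM API):

#include <cassert>
#include <string>

// Plain nodes: build "(op a b)".
struct PlainContext {
  std::string getNode(const std::string &Op, const std::string &A,
                      const std::string &B) const {
    return "(" + Op + " " + A + " " + B + ")";
  }
};

// Predicated nodes: the context carries the mask and EVL and appends them to
// every node it builds, the way VPMatchContext forwards its root operands.
struct PredicatedContext {
  std::string Mask = "%m", EVL = "%evl";
  std::string getNode(const std::string &Op, const std::string &A,
                      const std::string &B) const {
    return "(vp." + Op + " " + A + " " + B + ", " + Mask + ", " + EVL + ")";
  }
};

// The fold is written once; only the context differs between instantiations.
template <class ContextT> std::string foldAddZero(const ContextT &Ctx) {
  return Ctx.getNode("add", "%x", "0");
}

int main() {
  assert(foldAddZero(PlainContext{}) == "(add %x 0)");
  assert(foldAddZero(PredicatedContext{}) == "(vp.add %x 0, %m, %evl)");
  return 0;
}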
@@ -1099,7 +1262,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
- SDValue N0, SDValue N1) {
+ SDValue N0, SDValue N1,
+ SDNodeFlags Flags) {
EVT VT = N0.getValueType();
if (N0.getOpcode() != Opc)
@@ -1118,8 +1282,12 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (TLI.isReassocProfitable(DAG, N0, N1)) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
- return DAG.getNode(Opc, DL, VT, OpNode, N01);
+ SDNodeFlags NewFlags;
+ if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
+ Flags.hasNoUnsignedWrap())
+ NewFlags.setNoUnsignedWrap(true);
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
+ return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
}
}
@@ -1177,13 +1345,32 @@ SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
return SDValue();
- if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
return Combined;
- if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
return Combined;
return SDValue();
}
+// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
+// Note that we only expect Flags to be passed from FP operations. For integer
+// operations they need to be dropped.
+SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
+ const SDLoc &DL, EVT VT, SDValue N0,
+ SDValue N1, SDNodeFlags Flags) {
+ if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ N0->hasOneUse() && N1->hasOneUse() &&
+ TLI.isOperationLegalOrCustom(Opc, N0.getOperand(0).getValueType()) &&
+ TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+ return DAG.getNode(RedOpc, DL, VT,
+ DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0)));
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
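reassociateReduction above relies on the identity that combining two reductions element-wise equals one reduction of the element-wise combined vectors; for integer add this holds exactly, while the FP cases depend on the reassociation flags the caller checks. A small standalone check of the add case with made-up data:

#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

int main() {
  std::vector<int64_t> X{1, 2, 3, 4}, Y{10, 20, 30, 40};

  // vecreduce_add(X) + vecreduce_add(Y)
  int64_t Separate = std::accumulate(X.begin(), X.end(), int64_t(0)) +
                     std::accumulate(Y.begin(), Y.end(), int64_t(0));

  // vecreduce_add(add(X, Y))
  std::vector<int64_t> Sum(X.size());
  for (size_t I = 0; I != X.size(); ++I)
    Sum[I] = X[I] + Y[I];
  int64_t Fused = std::accumulate(Sum.begin(), Sum.end(), int64_t(0));

  assert(Separate == Fused && Fused == 110);
  return 0;
}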
@@ -1591,8 +1778,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
WorklistInserter AddNodes(*this);
// Add all the dag nodes to the worklist.
+ //
+ // Note: Not all nodes are added to PruningList here, because the only
+ // nodes which can be deleted are those which have no uses, and all other
+ // nodes which would otherwise be added to the worklist by the first call
+ // to getNextWorklistEntry are already present in it.
for (SDNode &Node : DAG.allnodes())
- AddToWorklist(&Node);
+ AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
@@ -1627,11 +1819,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// Add any operands of the new node which have not yet been combined to the
// worklist as well. Because the worklist uniques things already, this
// won't repeatedly process the same operand.
- CombinedNodes.insert(N);
for (const SDValue &ChildN : N->op_values())
if (!CombinedNodes.count(ChildN.getNode()))
AddToWorklist(ChildN.getNode());
+ CombinedNodes.insert(N);
SDValue RV = combine(N);
if (!RV.getNode())
@@ -1665,10 +1857,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// out), because re-visiting the EntryToken and its users will not uncover
// any additional opportunities, but there may be a large number of such
// users, potentially causing compile time explosion.
- if (RV.getOpcode() != ISD::EntryToken) {
- AddToWorklist(RV.getNode());
- AddUsersToWorklist(RV.getNode());
- }
+ if (RV.getOpcode() != ISD::EntryToken)
+ AddToWorklistWithUsers(RV.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
@@ -1700,10 +1890,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SSUBO:
case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
- case ISD::ADDCARRY: return visitADDCARRY(N);
+ case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
case ISD::SUBE: return visitSUBE(N);
- case ISD::SUBCARRY: return visitSUBCARRY(N);
+ case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
@@ -1720,6 +1910,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::AVGFLOORU:
case ISD::AVGCEILS:
case ISD::AVGCEILU: return visitAVG(N);
+ case ISD::ABDS:
+ case ISD::ABDU: return visitABD(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO:
@@ -1770,7 +1962,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
- case ISD::FMA: return visitFMA(N);
+ case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
@@ -1791,6 +1983,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FMAXIMUM: return visitFMinMax(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
+ case ISD::FFREXP: return visitFFREXP(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
@@ -1812,6 +2005,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
case ISD::FREEZE: return visitFREEZE(N);
+ case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
+ case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -1824,7 +2019,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
- case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
#include "llvm/IR/VPIntrinsics.def"
return visitVPOp(N);
@@ -2131,6 +2328,39 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
+// isTruncateOf - If N is a truncate of some other value, return true, record
+// the value being truncated in Op and which of Op's bits are zero/one in Known.
+// This function computes KnownBits to avoid a duplicated call to
+// computeKnownBits in the caller.
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+ KnownBits &Known) {
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ Op = N->getOperand(0);
+ Known = DAG.computeKnownBits(Op);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::SETCC ||
+ N.getValueType().getScalarType() != MVT::i1 ||
+ cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
+ return false;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType());
+
+ if (isNullOrNullSplat(Op0))
+ Op = Op1;
+ else if (isNullOrNullSplat(Op1))
+ Op = Op0;
+ else
+ return false;
+
+ Known = DAG.computeKnownBits(Op);
+
+ return (Known.Zero | 1).isAllOnes();
+}
+
/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
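isTruncateOf above accepts a (setcc x, 0, setne) whose operand has every bit other than bit 0 known zero, because for such values the compare and a truncate to i1 agree. A tiny standalone model of that equivalence (plain integers stand in for KnownBits):

#include <cassert>
#include <cstdint>

int main() {
  // Known bits say every bit except bit 0 is zero, so X can only be 0 or 1.
  for (uint64_t X : {UINT64_C(0), UINT64_C(1)}) {
    bool SetccNe = (X != 0);       // (setcc X, 0, setne)
    bool TruncToI1 = (X & 1) != 0; // truncate X to i1
    assert(SetccNe == TruncToI1);
  }
  return 0;
}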
@@ -2206,11 +2436,12 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
return SDValue();
- // We can't hoist div/rem because of immediate UB (not speculatable).
- unsigned Opcode = N->getOpcode();
- if (!DAG.isSafeToSpeculativelyExecute(Opcode))
+ // We can't hoist all instructions because of immediate UB (not speculatable).
+ // For example div/rem by zero.
+ if (!DAG.isSafeToSpeculativelyExecuteNode(N))
return SDValue();
+ unsigned Opcode = N->getOpcode();
EVT VT = N->getValueType(0);
SDValue Cond = N1.getOperand(0);
SDValue TVal = N1.getOperand(1);
@@ -2258,6 +2489,17 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
SelOpNo = 1;
Sel = BO->getOperand(1);
+
+ // Peek through trunc to shift amount type.
+ if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
+ BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
+ // This is valid when the truncated bits of x are already zero.
+ SDValue Op;
+ KnownBits Known;
+ if (isTruncateOf(DAG, Sel, Op, Known) &&
+ Known.countMaxActiveBits() < Sel.getScalarValueSizeInBits())
+ Sel = Op;
+ }
}
if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
@@ -2310,18 +2552,14 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
// constant. Eliminate the binop by pulling the constant math into the
// select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
// CBO, CF + CBO
- NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
- : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
- if (!CanFoldNonConst && !NewCT.isUndef() &&
- !isConstantOrConstantVector(NewCT, true) &&
- !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
+ NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
+ : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
+ if (!NewCT)
return SDValue();
- NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
- : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
- if (!CanFoldNonConst && !NewCF.isUndef() &&
- !isConstantOrConstantVector(NewCF, true) &&
- !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
+ NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
+ : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
+ if (!NewCF)
return SDValue();
}
@@ -2420,6 +2658,12 @@ static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
return false;
}
+static bool
+areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
+ return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
+ (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
+}
+
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
@@ -2444,6 +2688,10 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
+ if (areBitwiseNotOfEachother(N0, N1))
+ return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()),
+ SDLoc(N), VT);
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
@@ -2509,12 +2757,22 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// equivalent to (add x, c).
// Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
// equivalent to (add x, c).
+ // Do this optimization only when adding c does not introduce instructions
+ // for adding carries.
auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
if (isADDLike(N0, DAG) && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
- return DAG.getNode(ISD::ADD, DL, VT,
- DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
- N0.getOperand(1));
+ // If N0's type does not split or is a sign mask, it does not introduce
+ // add carry.
+ auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
+ bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
+ TyActn == TargetLoweringBase::TypePromoteInteger ||
+ isMinSignedConstant(N0.getOperand(1));
+ if (NoAddCarry)
+ return DAG.getNode(
+ ISD::ADD, DL, VT,
+ DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
+ N0.getOperand(1));
}
return SDValue();
};
@@ -2522,6 +2780,11 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
return Add;
if (SDValue Add = ReassociateAddOr(N1, N0))
return Add;
+
+ // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
+ return SD;
}
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
@@ -2626,7 +2889,10 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
- N0.hasOneUse()) {
+ N0.hasOneUse() &&
+ // Limit this to after legalization if the add has wrap flags
+ (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
+ !N->getFlags().hasNoSignedWrap()))) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
@@ -2714,6 +2980,7 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ bool IsSigned = Opcode == ISD::SADDSAT;
SDLoc DL(N);
// fold (add_sat x, undef) -> -1
@@ -2744,14 +3011,14 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
return N0;
// If it cannot overflow, transform into an add.
- if (Opcode == ISD::UADDSAT)
- if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
- return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+ if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
return SDValue();
}
-static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
+static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
+ bool ForceCarryReconstruction = false) {
bool Masked = false;
// First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
@@ -2762,11 +3029,17 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
}
if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
+ if (ForceCarryReconstruction)
+ return V;
+
Masked = true;
V = V.getOperand(0);
continue;
}
+ if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
+ return V;
+
break;
}
@@ -2774,7 +3047,7 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
if (V.getResNo() != 1)
return SDValue();
- if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
+ if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
return SDValue();
@@ -2842,7 +3115,10 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
- N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
+ N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
+ // Limit this to after legalization if the add has wrap flags
+ (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
+ !N0->getFlags().hasNoSignedWrap()))) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
@@ -2864,6 +3140,15 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
}
}
+ // add (mul x, C), x -> mul x, C+1
+ if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
+ N0.hasOneUse()) {
+ SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
+ DAG.getConstant(1, DL, VT));
+ return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
+ }
+
// If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
// rather than 'add 0/-1' (the zext should get folded).
// add (sext i1 Y), X --> sub X, (zext i1 Y)
@@ -2884,16 +3169,16 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
}
}
- // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
- if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
+ // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
+ if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
N1.getResNo() == 0)
- return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
+ return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
N0, N1.getOperand(0), N1.getOperand(2));
- // (add X, Carry) -> (addcarry X, 0, Carry)
- if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+ // (add X, Carry) -> (uaddo_carry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
if (SDValue Carry = getAsCarry(TLI, N1))
- return DAG.getNode(ISD::ADDCARRY, DL,
+ return DAG.getNode(ISD::UADDO_CARRY, DL,
DAG.getVTList(VT, Carry.getValueType()), N0,
DAG.getConstant(0, DL, VT), Carry);
@@ -2923,7 +3208,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
DL, MVT::Glue));
// If it cannot overflow, transform into an add.
- if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
@@ -2995,12 +3280,12 @@ SDValue DAGCombiner::visitADDO(SDNode *N) {
if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
- if (!IsSigned) {
- // If it cannot overflow, transform into an add.
- if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
- return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
- DAG.getConstant(0, DL, CarryVT));
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+ if (!IsSigned) {
// fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
@@ -3024,20 +3309,20 @@ SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
if (VT.isVector())
return SDValue();
- // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
+ // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
// If Y + 1 cannot overflow.
- if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
+ if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
SDValue Y = N1.getOperand(0);
SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
- if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
+ if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
N1.getOperand(2));
}
- // (uaddo X, Carry) -> (addcarry X, 0, Carry)
- if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+ // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
if (SDValue Carry = getAsCarry(TLI, N1))
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
+ return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
DAG.getConstant(0, SDLoc(N), VT), Carry);
return SDValue();
@@ -3062,7 +3347,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
+SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
@@ -3072,16 +3357,16 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
- return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
+ return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
- // fold (addcarry x, y, false) -> (uaddo x, y)
+ // fold (uaddo_carry x, y, false) -> (uaddo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
}
- // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
+ // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
if (isNullConstant(N0) && isNullConstant(N1)) {
EVT VT = N0.getValueType();
EVT CarryVT = CarryIn.getValueType();
@@ -3092,73 +3377,52 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
}
- if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
+ if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
return Combined;
- if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
+ if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
return Combined;
// We want to avoid useless duplication.
- // TODO: This is done automatically for binary operations. As ADDCARRY is
+ // TODO: This is done automatically for binary operations. As UADDO_CARRY is
// not a binary operation, it is not really possible to leverage this
// existing mechanism for it. However, if more operations require the same
// deduplication logic, then it may be worth generalizing.
SDValue Ops[] = {N1, N0, CarryIn};
SDNode *CSENode =
- DAG.getNodeIfExists(ISD::ADDCARRY, N->getVTList(), Ops, N->getFlags());
+ DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
if (CSENode)
return SDValue(CSENode, 0);
return SDValue();
}
-SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue CarryIn = N->getOperand(2);
- SDLoc DL(N);
-
- // canonicalize constant to RHS
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N0C && !N1C)
- return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
-
- // fold (saddo_carry x, y, false) -> (saddo x, y)
- if (isNullConstant(CarryIn)) {
- if (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
- return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
- }
-
- return SDValue();
-}
-
/**
* If we are facing some sort of diamond carry propagation pattern, try to
* break it up to generate something like:
- * (addcarry X, 0, (addcarry A, B, Z):Carry)
+ * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
*
* The end result is usually an increase in operations required, but because the
* carry is now linearized, other transforms can kick in and optimize the DAG.
*
* Patterns typically look something like
- * (uaddo A, B)
- * / \
- * Carry Sum
- * | \
- * | (addcarry *, 0, Z)
- * | /
- * \ Carry
- * | /
- * (addcarry X, *, *)
+ * (uaddo A, B)
+ * / \
+ * Carry Sum
+ * | \
+ * | (uaddo_carry *, 0, Z)
+ * | /
+ * \ Carry
+ * | /
+ * (uaddo_carry X, *, *)
*
* But numerous variations exist. Our goal is to identify A, B, X and Z and
* produce a combine with a single path for carry propagation.
*/
-static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
- SDValue X, SDValue Carry0, SDValue Carry1,
- SDNode *N) {
+static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
+ SelectionDAG &DAG, SDValue X,
+ SDValue Carry0, SDValue Carry1,
+ SDNode *N) {
if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
return SDValue();
if (Carry1.getOpcode() != ISD::UADDO)
@@ -3168,9 +3432,9 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
/**
* First look for a suitable Z. It will present itself in the form of
- * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
+ * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
*/
- if (Carry0.getOpcode() == ISD::ADDCARRY &&
+ if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
isNullConstant(Carry0.getOperand(1))) {
Z = Carry0.getOperand(2);
} else if (Carry0.getOpcode() == ISD::UADDO &&
@@ -3185,26 +3449,27 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
auto cancelDiamond = [&](SDValue A, SDValue B) {
SDLoc DL(N);
- SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
+ SDValue NewY =
+ DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
Combiner.AddToWorklist(NewY.getNode());
- return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
+ return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
DAG.getConstant(0, DL, X.getValueType()),
NewY.getValue(1));
};
/**
- * (uaddo A, B)
- * |
- * Sum
- * |
- * (addcarry *, 0, Z)
+ * (uaddo A, B)
+ * |
+ * Sum
+ * |
+ * (uaddo_carry *, 0, Z)
*/
if (Carry0.getOperand(0) == Carry1.getValue(0)) {
return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
}
/**
- * (addcarry A, 0, Z)
+ * (uaddo_carry A, 0, Z)
* |
* Sum
* |
@@ -3241,12 +3506,12 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
// | /
// CarryOut = (or *, *)
//
-// And generate ADDCARRY (or SUBCARRY) with two result values:
+// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
//
-// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
+// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
//
-// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
-// a single path for carry/borrow out propagation:
+// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
+// with a single path for carry/borrow out propagation.
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
SDValue N0, SDValue N1, SDNode *N) {
SDValue Carry0 = getAsCarry(TLI, N0);
@@ -3279,16 +3544,13 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
return SDValue();
SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
- unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
+ unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
return SDValue();
// Verify that the carry/borrow in is plausibly a carry/borrow bit.
- // TODO: make getAsCarry() aware of how partial carries are merged.
- if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
- return SDValue();
- CarryIn = CarryIn.getOperand(0);
- if (CarryIn.getValueType() != MVT::i1)
+ CarryIn = getAsCarry(TLI, CarryIn, true);
+ if (!CarryIn)
return SDValue();
SDLoc DL(N);
@@ -3315,45 +3577,68 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
return Merged.getValue(1);
}
-SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
- SDNode *N) {
- // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
+SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
+ SDValue CarryIn, SDNode *N) {
+ // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
+ // carry.
if (isBitwiseNot(N0))
if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
SDLoc DL(N);
- SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
+ SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
N0.getOperand(0), NotC);
return CombineTo(
N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
// Iff the flag result is dead:
- // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
+ // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
// Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
// or the dependency between the instructions.
if ((N0.getOpcode() == ISD::ADD ||
(N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
N0.getValue(1) != CarryIn)) &&
isNullConstant(N1) && !N->hasAnyUseOfValue(1))
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
+ return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
N0.getOperand(0), N0.getOperand(1), CarryIn);
/**
- * When one of the addcarry argument is itself a carry, we may be facing
+ * When one of the uaddo_carry arguments is itself a carry, we may be facing
* a diamond carry propagation. In which case we try to transform the DAG
* to ensure linear carry propagation if that is possible.
*/
if (auto Y = getAsCarry(TLI, N1)) {
// Because both are carries, Y and Z can be swapped.
- if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
+ if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
return R;
- if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
+ if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
return R;
}
return SDValue();
}
+SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
+
+ // fold (saddo_carry x, y, false) -> (saddo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
// clamp/truncation if necessary.
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
@@ -3720,11 +4005,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
- // fold (sub Sym, c) -> Sym-c
- if (N1C && GA->getOpcode() == ISD::GlobalAddress)
- return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
- GA->getOffset() -
- (uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
@@ -3776,19 +4056,19 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
}
- // (sub (subcarry X, 0, Carry), Y) -> (subcarry X, Y, Carry)
- if (N0.getOpcode() == ISD::SUBCARRY && isNullConstant(N0.getOperand(1)) &&
+ // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
+ if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
N0.getResNo() == 0 && N0.hasOneUse())
- return DAG.getNode(ISD::SUBCARRY, DL, N0->getVTList(),
+ return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
N0.getOperand(0), N1, N0.getOperand(2));
- if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
- // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
+ // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
if (SDValue Carry = getAsCarry(TLI, N0)) {
SDValue X = N1;
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
- return DAG.getNode(ISD::ADDCARRY, DL,
+ return DAG.getNode(ISD::UADDO_CARRY, DL,
DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
Carry);
}
@@ -3814,7 +4094,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
(N0.getOperand(0) != N1.getOperand(1) ||
N0.getOperand(1) != N1.getOperand(0)))
return SDValue();
- if (!TLI.isOperationLegalOrCustom(Abd, VT))
+ if (!hasOperation(Abd, VT))
return SDValue();
return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1));
};
@@ -3827,9 +4107,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ bool IsSigned = Opcode == ISD::SSUBSAT;
SDLoc DL(N);
// fold (sub_sat x, undef) -> 0
@@ -3841,7 +4123,7 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
return DAG.getConstant(0, DL, VT);
// fold (sub_sat c1, c2) -> c3
- if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
// fold vector ops
@@ -3858,6 +4140,10 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
if (isNullConstant(N1))
return N0;
+  // If it cannot overflow, transform into a sub.
+ if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
+
return SDValue();
}
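
Illustrative check (not part of the patch): the new SUBSAT fold replaces a saturating subtraction with a plain SUB when overflow is provably impossible. The sketch below verifies the unsigned case exhaustively on i8; usubsat8() is a name invented for the example.

#include <cassert>
#include <cstdint>

// Scalar stand-in for ISD::USUBSAT on i8.
static uint8_t usubsat8(uint8_t X, uint8_t Y) {
  return X > Y ? (uint8_t)(X - Y) : (uint8_t)0;
}

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y <= X; ++Y)
      // Y <= X is exactly the "cannot overflow" condition for unsigned sub,
      // and there usubsat(x, y) == sub(x, y).
      assert(usubsat8((uint8_t)X, (uint8_t)Y) == (uint8_t)(X - Y));
  return 0;
}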
@@ -3911,7 +4197,7 @@ SDValue DAGCombiner::visitSUBO(SDNode *N) {
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
// fold (subo x, c) -> (addo x, -c)
- if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
+ if (IsSigned && N1C && !N1C->isMinSignedValue()) {
return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
@@ -3920,6 +4206,11 @@ SDValue DAGCombiner::visitSUBO(SDNode *N) {
if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+  // If it cannot overflow, transform into a sub.
+ if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+
// Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
@@ -3940,12 +4231,12 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
+SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
- // fold (subcarry x, y, false) -> (usubo x, y)
+ // fold (usubo_carry x, y, false) -> (usubo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
@@ -4062,13 +4353,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT),
DAG.getNode(ISD::SHL, DL, VT, N0,
- DAG.getConstant(Log2Val, DL,
- getShiftAmountTy(N0.getValueType()))));
+ DAG.getConstant(Log2Val, DL, ShiftVT)));
}
// Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
@@ -4108,7 +4400,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
unsigned MathOp = ISD::DELETED_NODE;
APInt MulC = ConstValue1.abs();
// The constant `2` should be treated as (2^0 + 1).
- unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
+ unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
MulC.lshrInPlace(TZeros);
if ((MulC - 1).isPowerOf2())
MathOp = ISD::ADD;
@@ -4163,8 +4455,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
- N0.getOpcode() == ISD::ADD &&
+ if (N0.getOpcode() == ISD::ADD &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
return DAG.getNode(
@@ -4223,6 +4515,11 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
return RMUL;
+ // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
+ return SD;
+
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -4386,7 +4683,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return DAG.getNegative(N0, DL, VT);
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
- if (N1C && N1C->getAPIntValue().isMinSignedValue())
+ if (N1C && N1C->isMinSignedValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
@@ -4886,11 +5183,57 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
if (N1.isUndef())
return N0;
+ // Fold (avg x, x) --> x
+ if (N0 == N1 && Level >= AfterLegalizeTypes)
+ return N0;
+
// TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
return SDValue();
}
+SDValue DAGCombiner::visitABD(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // fold (abd c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
+
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (abds x, 0) -> abs x
+ // fold (abdu x, 0) -> x
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
+ if (Opcode == ISD::ABDS)
+ return DAG.getNode(ISD::ABS, DL, VT, N0);
+ if (Opcode == ISD::ABDU)
+ return N0;
+ }
+ }
+
+ // fold (abd x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+  // fold (abds x, y) -> (abdu x, y) iff both args are known non-negative
+ if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
+ DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
+ return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
+
+ return SDValue();
+}
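
Illustrative scalar model (not part of the patch): it exercises the ABD folds added in visitABD on i8. abds8()/abdu8() are names invented for the example and model ISD::ABDS/ISD::ABDU as max minus min of the operands.

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint8_t abds8(int8_t A, int8_t B) {
  return (uint8_t)(std::max(A, B) - std::min(A, B)); // signed absolute difference
}
static uint8_t abdu8(uint8_t A, uint8_t B) {
  return (uint8_t)(std::max(A, B) - std::min(A, B)); // unsigned absolute difference
}

int main() {
  for (int A = -128; A <= 127; ++A) {
    // fold (abds x, 0) -> abs x   (ABS of INT8_MIN wraps, matching ISD::ABS).
    assert(abds8((int8_t)A, 0) == (uint8_t)(A < 0 ? -A : A));
    // fold (abdu x, 0) -> x
    assert(abdu8((uint8_t)A, 0) == (uint8_t)A);
    // fold (abds x, y) -> (abdu x, y) when both sign bits are known zero.
    for (int B = 0; B <= 127; ++B)
      if (A >= 0)
        assert(abds8((int8_t)A, (int8_t)B) == abdu8((uint8_t)A, (uint8_t)B));
  }
  return 0;
}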
+
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// true if a simplification was made.
@@ -5108,7 +5451,7 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
// same as SimplifySelectCC. N0<N1 ? N2 : N3.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
SDValue N3, ISD::CondCode CC, unsigned &BW,
- bool &Unsigned) {
+ bool &Unsigned, SelectionDAG &DAG) {
auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
// The compare and select operand should be the same or the select operands
@@ -5132,6 +5475,26 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
if (!Opcode0)
return SDValue();
+  // We may need only one range check, if the fptosi can never produce
+ // the upper value.
+ if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
+ if (isNullOrNullSplat(N3)) {
+ EVT IntVT = N0.getValueType().getScalarType();
+ EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
+ if (FPVT.isSimple()) {
+ Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
+ const fltSemantics &Semantics = InputTy->getFltSemantics();
+ uint32_t MinBitWidth =
+ APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
+ if (IntVT.getSizeInBits() >= MinBitWidth) {
+ Unsigned = true;
+ BW = PowerOf2Ceil(MinBitWidth);
+ return N0;
+ }
+ }
+ }
+ }
+
SDValue N00, N01, N02, N03;
ISD::CondCode N0CC;
switch (N0.getOpcode()) {
@@ -5194,7 +5557,7 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
SelectionDAG &DAG) {
unsigned BW;
bool Unsigned;
- SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
+ SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
return SDValue();
EVT FPVT = Fp.getOperand(0).getValueType();
@@ -5208,8 +5571,7 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
SDLoc DL(Fp);
SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
DAG.getValueType(NewVT.getScalarType()));
- return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
- : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
+ return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
}
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
@@ -5298,6 +5660,25 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
return S;
+ // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
+ auto ReductionOpcode = [](unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::SMIN:
+ return ISD::VECREDUCE_SMIN;
+ case ISD::SMAX:
+ return ISD::VECREDUCE_SMAX;
+ case ISD::UMIN:
+ return ISD::VECREDUCE_UMIN;
+ case ISD::UMAX:
+ return ISD::VECREDUCE_UMAX;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+ };
+ if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
+ SDLoc(N), VT, N0, N1))
+ return SD;
+
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -5312,8 +5693,7 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
EVT VT = N0.getValueType();
unsigned LogicOpcode = N->getOpcode();
unsigned HandOpcode = N0.getOpcode();
- assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
- LogicOpcode == ISD::XOR) && "Expected logic opcode");
+ assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
assert(HandOpcode == N1.getOpcode() && "Bad input!");
// Bail early if none of these transforms apply.
@@ -5323,13 +5703,14 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
// FIXME: We should check number of uses of the operands to not increase
// the instruction count for all transforms.
- // Handle size-changing casts.
+ // Handle size-changing casts (or sign_extend_inreg).
SDValue X = N0.getOperand(0);
SDValue Y = N1.getOperand(0);
EVT XVT = X.getValueType();
SDLoc DL(N);
- if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
- HandOpcode == ISD::SIGN_EXTEND) {
+ if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
+ (HandOpcode == ISD::SIGN_EXTEND_INREG &&
+ N0.getOperand(1) == N1.getOperand(1))) {
// If both operands have other uses, this transform would create extra
// instructions without eliminating anything.
if (!N0.hasOneUse() && !N1.hasOneUse())
@@ -5344,11 +5725,14 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
return SDValue();
// Avoid infinite looping with PromoteIntBinOp.
// TODO: Should we apply desirable/legal constraints to all opcodes?
- if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
- !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
+ if ((HandOpcode == ISD::ANY_EXTEND ||
+ HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
return SDValue();
// logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ if (HandOpcode == ISD::SIGN_EXTEND_INREG)
+ return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
@@ -5629,6 +6013,172 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
return SDValue();
}
+static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
+ using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
+ assert(
+ (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
+ "Invalid Op to combine SETCC with");
+
+ // TODO: Search past casts/truncates.
+ SDValue LHS = LogicOp->getOperand(0);
+ SDValue RHS = LogicOp->getOperand(1);
+ if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
+ LogicOp, LHS.getNode(), RHS.getNode());
+
+ SDValue LHS0 = LHS->getOperand(0);
+ SDValue RHS0 = RHS->getOperand(0);
+ SDValue LHS1 = LHS->getOperand(1);
+ SDValue RHS1 = RHS->getOperand(1);
+  // TODO: We don't actually need a splat here; for vectors we just need the
+ // invariants to hold for each element.
+ auto *LHS1C = isConstOrConstSplat(LHS1);
+ auto *RHS1C = isConstOrConstSplat(RHS1);
+ ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+ ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
+ EVT VT = LogicOp->getValueType(0);
+ EVT OpVT = LHS0.getValueType();
+ SDLoc DL(LogicOp);
+
+ // Check if the operands of an and/or operation are comparisons and if they
+  // compare against the same value. Replace the and/or-cmp-cmp sequence with a
+  // min/max-cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
+  // sequence will be replaced with a min-cmp sequence:
+  // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
+  // and the and-cmp-cmp sequence will be replaced with a max-cmp sequence:
+ // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
+ if (OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
+ TLI.isOperationLegal(ISD::SMAX, OpVT) &&
+ TLI.isOperationLegal(ISD::UMIN, OpVT) &&
+ TLI.isOperationLegal(ISD::SMIN, OpVT)) {
+ if (LHS->getOpcode() == ISD::SETCC && RHS->getOpcode() == ISD::SETCC &&
+ LHS->hasOneUse() && RHS->hasOneUse() &&
+        // The two comparisons should have either the same predicate or one
+        // predicate should be the swapped form of the other.
+ (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR)) &&
+        // The optimization does not work for `==` or `!=`.
+ !ISD::isIntEqualitySetCC(CCL) && !ISD::isIntEqualitySetCC(CCR)) {
+ SDValue CommonValue, Operand1, Operand2;
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ if (CCL == CCR) {
+ if (LHS0 == RHS0) {
+ CommonValue = LHS0;
+ Operand1 = LHS1;
+ Operand2 = RHS1;
+ CC = ISD::getSetCCSwappedOperands(CCL);
+ } else if (LHS1 == RHS1) {
+ CommonValue = LHS1;
+ Operand1 = LHS0;
+ Operand2 = RHS0;
+ CC = CCL;
+ }
+ } else {
+ assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
+ if (LHS0 == RHS1) {
+ CommonValue = LHS0;
+ Operand1 = LHS1;
+ Operand2 = RHS0;
+ CC = ISD::getSetCCSwappedOperands(CCL);
+ } else if (RHS0 == LHS1) {
+ CommonValue = LHS1;
+ Operand1 = LHS0;
+ Operand2 = RHS1;
+ CC = CCL;
+ }
+ }
+
+ if (CC != ISD::SETCC_INVALID) {
+ unsigned NewOpcode;
+ bool IsSigned = isSignedIntSetCC(CC);
+ if (((CC == ISD::SETLE || CC == ISD::SETULE || CC == ISD::SETLT ||
+ CC == ISD::SETULT) &&
+ (LogicOp->getOpcode() == ISD::OR)) ||
+ ((CC == ISD::SETGE || CC == ISD::SETUGE || CC == ISD::SETGT ||
+ CC == ISD::SETUGT) &&
+ (LogicOp->getOpcode() == ISD::AND)))
+ NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
+ else
+ NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
+
+ SDValue MinMaxValue =
+ DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
+ return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
+ }
+ }
+ }
+
+ if (TargetPreference == AndOrSETCCFoldKind::None)
+ return SDValue();
+
+ if (CCL == CCR &&
+ CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
+ LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() &&
+ RHS.hasOneUse()) {
+ const APInt &APLhs = LHS1C->getAPIntValue();
+ const APInt &APRhs = RHS1C->getAPIntValue();
+
+  // Either the target prefers to use ISD::ABS, or we already have an ISD::ABS
+  // (in which case this is just a compare).
+ if (APLhs == (-APRhs) &&
+ ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
+ DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
+ const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
+ // (icmp eq A, C) | (icmp eq A, -C)
+ // -> (icmp eq Abs(A), C)
+ // (icmp ne A, C) & (icmp ne A, -C)
+ // -> (icmp ne Abs(A), C)
+ SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
+ return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
+ DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
+ } else if (TargetPreference &
+ (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
+
+ // AndOrSETCCFoldKind::AddAnd:
+ // A == C0 | A == C1
+ // IF IsPow2(smax(C0, C1)-smin(C0, C1))
+ // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
+ // A != C0 & A != C1
+ // IF IsPow2(smax(C0, C1)-smin(C0, C1))
+ // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
+
+ // AndOrSETCCFoldKind::NotAnd:
+ // A == C0 | A == C1
+ // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
+ // -> ~A & smin(C0, C1) == 0
+ // A != C0 & A != C1
+ // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
+ // -> ~A & smin(C0, C1) != 0
+
+ const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
+ const APInt &MinC = APIntOps::smin(APRhs, APLhs);
+ APInt Dif = MaxC - MinC;
+ if (!Dif.isZero() && Dif.isPowerOf2()) {
+ if (MaxC.isAllOnes() &&
+ (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
+ SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
+ SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
+ DAG.getConstant(MinC, DL, OpVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
+ DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+ } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
+
+ SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
+ DAG.getConstant(-MinC, DL, OpVT));
+ SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
+ DAG.getConstant(~Dif, DL, OpVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
+ DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
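
Illustrative check (not part of the patch): foldAndOrOfSETCC relies on a few scalar identities. The sketch below verifies the min/max rewrite, the ABS form, and the AddAnd form exhaustively on 8-bit values, using constants chosen only for the example (C = 100, C0 = 16, C1 = 48, so C1 - C0 is a power of two).

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t X = A, Y = B;
      const uint8_t C = 100;
      // or-of-setcc:  (X < C) | (Y < C)  ==  (umin(X, Y) < C)
      assert(((X < C) || (Y < C)) == ((X < Y ? X : Y) < C));
      // and-of-setcc: (X < C) & (Y < C)  ==  (umax(X, Y) < C)
      assert(((X < C) && (Y < C)) == ((X > Y ? X : Y) < C));
      // ABS form: (S == 5) | (S == -5)  ==  (abs(S) == 5)
      int8_t S = (int8_t)A;
      assert(((S == 5) || (S == -5)) == ((S < 0 ? -S : S) == 5));
      // AddAnd form with C0 = 16, C1 = 48 (difference 32 is a power of two):
      //   (X == 16) | (X == 48)  ==  (((X - 16) & ~32) == 0)
      assert(((X == 16) || (X == 48)) ==
             (((uint8_t)(X - 16) & (uint8_t)~32) == 0));
    }
  return 0;
}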
+
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
@@ -5644,6 +6194,11 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
return V;
+ // Canonicalize:
+ // and(x, add) -> and(add, x)
+ if (N1.getOpcode() == ISD::ADD)
+ std::swap(N0, N1);
+
// TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
@@ -5655,8 +6210,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
// in a register.
APInt ADDC = ADDI->getAPIntValue();
APInt SRLC = SRLI->getAPIntValue();
- if (ADDC.getMinSignedBits() <= 64 &&
- SRLC.ult(VT.getSizeInBits()) &&
+ if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
SRLC.getZExtValue());
@@ -5677,55 +6231,6 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
}
}
- // Reduce bit extract of low half of an integer to the narrower type.
- // (and (srl i64:x, K), KMask) ->
- // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
- if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
- if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
- if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- unsigned Size = VT.getSizeInBits();
- const APInt &AndMask = CAnd->getAPIntValue();
- unsigned ShiftBits = CShift->getZExtValue();
-
- // Bail out, this node will probably disappear anyway.
- if (ShiftBits == 0)
- return SDValue();
-
- unsigned MaskBits = AndMask.countTrailingOnes();
- EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
-
- if (AndMask.isMask() &&
- // Required bits must not span the two halves of the integer and
- // must fit in the half size type.
- (ShiftBits + MaskBits <= Size / 2) &&
- TLI.isNarrowingProfitable(VT, HalfVT) &&
- TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
- TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
- TLI.isTruncateFree(VT, HalfVT) &&
- TLI.isZExtFree(HalfVT, VT)) {
- // The isNarrowingProfitable is to avoid regressions on PPC and
- // AArch64 which match a few 64-bit bit insert / bit extract patterns
- // on downstream users of this. Those patterns could probably be
- // extended to handle extensions mixed in.
-
- SDValue SL(N0);
- assert(MaskBits <= Size);
-
- // Extracting the highest bit of the low half.
- EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
- N0.getOperand(0));
-
- SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
- SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
- SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
- SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
- return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
- }
- }
- }
- }
-
return SDValue();
}
@@ -5734,7 +6239,7 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
if (!AndC->getAPIntValue().isMask())
return false;
- unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+ unsigned ActiveBits = AndC->getAPIntValue().countr_one();
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
EVT LoadedVT = LoadN->getMemoryVT();
@@ -5898,7 +6403,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
}
case ISD::ZERO_EXTEND:
case ISD::AssertZext: {
- unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
+ unsigned ActiveBits = Mask->getAPIntValue().countr_one();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
EVT VT = Op.getOpcode() == ISD::AssertZext ?
cast<VTSDNode>(Op.getOperand(1))->getVT() :
@@ -6071,12 +6576,6 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
- // This is probably not worthwhile without a supported type.
- EVT VT = And->getValueType(0);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isTypeLegal(VT))
- return SDValue();
-
// Look through an optional extension.
SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
@@ -6104,13 +6603,17 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
return SDValue();
+ // This is probably not worthwhile without a supported type.
+ EVT SrcVT = Src.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(SrcVT))
+ return SDValue();
+
// We might have looked through casts that make this transform invalid.
- // TODO: If the source type is wider than the result type, do the mask and
- // compare in the source type.
- unsigned VTBitWidth = VT.getScalarSizeInBits();
+ unsigned BitWidth = SrcVT.getScalarSizeInBits();
SDValue ShiftAmt = Src.getOperand(1);
auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
- if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(VTBitWidth))
+ if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
return SDValue();
// Set source to shift source.
@@ -6131,14 +6634,15 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
// and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
// and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
SDLoc DL(And);
- SDValue X = DAG.getZExtOrTrunc(Src, DL, VT);
- EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
+ EVT CCVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
SDValue Mask = DAG.getConstant(
- APInt::getOneBitSet(VTBitWidth, ShiftAmtC->getZExtValue()), DL, VT);
- SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
- SDValue Zero = DAG.getConstant(0, DL, VT);
+ APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
+ SDValue Zero = DAG.getConstant(0, DL, SrcVT);
SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
- return DAG.getZExtOrTrunc(Setcc, DL, VT);
+ return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
}
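
Illustrative check (not part of the patch): combineShiftAnd1ToBitTest rewrites a shift-and-mask of bit C into a direct bit test. The sketch below verifies the underlying identity exhaustively on 16-bit values.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t V = 0; V < (1u << 16); ++V)
    for (unsigned C = 0; C < 16; ++C) {
      uint16_t X = (uint16_t)V;
      // and (srl X, C), 1        ->  (X & (1 << C)) != 0
      assert(((X >> C) & 1u) == (unsigned)((X & (1u << C)) != 0));
      // and (not (srl X, C)), 1  ->  (X & (1 << C)) == 0
      assert((unsigned)(~(X >> C) & 1u) == (unsigned)((X & (1u << C)) == 0));
    }
  return 0;
}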
/// For targets that support usubsat, match a bit-hack form of that operation
@@ -6181,9 +6685,8 @@ static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
SelectionDAG &DAG) {
unsigned LogicOpcode = N->getOpcode();
- assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
- LogicOpcode == ISD::XOR)
- && "Expected bitwise logic operation");
+ assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
+ "Expected bitwise logic operation");
if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
return SDValue();
@@ -6230,8 +6733,8 @@ static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
SDValue RightHand, SelectionDAG &DAG) {
unsigned LogicOpcode = N->getOpcode();
- assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
- LogicOpcode == ISD::XOR));
+ assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
+ "Expected bitwise logic operation");
if (LeftHand.getOpcode() != LogicOpcode ||
RightHand.getOpcode() != LogicOpcode)
return SDValue();
@@ -6276,6 +6779,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
+ if (areBitwiseNotOfEachother(N0, N1))
+ return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), SDLoc(N),
+ VT);
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
@@ -6330,6 +6837,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
+ if (SDValue R = foldAndOrOfSETCC(N, DAG))
+ return R;
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -6337,6 +6847,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
+ // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, SDLoc(N),
+ VT, N0, N1))
+ return SD;
+
// fold (and (or x, C), D) -> D if (C & D) == D
auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
@@ -6345,13 +6860,27 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
return N1;
- // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
+ EVT SrcVT = N0Op0.getValueType();
+ unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
APInt Mask = ~N1C->getAPIntValue();
- Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
+ Mask = Mask.trunc(SrcBitWidth);
+
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (DAG.MaskedValueIsZero(N0Op0, Mask))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0Op0);
+
+ // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
+ if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
+ TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
+ TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
+ TLI.isNarrowingProfitable(VT, SrcVT)) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
+ DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
+ DAG.getZExtOrTrunc(N1, DL, SrcVT)));
+ }
}
// fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
@@ -7046,24 +7575,39 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
SDNode *N) {
EVT VT = N0.getValueType();
- if (N0.getOpcode() == ISD::AND) {
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
+
+ auto peekThroughResize = [](SDValue V) {
+ if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
+ return V->getOperand(0);
+ return V;
+ };
+
+ SDValue N0Resized = peekThroughResize(N0);
+ if (N0Resized.getOpcode() == ISD::AND) {
+ SDValue N1Resized = peekThroughResize(N1);
+ SDValue N00 = N0Resized.getOperand(0);
+ SDValue N01 = N0Resized.getOperand(1);
// fold or (and x, y), x --> x
- if (N00 == N1 || N01 == N1)
+ if (N00 == N1Resized || N01 == N1Resized)
return N1;
// fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
// TODO: Set AllowUndefs = true.
- if (getBitwiseNotOperand(N01, N00,
- /* AllowUndefs */ false) == N1)
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
+ if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
+ /* AllowUndefs */ false)) {
+ if (peekThroughResize(NotOperand) == N1Resized)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT,
+ DAG.getZExtOrTrunc(N00, SDLoc(N), VT), N1);
+ }
// fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
- if (getBitwiseNotOperand(N00, N01,
- /* AllowUndefs */ false) == N1)
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
+ if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
+ /* AllowUndefs */ false)) {
+ if (peekThroughResize(NotOperand) == N1Resized)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT,
+ DAG.getZExtOrTrunc(N01, SDLoc(N), VT), N1);
+ }
}
if (N0.getOpcode() == ISD::XOR) {
@@ -7215,6 +7759,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
+ if (SDValue R = foldAndOrOfSETCC(N, DAG))
+ return R;
+
if (SDValue Combined = visitORLike(N0, N1, N))
return Combined;
@@ -7231,6 +7778,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
return ROR;
+ // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, SDLoc(N),
+ VT, N0, N1))
+ return SD;
+
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) != 0 or c1/c2 are undef.
auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
@@ -7898,42 +8450,6 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
return SDValue();
}
-namespace {
-
-/// Represents known origin of an individual byte in load combine pattern. The
-/// value of the byte is either constant zero or comes from memory.
-struct ByteProvider {
- // For constant zero providers Load is set to nullptr. For memory providers
- // Load represents the node which loads the byte from memory.
- // ByteOffset is the offset of the byte in the value produced by the load.
- LoadSDNode *Load = nullptr;
- unsigned ByteOffset = 0;
- unsigned VectorOffset = 0;
-
- ByteProvider() = default;
-
- static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset,
- unsigned VectorOffset) {
- return ByteProvider(Load, ByteOffset, VectorOffset);
- }
-
- static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0, 0); }
-
- bool isConstantZero() const { return !Load; }
- bool isMemory() const { return Load; }
-
- bool operator==(const ByteProvider &Other) const {
- return Other.Load == Load && Other.ByteOffset == ByteOffset &&
- Other.VectorOffset == VectorOffset;
- }
-
-private:
- ByteProvider(LoadSDNode *Load, unsigned ByteOffset, unsigned VectorOffset)
- : Load(Load), ByteOffset(ByteOffset), VectorOffset(VectorOffset) {}
-};
-
-} // end anonymous namespace
-
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns std::nullopt if the provider can't be
/// calculated.
@@ -7975,7 +8491,9 @@ private:
/// LOAD
///
/// *ExtractVectorElement
-static const std::optional<ByteProvider>
+using SDByteProvider = ByteProvider<SDNode *>;
+
+static const std::optional<SDByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
std::optional<uint64_t> VectorIndex,
unsigned StartingIndex = 0) {
@@ -8034,7 +8552,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
// provide, then do not provide anything. Otherwise, subtract the index by
// the amount we shifted by.
return Index < ByteShift
- ? ByteProvider::getConstantZero()
+ ? SDByteProvider::getConstantZero()
: calculateByteProvider(Op->getOperand(0), Index - ByteShift,
Depth + 1, VectorIndex, Index);
}
@@ -8049,7 +8567,8 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
if (Index >= NarrowByteWidth)
return Op.getOpcode() == ISD::ZERO_EXTEND
- ? std::optional<ByteProvider>(ByteProvider::getConstantZero())
+ ? std::optional<SDByteProvider>(
+ SDByteProvider::getConstantZero())
: std::nullopt;
return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
StartingIndex);
@@ -8099,11 +8618,12 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
// question
if (Index >= NarrowByteWidth)
return L->getExtensionType() == ISD::ZEXTLOAD
- ? std::optional<ByteProvider>(ByteProvider::getConstantZero())
+ ? std::optional<SDByteProvider>(
+ SDByteProvider::getConstantZero())
: std::nullopt;
unsigned BPVectorIndex = VectorIndex.value_or(0U);
- return ByteProvider::getMemory(L, Index, BPVectorIndex);
+ return SDByteProvider::getSrc(L, Index, BPVectorIndex);
}
}
@@ -8191,9 +8711,12 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
!N->isSimple() || N->isIndexed())
return SDValue();
- // Collect all of the stores in the chain.
+  // Collect all of the stores in the chain, up to the maximum store width (i64).
SDValue Chain = N->getChain();
SmallVector<StoreSDNode *, 8> Stores = {N};
+ unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
+ unsigned MaxWideNumBits = 64;
+ unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
// All stores must be the same size to ensure that we are writing all of the
// bytes in the wide value.
@@ -8207,6 +8730,8 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
+ if (MaxStores < Stores.size())
+ return SDValue();
}
// There is no reason to continue if we do not have at least a pair of stores.
if (Stores.size() < 2)
@@ -8215,7 +8740,6 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// Handle simple types only.
LLVMContext &Context = *DAG.getContext();
unsigned NumStores = Stores.size();
- unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
unsigned WideNumBits = NumStores * NarrowNumBits;
EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
@@ -8397,23 +8921,24 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
unsigned ByteWidth = VT.getSizeInBits() / 8;
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
- auto MemoryByteOffset = [&] (ByteProvider P) {
- assert(P.isMemory() && "Must be a memory byte provider");
- unsigned LoadBitWidth = P.Load->getMemoryVT().getScalarSizeInBits();
+ auto MemoryByteOffset = [&](SDByteProvider P) {
+ assert(P.hasSrc() && "Must be a memory byte provider");
+ auto *Load = cast<LoadSDNode>(P.Src.value());
+
+ unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
assert(LoadBitWidth % 8 == 0 &&
"can only analyze providers for individual bytes not bit");
unsigned LoadByteWidth = LoadBitWidth / 8;
- return IsBigEndianTarget
- ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
- : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
+ return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
+ : littleEndianByteAt(LoadByteWidth, P.DestOffset);
};
std::optional<BaseIndexOffset> Base;
SDValue Chain;
SmallPtrSet<LoadSDNode *, 8> Loads;
- std::optional<ByteProvider> FirstByteProvider;
+ std::optional<SDByteProvider> FirstByteProvider;
int64_t FirstOffset = INT64_MAX;
// Check if all the bytes of the OR we are looking at are loaded from the same
@@ -8434,9 +8959,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
continue;
}
- assert(P->isMemory() && "provenance should either be memory or zero");
-
- LoadSDNode *L = P->Load;
+ assert(P->hasSrc() && "provenance should either be memory or zero");
+ auto *L = cast<LoadSDNode>(P->Src.value());
// All loads must share the same chain
SDValue LChain = L->getChain();
@@ -8460,7 +8984,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
if (LoadWidthInBit % 8 != 0)
return SDValue();
- unsigned ByteOffsetFromVector = P->VectorOffset * LoadWidthInBit / 8;
+ unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
Ptr.addToOffset(ByteOffsetFromVector);
}
@@ -8517,7 +9041,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// So the combined value can be loaded from the first load address.
if (MemoryByteOffset(*FirstByteProvider) != 0)
return SDValue();
- LoadSDNode *FirstLoad = FirstByteProvider->Load;
+ auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
// The node we are looking at matches with the pattern, check if we can
// replace it with a single (possibly zero-extended) load and bswap + shift if
@@ -8715,6 +9239,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
+ // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
+ return SD;
+
// fold (a^b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
DAG.haveNoCommonBitsSet(N0, N1))
@@ -9462,7 +9991,7 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
SDValue MulhRightOp;
if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
unsigned ActiveBits = IsSignExt
- ? Constant->getAPIntValue().getMinSignedBits()
+ ? Constant->getAPIntValue().getSignificantBits()
: Constant->getAPIntValue().getActiveBits();
if (ActiveBits > NarrowVTSize)
return SDValue();
@@ -9499,14 +10028,59 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
// we use mulhs. Otherwise, zero extends (zext) use mulhu.
unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
- // Combine to mulh if mulh is legal/custom for the narrow type on the target.
- if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
- return SDValue();
+  // Combine to mulh if mulh is legal/custom for the narrow type on the target,
+  // or, for a vector type, if we can transform to an acceptable type and
+  // rely on legalization to split/combine the result.
+ if (NarrowVT.isVector()) {
+ EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
+ if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
+ !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
+ return SDValue();
+ } else {
+ if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
+ return SDValue();
+ }
SDValue Result =
DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
- return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
- : DAG.getZExtOrTrunc(Result, DL, WideVT));
+ bool IsSigned = N->getOpcode() == ISD::SRA;
+ return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
+}
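
Illustrative scalar model (not part of the patch): combineShiftToMULH matches a widening multiply followed by a shift by the narrow bit width. The sketch models ISD::MULHS on i16 (mulhs16() is an invented name) and assumes arithmetic right shift of negative values, which C++20 guarantees.

#include <cassert>
#include <cstdint>

// Scalar stand-in for ISD::MULHS on i16: the high 16 bits of the 32-bit product.
static int16_t mulhs16(int16_t A, int16_t B) {
  return (int16_t)(((int32_t)A * (int32_t)B) >> 16);
}

int main() {
  int16_t A = -12345, B = 321;
  // (sra (mul (sext a), (sext b)), 16) keeps only the high half of the
  // product, i.e. sext(mulhs(a, b)).
  int32_t Shifted = ((int32_t)A * (int32_t)B) >> 16;
  assert(Shifted == (int32_t)mulhs16(A, B));
  return 0;
}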
+
+// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
+// This helper function accepts nodes with opcode ISD::BSWAP or ISD::BITREVERSE.
+static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
+ SDValue OldLHS = N0.getOperand(0);
+ SDValue OldRHS = N0.getOperand(1);
+
+    // If both operands are bswap/bitreverse, the multi-use restriction is not
+    // needed. Otherwise ensure logic_op and bswap/bitreverse(x) have one use.
+ if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
+ return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
+ OldRHS.getOperand(0));
+ }
+
+ if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
+ SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
+ return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
+ NewBitReorder);
+ }
+
+ if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
+ SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
+ return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
+ OldRHS.getOperand(0));
+ }
+ }
+ return SDValue();
}
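
Illustrative check (not part of the patch): foldBitOrderCrossLogicOp relies on BSWAP/BITREVERSE commuting with bitwise logic. The sketch below spot-checks the BSWAP case with a portable 32-bit byte swap written for the example.

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xff00u) | ((V << 8) & 0xff0000u) | (V << 24);
}

int main() {
  uint32_t X = 0x12345678u, Y = 0x9abcdef0u;
  // fold (bswap (logic_op (bswap x), y)) -> (logic_op x, (bswap y))
  assert(bswap32(bswap32(X) & Y) == (X & bswap32(Y)));
  assert(bswap32(bswap32(X) | Y) == (X | bswap32(Y)));
  assert(bswap32(bswap32(X) ^ Y) == (X ^ bswap32(Y)));
  return 0;
}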
SDValue DAGCombiner::visitSRA(SDNode *N) {
@@ -9892,8 +10466,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
}
- // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
+  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and the
+  // bit width of x is a power of two. The "5" represents (log2 (bitwidth x)).
if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ isPowerOf2_32(OpSizeInBits) &&
N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
@@ -9912,7 +10488,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// could be set on input to the CTLZ node. If this bit is set, the SRL
// will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
// to an SRL/XOR pair, which is likely to simplify more.
- unsigned ShAmt = UnknownBits.countTrailingZeros();
+ unsigned ShAmt = UnknownBits.countr_zero();
SDValue Op = N0.getOperand(0);
if (ShAmt) {
@@ -10138,13 +10714,23 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
return SDValue();
}
-// Given a ABS node, detect the following pattern:
+// Given an ABS node, detect the following patterns:
// (ABS (SUB (EXTEND a), (EXTEND b))).
+// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
// Generates UABD/SABD instruction.
SDValue DAGCombiner::foldABSToABD(SDNode *N) {
+ EVT SrcVT = N->getValueType(0);
+
+ if (N->getOpcode() == ISD::TRUNCATE)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::ABS)
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDValue AbsOp1 = N->getOperand(0);
SDValue Op0, Op1;
+ SDLoc DL(N);
if (AbsOp1.getOpcode() != ISD::SUB)
return SDValue();
@@ -10157,9 +10743,11 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
if (Opc0 != Op1.getOpcode() ||
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) {
// fold (abs (sub nsw x, y)) -> abds(x, y)
- if (AbsOp1->getFlags().hasNoSignedWrap() &&
- TLI.isOperationLegalOrCustom(ISD::ABDS, VT))
- return DAG.getNode(ISD::ABDS, SDLoc(N), VT, Op0, Op1);
+ if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
+ TLI.preferABDSToABSWithNSW(VT)) {
+ SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+ }
return SDValue();
}
@@ -10170,17 +10758,20 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
// fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
// fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
// NOTE: Extensions must be equivalent.
- if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) {
+ if (VT1 == VT2 && hasOperation(ABDOpcode, VT1)) {
Op0 = Op0.getOperand(0);
Op1 = Op1.getOperand(0);
- SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1);
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD);
+ SDValue ABD = DAG.getNode(ABDOpcode, DL, VT1, Op0, Op1);
+ ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
}
// fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
// fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
- if (TLI.isOperationLegalOrCustom(ABDOpcode, VT))
- return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1);
+ if (hasOperation(ABDOpcode, VT)) {
+ SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+ }
return SDValue();
}
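
Illustrative check (not part of the patch): foldABSToABD folds abs(sub(ext a, ext b)) into an extended absolute-difference node. The sketch below verifies the sign-extended case on i8 exhaustively; abds8() is a name invented for the example.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>

// Scalar stand-in for ISD::ABDS on i8.
static uint8_t abds8(int8_t A, int8_t B) {
  return (uint8_t)(std::max(A, B) - std::min(A, B));
}

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B)
      // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
      assert(std::abs(A - B) == (int)abds8((int8_t)A, (int8_t)B));
  return 0;
}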
@@ -10190,8 +10781,8 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (abs c1) -> c2
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, SDLoc(N), VT, {N0}))
+ return C;
// fold (abs (abs x)) -> (abs x)
if (N0.getOpcode() == ISD::ABS)
return N0;
@@ -10277,6 +10868,9 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
}
}
+ if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
+ return V;
+
return SDValue();
}
@@ -10447,7 +11041,8 @@ SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
if (NegRHS == False) {
SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
False, CC, TLI, DAG);
- return DAG.getNode(ISD::FNEG, DL, VT, Combined);
+ if (Combined)
+ return DAG.getNode(ISD::FNEG, DL, VT, Combined);
}
}
}
@@ -11091,6 +11686,23 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
+ // Remove a masked store if base pointers and masks are equal.
+ if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
+ if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
+ MST1->isSimple() && MST1->getBasePtr() == Ptr &&
+ !MST->getBasePtr().isUndef() &&
+ ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
+ MST1->getMemoryVT().getStoreSize()) ||
+ ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
+ TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
+ MST->getMemoryVT().getStoreSize())) {
+ CombineTo(MST1, MST1->getChain());
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ }
+
// If this is a masked store with an all ones mask, we can use an unmasked store.
// FIXME: Can we do this for indexed, compressing, or truncating stores?
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
@@ -11391,6 +12003,38 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
}
}
+ // Match VSELECTs with absolute difference patterns.
+ // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
+ if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
+ N1.getOperand(0) == N2.getOperand(1) &&
+ N1.getOperand(1) == N2.getOperand(0)) {
+ bool IsSigned = isSignedIntSetCC(CC);
+ unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
+ if (hasOperation(ABDOpc, VT)) {
+ switch (CC) {
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ break;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+        if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
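Illustrative check (not part of the patch): the VSELECT absolute-difference patterns above all reduce to max minus min. The sketch below verifies the unsigned setugt/setuge forms exhaustively on i8.

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t X = A, Y = B;
      uint8_t Abd = (uint8_t)((X > Y ? X : Y) - (X < Y ? X : Y)); // umax - umin
      // (vselect (setcc x, y, setugt), (sub x, y), (sub y, x)) --> (abdu x, y)
      assert((uint8_t)(X > Y ? X - Y : Y - X) == Abd);
      // (vselect (setcc x, y, setuge), (sub x, y), (sub y, x)) --> (abdu x, y)
      assert((uint8_t)(X >= Y ? X - Y : Y - X) == Abd);
    }
  return 0;
}
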
// Match VSELECTs into add with unsigned saturation.
if (hasOperation(ISD::UADDSAT, VT)) {
// Check if one of the arms of the VSELECT is vector with all bits set.
@@ -11612,57 +12256,6 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
EVT VT = N->getValueType(0);
- // SETCC(FREEZE(X), CONST, Cond)
- // =>
- // FREEZE(SETCC(X, CONST, Cond))
- // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
- // isn't equivalent to true or false.
- // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
- // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
- //
- // This transformation is beneficial because visitBRCOND can fold
- // BRCOND(FREEZE(X)) to BRCOND(X).
-
- // Conservatively optimize integer comparisons only.
- if (PreferSetCC) {
- // Do this only when SETCC is going to be used by BRCOND.
-
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- bool Updated = false;
-
- // Is 'X Cond C' always true or false?
- auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
- bool False = (Cond == ISD::SETULT && C->isZero()) ||
- (Cond == ISD::SETLT && C->isMinSignedValue()) ||
- (Cond == ISD::SETUGT && C->isAllOnes()) ||
- (Cond == ISD::SETGT && C->isMaxSignedValue());
- bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
- (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
- (Cond == ISD::SETUGE && C->isZero()) ||
- (Cond == ISD::SETGE && C->isMinSignedValue());
- return True || False;
- };
-
- if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
- if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
- N0 = N0->getOperand(0);
- Updated = true;
- }
- }
- if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
- if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
- N0C)) {
- N1 = N1->getOperand(0);
- Updated = true;
- }
- }
-
- if (Updated)
- return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
- }
-
SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
SDLoc(N), !PreferSetCC);
@@ -11733,7 +12326,8 @@ static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ CombineLevel Level) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -11758,10 +12352,14 @@ static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
else if (Opcode == ISD::ZERO_EXTEND)
ExtLoadOpcode = ISD::ZEXTLOAD;
+  // An illegal VSELECT may cause ISel to fail if it appears after legalization
+  // (DAG Combine2), so conservatively check the OperationAction first.
LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
- !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
+ !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
+ (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
+ TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
return SDValue();
SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
@@ -11782,11 +12380,7 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
EVT VT = N->getValueType(0);
SDLoc DL(N);
- assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
- Opcode == ISD::ANY_EXTEND ||
- Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
"Expected EXTEND dag node in input!");
// fold (sext c1) -> c1
@@ -12052,8 +12646,7 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
// and/or/xor
SDValue N0 = N->getOperand(0);
- if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
- N0.getOpcode() == ISD::XOR) ||
+ if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
N0.getOperand(1).getOpcode() != ISD::Constant ||
(LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
return SDValue();
@@ -12449,11 +13042,19 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
+ // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
+ // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
+ N0.getOperand(0));
+
// fold (sext (sext_inreg x)) -> (sext (trunc x))
if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
SDValue N00 = N0.getOperand(0);
EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
- if (N00.getOpcode() == ISD::TRUNCATE && (!LegalOperations || TLI.isTypeLegal(ExtVT))) {
+ if (N00.getOpcode() == ISD::TRUNCATE &&
+ (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00.getOperand(0));
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
}
@@ -12532,8 +13133,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (and/or/xor (load x), cst)) ->
// (and/or/xor (sextload x), (sext cst))
- if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
- N0.getOpcode() == ISD::XOR) &&
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
@@ -12630,45 +13230,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
- if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
return Res;
return SDValue();
}
-// isTruncateOf - If N is a truncate of some other value, return true, record
-// the value being truncated in Op and which of Op's bits are zero/one in Known.
-// This function computes KnownBits to avoid a duplicated call to
-// computeKnownBits in the caller.
-static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
- KnownBits &Known) {
- if (N->getOpcode() == ISD::TRUNCATE) {
- Op = N->getOperand(0);
- Known = DAG.computeKnownBits(Op);
- return true;
- }
-
- if (N.getOpcode() != ISD::SETCC ||
- N.getValueType().getScalarType() != MVT::i1 ||
- cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
- return false;
-
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- assert(Op0.getValueType() == Op1.getValueType());
-
- if (isNullOrNullSplat(Op0))
- Op = Op1;
- else if (isNullOrNullSplat(Op1))
- Op = Op0;
- else
- return false;
-
- Known = DAG.computeKnownBits(Op);
-
- return (Known.Zero | 1).isAllOnes();
-}
-
/// Given an extending node with a pop-count operand, if the target does not
/// support a pop-count in the narrow source type but does support it in the
/// destination type, widen the pop-count to the destination type.
@@ -12722,14 +13289,15 @@ static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
+ if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
return FoldedVOp;
// zext(undef) = 0
if (N0.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -12737,7 +13305,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (zext x)) -> (zext x)
// fold (zext (aext x)) -> (zext x)
if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+
+ // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
+ // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(N), VT,
N0.getOperand(0));
// fold (zext (truncate x)) -> (zext x) or
@@ -12754,7 +13328,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
std::min(Op.getScalarValueSizeInBits(),
VT.getScalarSizeInBits()));
if (TruncatedBits.isSubsetOf(Known.Zero))
- return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ return DAG.getZExtOrTrunc(Op, DL, VT);
}
// fold (zext (truncate x)) -> (and x, mask)
@@ -12780,9 +13354,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
SDValue Op = N0.getOperand(0);
- Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
+ Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
AddToWorklist(Op.getNode());
- SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
// Transfer the debug info; the new node is equivalent to N0.
DAG.transferDbgValues(N0, ZExtOrTrunc);
return ZExtOrTrunc;
@@ -12790,9 +13364,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
- SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+ SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
AddToWorklist(Op.getNode());
- SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
+ SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
// We may safely transfer the debug info describing the truncate node over
// to the equivalent and operation.
DAG.transferDbgValues(N0, And);
@@ -12811,7 +13385,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue X = N0.getOperand(0).getOperand(0);
X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
- SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
}
@@ -12836,8 +13409,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// (and/or/xor (zextload x), (zext cst))
// Unless (and (load x) cst) will match as a zextload already and has
// additional users.
- if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
- N0.getOpcode() == ISD::XOR) &&
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
@@ -12865,7 +13437,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN00->getMemoryVT(),
LN00->getMemOperand());
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
- SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
@@ -12919,7 +13490,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// that matter). Check to see that they are the same size. If so, we know
// that the element size of the sext'd result matches the element size of
// the compare operands.
- SDLoc DL(N);
if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
// zext(setcc) -> zext_in_reg(vsetcc) for vectors.
SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
@@ -12939,7 +13509,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
// zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
- SDLoc DL(N);
EVT N0VT = N0.getValueType();
EVT N00VT = N0.getOperand(0).getValueType();
if (SDValue SCC = SimplifySelectCC(
@@ -12952,29 +13521,29 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// (zext (shl (zext x), cst)) -> (shl (zext x), cst)
if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
- isa<ConstantSDNode>(N0.getOperand(1)) &&
- N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
- N0.hasOneUse()) {
+ !TLI.isZExtFree(N0, VT)) {
+ SDValue ShVal = N0.getOperand(0);
SDValue ShAmt = N0.getOperand(1);
- if (N0.getOpcode() == ISD::SHL) {
- SDValue InnerZExt = N0.getOperand(0);
- // If the original shl may be shifting out bits, do not perform this
- // transformation.
- unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
- InnerZExt.getOperand(0).getValueSizeInBits();
- if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
- return SDValue();
- }
-
- SDLoc DL(N);
+ if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
+ if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SHL) {
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
+ // TODO: Add MaskedValueIsZero check.
+ unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
+ ShVal.getOperand(0).getValueSizeInBits();
+ if (ShAmtC->getAPIntValue().ugt(KnownZeroBits))
+ return SDValue();
+ }
- // Ensure that the shift amount is wide enough for the shifted value.
- if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
- ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
- return DAG.getNode(N0.getOpcode(), DL, VT,
- DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
- ShAmt);
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
+ }
+ }
}
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
@@ -12986,7 +13555,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SDValue V = widenAbs(N, DAG))
return V;
- if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
return Res;
return SDValue();
@@ -13011,6 +13580,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.getOpcode() == ISD::SIGN_EXTEND)
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+ // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
+ // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
+ // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+
// fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
@@ -13147,7 +13724,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
if (SDValue NewCtPop = widenCtPop(N, DAG))
return NewCtPop;
- if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
return Res;
return SDValue();
@@ -13305,7 +13882,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
const APInt &Mask = AndC->getAPIntValue();
unsigned ActiveBits = 0;
if (Mask.isMask()) {
- ActiveBits = Mask.countTrailingOnes();
+ ActiveBits = Mask.countr_one();
} else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
HasShiftedOffset = true;
} else {
@@ -13373,8 +13950,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
isa<ConstantSDNode>(Mask->getOperand(1))) {
const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
if (ShiftMask.isMask()) {
- EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
- ShiftMask.countTrailingOnes());
+ EVT MaskedVT =
+ EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
// If the mask is smaller, recompute the type.
if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
@@ -13520,9 +14097,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
// if x is small enough or if we know that x has more than 1 sign bit and the
// sign_extend_inreg is extending from one of them.
- if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
- N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
- N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+ if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
unsigned DstElts = N0.getValueType().getVectorMinNumElements();
@@ -13543,7 +14118,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getScalarValueSizeInBits() == ExtVTBits &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
@@ -13690,9 +14265,7 @@ foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI,
Src.getValueType().getVectorElementType(),
VT.getVectorElementCount());
- assert((InregOpcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
- InregOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
- InregOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
"Expected EXTEND_VECTOR_INREG dag node in input!");
  // Profitability check: our operand must be a one-use CONCAT_VECTORS.
@@ -13752,11 +14325,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
// fold (truncate c1) -> c1
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
- SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
- if (C.getNode() != N)
- return C;
- }
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, SDLoc(N), VT, {N0}))
+ return C;
// fold (truncate (ext x)) -> (ext x) or (truncate x) or x
if (N0.getOpcode() == ISD::ZERO_EXTEND ||
@@ -13860,6 +14430,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
return V;
+ if (SDValue ABD = foldABSToABD(N))
+ return ABD;
+
// Attempt to pre-truncate BUILD_VECTOR sources.
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
@@ -14036,12 +14609,13 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
break;
case ISD::ADDE:
- case ISD::ADDCARRY:
+ case ISD::UADDO_CARRY:
// (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
- // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
+ // (trunc uaddo_carry(X, Y, Carry)) ->
+ // (uaddo_carry trunc(X), trunc(Y), Carry)
// When the adde's carry is not used.
- // We only do for addcarry before legalize operation
- if (((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
+    // We only do this for uaddo_carry before operation legalization.
+ if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
TLI.isOperationLegal(N0.getOpcode(), VT)) &&
N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
SDLoc DL(N);
@@ -14114,18 +14688,19 @@ static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}
-static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
// If this is not a bitcast to an FP type or if the target doesn't have
// IEEE754-compliant FP logic, we're done.
EVT VT = N->getValueType(0);
- if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
+ SDValue N0 = N->getOperand(0);
+ EVT SourceVT = N0.getValueType();
+
+ if (!VT.isFloatingPoint())
return SDValue();
// TODO: Handle cases where the integer constant is a different scalar
// bitwidth to the FP.
- SDValue N0 = N->getOperand(0);
- EVT SourceVT = N0.getValueType();
if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
return SDValue();
@@ -14148,6 +14723,19 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+ if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
+ return SDValue();
+
+  // This needs to be the inverse of the logic in foldSignChangeInBitcast.
+ // FIXME: I don't think looking for bitcast intrinsically makes sense, but
+ // removing this would require more changes.
+ auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
+ if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
+ return true;
+
+ return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
+ };
+
// Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
// Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
// Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
@@ -14155,9 +14743,9 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
SDValue LogicOp0 = N0.getOperand(0);
ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
- LogicOp0.getOpcode() == ISD::BITCAST &&
- LogicOp0.getOperand(0).getValueType() == VT) {
- SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
+ IsBitCastOrFree(LogicOp0, VT)) {
+ SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
+ SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
NumFPLogicOpsConv++;
if (N0.getOpcode() == ISD::OR)
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
@@ -14209,6 +14797,22 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (N0.getOpcode() == ISD::BITCAST)
return DAG.getBitcast(VT, N0.getOperand(0));
+ // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
+ // iff the current bitwise logicop type isn't legal
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
+ !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
+ auto IsFreeBitcast = [VT](SDValue V) {
+ return (V.getOpcode() == ISD::BITCAST &&
+ V.getOperand(0).getValueType() == VT) ||
+ (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
+ V->hasOneUse());
+ };
+ if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
+ DAG.getBitcast(VT, N0.getOperand(0)),
+ DAG.getBitcast(VT, N0.getOperand(1)));
+ }
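+  // Hedged example: if VT is i64 and v2i32 is not a legal type, then
+  //   (i64 (bitcast (xor (v2i32 (bitcast x)), (build_vector c0, c1))))
+  //   -> (xor x, (i64 (bitcast (build_vector c0, c1))))
+  // so the bitwise op is performed in the legal integer type.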
+
// fold (conv (load x)) -> (load (conv*)x)
// If the resultant load doesn't need a higher alignment than the original!
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
@@ -14437,7 +15041,9 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
N0->getNumValues() != 1 || !N0->hasOneUse())
return SDValue();
- bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR;
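+  // Assumed rationale: these opcodes only place each operand into disjoint
+  // parts of the result, so freezing the operands individually is as good as
+  // freezing the whole node.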
+ bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
+ N0.getOpcode() == ISD::BUILD_PAIR ||
+ N0.getOpcode() == ISD::CONCAT_VECTORS;
SmallSetVector<SDValue, 8> MaybePoisonOperands;
for (SDValue Op : N0->ops()) {
@@ -14474,6 +15080,10 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
}
}
+ // This node has been merged with another.
+ if (N->getOpcode() == ISD::DELETED_NODE)
+ return SDValue(N, 0);
+
// The whole node may have been updated, so the value we were holding
// may no longer be valid. Re-fetch the operand we're `freeze`ing.
N0 = N->getOperand(0);
@@ -14585,21 +15195,26 @@ static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
}
/// Try to perform FMA combining on a given FADD node.
+template <class MatchContextClass>
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
-
+ MatchContextClass matcher(DAG, TLI, N);
const TargetOptions &Options = DAG.getTarget().Options;
+ bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
+
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
+ // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
+ // FIXME: Add VP_FMAD opcode.
+ bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+ (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
@@ -14613,6 +15228,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
return SDValue();
+ // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
+ // beneficial. It does not reduce latency. It increases register pressure. It
+ // replaces an fadd with an fma which is a more complex instruction, so is
+ // likely to have a larger encoding, use more functional units, etc.
+ if (N0 == N1)
+ return SDValue();
+
if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
return SDValue();
@@ -14621,14 +15243,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
auto isFusedOp = [&](SDValue N) {
- unsigned Opcode = N.getOpcode();
- return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
};
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
- auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
- if (N.getOpcode() != ISD::FMUL)
+ auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
+ if (!matcher.match(N, ISD::FMUL))
return false;
return AllowFusionGlobally || N->getFlags().hasAllowContract();
};
@@ -14641,15 +15262,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
- N0.getOperand(1), N1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
+ N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
- N1.getOperand(1), N0);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
+ N1.getOperand(1), N0);
}
// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
@@ -14673,10 +15294,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue TmpFMA = FMA;
while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
SDValue FMul = TmpFMA->getOperand(2);
- if (FMul.getOpcode() == ISD::FMUL && FMul.hasOneUse()) {
+ if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
SDValue C = FMul.getOperand(0);
SDValue D = FMul.getOperand(1);
- SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
+ SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
// Replacing the inner FMul could cause the outer FMA to be simplified
// away.
@@ -14690,29 +15311,29 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// Look through FP_EXTEND nodes to do more combining.
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
- if (N0.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
- N1);
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
}
}
// fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
// Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N1, ISD::FP_EXTEND)) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
- N0);
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
}
}
@@ -14722,15 +15343,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
SDValue Z) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT, X, Y,
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
};
if (isFusedOp(N0)) {
SDValue N02 = N0.getOperand(2);
- if (N02.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N02, ISD::FP_EXTEND)) {
SDValue N020 = N02.getOperand(0);
if (isContractableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -14749,12 +15370,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
SDValue Z) {
- return DAG.getNode(
- PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
@@ -14810,20 +15432,26 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
/// Try to perform FMA combining on a given FSUB node.
+template <class MatchContextClass>
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
-
+ MatchContextClass matcher(DAG, TLI, N);
const TargetOptions &Options = DAG.getTarget().Options;
+
+ bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
+
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
+ // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
+ // FIXME: Add VP_FMAD opcode.
+ bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+ (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
@@ -14847,8 +15475,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
- auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
- if (N.getOpcode() != ISD::FMUL)
+ auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
+ if (!matcher.match(N, ISD::FMUL))
return false;
return AllowFusionGlobally || N->getFlags().hasAllowContract();
};
@@ -14856,8 +15484,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
- XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
+ XY.getOperand(1),
+ matcher.getNode(ISD::FNEG, SL, VT, Z));
}
return SDValue();
};
@@ -14866,9 +15495,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// Note: Commutes FSUB operands.
auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
- YZ.getOperand(1), X);
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
+ YZ.getOperand(1), X);
}
return SDValue();
};
@@ -14893,44 +15523,46 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
+ if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
(Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ return matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
+ matcher.getNode(ISD::FNEG, SL, VT, N1));
}
// Look through FP_EXTEND nodes to do more combining.
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ matcher.getNode(ISD::FNEG, SL, VT, N1));
}
}
// fold (fsub x, (fpext (fmul y, z)))
// -> (fma (fneg (fpext y)), (fpext z), x)
// Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N1, ISD::FP_EXTEND)) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
+ matcher.getNode(
+ ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
}
}
@@ -14940,19 +15572,20 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
// from implementing the canonicalization in visitFSUB.
- if (N0.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FNEG) {
+ if (matcher.match(N00, ISD::FNEG)) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(
+ return matcher.getNode(
ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
- N1));
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
}
}
}
@@ -14963,24 +15596,25 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
// from implementing the canonicalization in visitFSUB.
- if (N0.getOpcode() == ISD::FNEG) {
+ if (matcher.match(N0, ISD::FNEG)) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N00, ISD::FP_EXTEND)) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N000.getValueType())) {
- return DAG.getNode(
+ return matcher.getNode(
ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
- N1));
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
}
}
}
- auto isReassociable = [Options](SDNode *N) {
+ auto isReassociable = [&Options](SDNode *N) {
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
};
@@ -14990,8 +15624,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
};
auto isFusedOp = [&](SDValue N) {
- unsigned Opcode = N.getOpcode();
- return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
};
// More folding opportunities when target permits.
@@ -15002,12 +15635,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (CanFuse && isFusedOp(N0) &&
isContractableAndReassociableFMUL(N0.getOperand(2)) &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
- N0.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SL, VT, N1)));
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ matcher.getNode(ISD::FNEG, SL, VT, N1)));
}
// fold (fsub x, (fma y, z, (fmul u, v)))
@@ -15017,29 +15650,30 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
+ matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
}
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
if (isFusedOp(N0) && N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
- if (N02.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N02, ISD::FP_EXTEND)) {
SDValue N020 = N02.getOperand(0);
if (isContractableAndReassociableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(
+ matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1)));
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
+ matcher.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
@@ -15050,29 +15684,29 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- if (N0.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
SDValue N00 = N0.getOperand(0);
if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
- DAG.getNode(
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1)));
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
+ matcher.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
- if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
+ if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableAndReassociableFMUL(N120) &&
@@ -15080,13 +15714,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
+ matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
}
}
@@ -15096,7 +15732,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
+ if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
SDValue CvtSrc = N1.getOperand(0);
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
@@ -15106,15 +15742,16 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
+ matcher.getNode(ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
}
}
}
@@ -15217,6 +15854,17 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
return SDValue();
}
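+// Hedged note on the mechanism: with VPMatchContext, matcher.match(Op,
+// ISD::FMUL) matches the corresponding VP node (e.g. vp_fmul) that shares the
+// root's mask and vector length, and matcher.getNode(ISD::FMA, ...) builds the
+// VP form with that same mask/EVL, so the FMA-forming combines in
+// visitFADDForFMACombine carry over to VP operations.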
+SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // FADD -> FMA combines:
+ if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -15394,10 +16042,15 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getConstantFP(4.0, DL, VT));
}
}
+
+ // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
+ VT, N0, N1, Flags))
+ return SD;
} // enable-unsafe-fp-math
// FADD -> FMA combines:
- if (SDValue Fused = visitFADDForFMACombine(N)) {
+ if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -15507,7 +16160,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
// FSUB -> FMA combines:
- if (SDValue Fused = visitFSUBForFMACombine(N)) {
+ if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -15568,6 +16221,11 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
}
+
+ // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
+ VT, N0, N1, Flags))
+ return SD;
}
// fold (fmul X, 2.0) -> (fadd X, X)
@@ -15653,7 +16311,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitFMA(SDNode *N) {
+template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
@@ -15664,6 +16322,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// FMA nodes have flags that propagate to the created nodes.
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+ MatchContextClass matcher(DAG, TLI, N);
bool CanReassociate =
Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
@@ -15672,7 +16331,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&
isa<ConstantFPSDNode>(N2)) {
- return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
+ return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
// (-N0 * -N1) + N2 --> (N0 * N1) + N2
@@ -15688,7 +16347,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
+ return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
}
// FIXME: use fast math flags instead of Options.UnsafeFPMath
@@ -15699,70 +16358,74 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return N2;
}
+ // FIXME: Support splat of constant.
if (N0CFP && N0CFP->isExactlyValue(1.0))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
+ return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
+ return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
+ return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
if (CanReassociate) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
- if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
+ if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
+ return matcher.getNode(
+ ISD::FMUL, DL, VT, N0,
+ matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
}
// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
- if (N0.getOpcode() == ISD::FMUL &&
+ if (matcher.match(N0, ISD::FMUL) &&
DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
- return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
- N2);
+ return matcher.getNode(
+ ISD::FMA, DL, VT, N0.getOperand(0),
+ matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
}
}
// (fma x, -1, y) -> (fadd (fneg x), y)
+ // FIXME: Support splat of constant.
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
- return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
+ return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
- SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
+ SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
AddToWorklist(RHSNeg.getNode());
- return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
+ return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
// fma (fneg x), K, y -> fma x -K, y
- if (N0.getOpcode() == ISD::FNEG &&
+ if (matcher.match(N0, ISD::FNEG) &&
(TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
- ForCodeSize)))) {
- return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
+ (N1.hasOneUse() &&
+ !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
+ return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
+ matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
}
}
+ // FIXME: Support splat of constant.
if (CanReassociate) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
- return DAG.getNode(
- ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
+ return matcher.getNode(ISD::FMUL, DL, VT, N0,
+ matcher.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(1.0, DL, VT)));
}
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
- if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
- return DAG.getNode(
- ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
+ if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
+ return matcher.getNode(ISD::FMUL, DL, VT, N0,
+ matcher.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(-1.0, DL, VT)));
}
}
@@ -15771,7 +16434,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (!TLI.isFNegFree(VT))
if (SDValue Neg = TLI.getCheaperNegatedExpression(
SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FNEG, DL, VT, Neg);
+ return matcher.getNode(ISD::FNEG, DL, VT, Neg);
return SDValue();
}
@@ -16043,27 +16706,30 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
/// copysign(x, fp_extend(y)) -> copysign(x, y)
/// copysign(x, fp_round(y)) -> copysign(x, y)
-static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
- SDValue N1 = N->getOperand(1);
- if ((N1.getOpcode() == ISD::FP_EXTEND ||
- N1.getOpcode() == ISD::FP_ROUND)) {
- EVT N1VT = N1->getValueType(0);
- EVT N1Op0VT = N1->getOperand(0).getValueType();
+/// Operands to the function are the types of X and Y, respectively.
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
+ // Always fold no-op FP casts.
+ if (XTy == YTy)
+ return true;
- // Always fold no-op FP casts.
- if (N1VT == N1Op0VT)
- return true;
+ // Do not optimize out type conversion of f128 type yet.
+ // For some targets like x86_64, configuration is changed to keep one f128
+ // value in one SSE register, but instruction selection cannot handle
+ // FCOPYSIGN on SSE registers yet.
+ if (YTy == MVT::f128)
+ return false;
- // Do not optimize out type conversion of f128 type yet.
- // For some targets like x86_64, configuration is changed to keep one f128
- // value in one SSE register, but instruction selection cannot handle
- // FCOPYSIGN on SSE registers yet.
- if (N1Op0VT == MVT::f128)
- return false;
+ return !YTy.isVector() || EnableVectorFCopySignExtendRound;
+}
- return !N1Op0VT.isVector() || EnableVectorFCopySignExtendRound;
- }
- return false;
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() != ISD::FP_EXTEND &&
+ N1.getOpcode() != ISD::FP_ROUND)
+ return false;
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0).getValueType();
+ return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
@@ -16399,6 +17065,10 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
+ // Avoid folding legal fp_rounds into non-legal ones.
+ if (!hasOperation(ISD::FP_ROUND, VT))
+ return SDValue();
+
// Skip this folding if it results in an fp_round from f80 to f16.
//
// f80 to f16 always generates an expensive (and as yet, unimplemented)
@@ -16423,7 +17093,13 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
- if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) {
+ // Note: From a legality perspective, this is a two step transform. First,
+ // we duplicate the fp_round to the arguments of the copysign, then we
+ // eliminate the fp_round on Y. The second step requires an additional
+ // predicate to match the implementation above.
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
+ CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
+ N0.getValueType())) {
SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
N0.getOperand(0), N1);
AddToWorklist(Tmp.getNode());
@@ -16529,6 +17205,15 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFFREXP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (ffrexp c1) -> ffrexp(c1)
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -16618,6 +17303,13 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) {
}
}
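+  // Fold fmin/fmax(vecreduce(x), vecreduce(y)) -> vecreduce(fmin/fmax(x, y)),
+  // picking the reduction opcode that matches this node's NaN-propagation
+  // behaviour.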
+ if (SDValue SD = reassociateReduction(
+ PropagatesNaN
+ ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
+ : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
+ Opc, SDLoc(N), VT, N0, N1, Flags))
+ return SD;
+
return SDValue();
}
@@ -16656,6 +17348,55 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
N1->getOperand(0), N2);
}
+ // Variant of the previous fold where there is a SETCC in between:
+ // BRCOND(SETCC(FREEZE(X), CONST, Cond))
+ // =>
+ // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
+ // =>
+ // BRCOND(SETCC(X, CONST, Cond))
+ // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
+ // isn't equivalent to true or false.
+ // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
+ // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
+ if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
+ SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
+ ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
+ ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
+ ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
+ bool Updated = false;
+
+ // Is 'X Cond C' always true or false?
+ auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
+ bool False = (Cond == ISD::SETULT && C->isZero()) ||
+ (Cond == ISD::SETLT && C->isMinSignedValue()) ||
+ (Cond == ISD::SETUGT && C->isAllOnes()) ||
+ (Cond == ISD::SETGT && C->isMaxSignedValue());
+ bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
+ (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
+ (Cond == ISD::SETUGE && C->isZero()) ||
+ (Cond == ISD::SETGE && C->isMinSignedValue());
+ return True || False;
+ };
+
+ if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
+ if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
+ S0 = S0->getOperand(0);
+ Updated = true;
+ }
+ }
+ if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
+ if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
+ S1 = S1->getOperand(0);
+ Updated = true;
+ }
+ }
+
+ if (Updated)
+ return DAG.getNode(
+ ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
+ DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
+ }
+
// If N is a constant we could fold this into a fallthrough or unconditional
// branch. However that doesn't happen very often in normal code, because
// Instcombine/SimplifyCFG should have handled the available opportunities.
@@ -17288,11 +18029,53 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
return false;
}
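+/// Look for a unique, non-aliased store feeding this load's chain (possibly
+/// behind a CALLSEQ_START or inside a TokenFactor) that writes through the
+/// same base pointer; on success, Offset is set to the byte offset between
+/// the two accesses.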
+StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
+ int64_t &Offset) {
+ SDValue Chain = LD->getOperand(0);
+
+ // Look through CALLSEQ_START.
+ if (Chain.getOpcode() == ISD::CALLSEQ_START)
+ Chain = Chain->getOperand(0);
+
+ StoreSDNode *ST = nullptr;
+ SmallVector<SDValue, 8> Aliases;
+ if (Chain.getOpcode() == ISD::TokenFactor) {
+ // Look for unique store within the TokenFactor.
+ for (SDValue Op : Chain->ops()) {
+ StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
+ if (!Store)
+ continue;
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
+ if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ continue;
+ // Make sure the store is not aliased with any nodes in TokenFactor.
+ GatherAllAliases(Store, Chain, Aliases);
+ if (Aliases.empty() ||
+ (Aliases.size() == 1 && Aliases.front().getNode() == Store))
+ ST = Store;
+ break;
+ }
+ } else {
+ StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
+ if (Store) {
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
+ if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ ST = Store;
+ }
+ }
+
+ return ST;
+}
+
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (OptLevel == CodeGenOpt::None || !LD->isSimple())
return SDValue();
SDValue Chain = LD->getOperand(0);
- StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
+ int64_t Offset;
+
+ StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
// TODO: Relax this restriction for unordered atomics (see D66309)
if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
return SDValue();
@@ -17309,8 +18092,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// 2. The store is scalable and the load is fixed width. We could
// potentially support a limited number of cases here, but there has been
// no cost-benefit analysis to prove it's worth it.
- bool LdStScalable = LDMemType.isScalableVector();
- if (LdStScalable != STMemType.isScalableVector())
+ bool LdStScalable = LDMemType.isScalableVT();
+ if (LdStScalable != STMemType.isScalableVT())
return SDValue();
// If we are dealing with scalable vectors on a big endian platform the
@@ -17320,12 +18103,6 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (LdStScalable && DAG.getDataLayout().isBigEndian())
return SDValue();
- BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
- BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
- int64_t Offset;
- if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
- return SDValue();
-
// Normalize for Endianness. After this Offset=0 will denote that the least
// significant bit in the loaded value maps to the least significant bit in
// the stored value). With Offset=n (for n > 0) the loaded value starts at the
@@ -17682,7 +18459,7 @@ struct LoadedSlice {
/// Get the size of the slice to be loaded in bytes.
unsigned getLoadedSize() const {
- unsigned SliceSize = getUsedBits().countPopulation();
+ unsigned SliceSize = getUsedBits().popcount();
assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
return SliceSize / 8;
}
@@ -17867,9 +18644,9 @@ static bool areUsedBitsDense(const APInt &UsedBits) {
return true;
// Get rid of the unused bits on the right.
- APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
+ APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
// Get rid of the unused bits on the left.
- if (NarrowedUsedBits.countLeadingZeros())
+ if (NarrowedUsedBits.countl_zero())
NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
// Check that the chunk of bits is completely used.
return NarrowedUsedBits.isAllOnes();
@@ -18125,14 +18902,14 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
// 0 and the bits being kept are 1. Use getSExtValue so that leading bits
// follow the sign bit for uniformity.
uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
- unsigned NotMaskLZ = countLeadingZeros(NotMask);
+ unsigned NotMaskLZ = llvm::countl_zero(NotMask);
if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
- unsigned NotMaskTZ = countTrailingZeros(NotMask);
+ unsigned NotMaskTZ = llvm::countr_zero(NotMask);
if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
if (NotMaskLZ == 64) return Result; // All zero mask.
// See if we have a continuous run of bits. If so, we have 0*1+0*
- if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
+ if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
return Result;
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
@@ -18199,6 +18976,11 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
UseTruncStore = true;
else
return SDValue();
+
+ // Can't do this for indexed stores.
+ if (St->isIndexed())
+ return SDValue();
+
// Check that the target doesn't think this is a bad idea.
if (St->getMemOperand() &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
@@ -18309,8 +19091,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
Imm ^= APInt::getAllOnes(BitWidth);
if (Imm == 0 || Imm.isAllOnes())
return SDValue();
- unsigned ShAmt = Imm.countTrailingZeros();
- unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned ShAmt = Imm.countr_zero();
+ unsigned MSB = BitWidth - Imm.countl_zero() - 1;
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
// The narrowing should be profitable, the load/store operation should be
@@ -18527,6 +19309,30 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
return DAG.getTokenFactor(StoreDL, Chains);
}
+bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
+ const Value *UnderlyingObj = nullptr;
+ for (const auto &MemOp : StoreNodes) {
+ const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
+    // Pseudo values such as stack frame objects carry their own frame index
+    // and size; do not reuse the first store's frame index for other frames.
+ if (MMO->getPseudoValue())
+ return false;
+
+ if (!MMO->getValue())
+ return false;
+
+ const Value *Obj = getUnderlyingObject(MMO->getValue());
+
+ if (UnderlyingObj && UnderlyingObj != Obj)
+ return false;
+
+ if (!UnderlyingObj)
+ UnderlyingObj = Obj;
+ }
+
+ return true;
+}
+
bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector, bool UseTrunc) {
@@ -18678,13 +19484,21 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
+ bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
// make sure we use trunc store if it's necessary to be legal.
+  // When generating the new widened store, if the first store's pointer info
+  // cannot be reused, keep only its address space, because the widened store
+  // can no longer be described by the original pointer info, which covers
+  // only the narrower memory object.
SDValue NewStore;
if (!UseTrunc) {
- NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- FirstInChain->getAlign(), *Flags, AAInfo);
+ NewStore = DAG.getStore(
+ NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ CanReusePtrInfo
+ ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
+ FirstInChain->getAlign(), *Flags, AAInfo);
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -18695,8 +19509,11 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
LegalizedStoredValTy);
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlign(), *Flags, AAInfo);
+ CanReusePtrInfo
+ ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
+ StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
+ AAInfo);
}
// Replace all merged stores with the new store.
@@ -18749,6 +19566,8 @@ void DAGCombiner::getStoreMergeCandidates(
// Don't mix temporal stores with non-temporal stores.
if (St->isNonTemporal() != Other->isNonTemporal())
return false;
+ if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
+ return false;
SDValue OtherBC = peekThroughBitcasts(Other->getValue());
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
@@ -18774,6 +19593,9 @@ void DAGCombiner::getStoreMergeCandidates(
// Don't mix temporal loads with non-temporal loads.
if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
return false;
+ if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
+ *OtherLd))
+ return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
break;
@@ -19042,11 +19864,9 @@ bool DAGCombiner::tryStoreMergeOfConstants(
}
}
- // We only use vectors if the constant is known to be zero or the
- // target allows it and the function is not marked with the
- // noimplicitfloat attribute.
- if ((!NonZero ||
- TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
+ // We only use vectors if the target allows it and the function is not
+ // marked with the noimplicitfloat attribute.
+ if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
AllowVectors) {
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
@@ -19389,6 +20209,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
// using the first's chain is acceptable.
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+ bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
AddToWorklist(NewStoreChain.getNode());
MachineMemOperand::Flags LdMMOFlags =
@@ -19397,10 +20218,14 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
if (IsNonTemporalLoad)
LdMMOFlags |= MachineMemOperand::MONonTemporal;
+ LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
+
MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
? MachineMemOperand::MONonTemporal
: MachineMemOperand::MONone;
+ StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
+
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad = DAG.getLoad(
@@ -19418,7 +20243,9 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
}
NewStore = DAG.getStore(
NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
+ CanReusePtrInfo ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstStoreAS),
+ FirstStoreAlign, StMMOFlags);
} else { // This must be the truncstore/extload case
EVT ExtendedTy =
TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
@@ -19428,8 +20255,10 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
FirstLoadAlign, LdMMOFlags);
NewStore = DAG.getTruncStore(
NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), JointMemOpVT,
- FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
+ CanReusePtrInfo ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstStoreAS),
+ JointMemOpVT, FirstInChain->getAlign(),
+ FirstInChain->getMemOperand()->getFlags());
}
// Transfer chain users from old loads to the new load.
@@ -19465,7 +20294,7 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
// store since we know <vscale x 16 x i8> is exactly twice as large as
// <vscale x 8 x i8>). Until then, bail out for scalable vectors.
EVT MemVT = St->getMemoryVT();
- if (MemVT.isScalableVector())
+ if (MemVT.isScalableVT())
return false;
if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
return false;
@@ -19647,6 +20476,62 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
}
}
+// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
+//
+// If a loaded vector with a single element inserted is stored back to the
+// same address, and nothing else uses the load in between on the chain, the
+// vector store is redundant and can be replaced by a single scalar store.
+SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
+ SDLoc DL(ST);
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Chain = ST->getChain();
+ if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
+ return SDValue();
+
+ SDValue Elt = Value.getOperand(1);
+ SDValue Idx = Value.getOperand(2);
+
+ // If the element isn't byte sized then we can't compute an offset
+ EVT EltVT = Elt.getValueType();
+ if (!EltVT.isByteSized())
+ return SDValue();
+
+ auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
+ if (!Ld || Ld->getBasePtr() != Ptr ||
+ ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
+ !ISD::isNormalStore(ST) ||
+ Ld->getAddressSpace() != ST->getAddressSpace() ||
+ !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
+ return SDValue();
+
+ unsigned IsFast;
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ Elt.getValueType(), ST->getAddressSpace(),
+ ST->getAlign(), ST->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+ EVT PtrVT = Ptr.getValueType();
+
+ SDValue Offset =
+ DAG.getNode(ISD::MUL, DL, PtrVT, Idx,
+ DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT));
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Offset);
+ MachinePointerInfo PointerInfo(ST->getAddressSpace());
+
+ // If the offset is a known constant then try to recover the pointer
+ // info
+ if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
+ NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(COffset), DL);
+ PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
+ }
+
+ return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
+ ST->getMemOperand()->getFlags());
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
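A minimal memory-level sketch of the replaceStoreOfInsertLoad combine above (illustrative only; a 32-bit element type is assumed): when a loaded vector is stored back to the same address with one byte-sized element replaced, only that element needs to be written, at base plus index times the element size.

#include <cstdint>
#include <cstdio>

int main() {
  int32_t v[4] = {1, 2, 3, 4};  // memory behind (load p)
  int32_t x = 42;
  unsigned idx = 2;
  // Instead of load <4 x i32>, insert at idx, store <4 x i32>, emit a single
  // scalar store at p + idx * sizeof(i32).
  *reinterpret_cast<int32_t *>(reinterpret_cast<char *>(v) +
                               idx * sizeof(int32_t)) = x;
  std::printf("%d %d %d %d\n", v[0], v[1], v[2], v[3]);  // 1 2 42 4
  return 0;
}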
@@ -19768,9 +20653,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
// If this is a load followed by a store to the same location, then the store
- // is dead/noop.
+ // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
+ // TODO: Add big-endian truncate support with test coverage.
// TODO: Can relax for unordered atomics (see D66309)
- if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
+ ? peekThroughTruncates(Value)
+ : Value;
+ if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
ST->isUnindexed() && ST->isSimple() &&
Ld->getAddressSpace() == ST->getAddressSpace() &&
@@ -19782,6 +20671,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
+ // Try scalarizing vector stores of loads where we only change one element
+ if (SDValue NewST = replaceStoreOfInsertLoad(ST))
+ return NewST;
+
// TODO: Can relax for unordered atomics (see D66309)
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && ST->isSimple() &&
@@ -19796,22 +20689,32 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef() &&
- // BaseIndexOffset and the code below requires knowing the size
- // of a vector, so bail out if MemoryVT is scalable.
- !ST->getMemoryVT().isScalableVector() &&
- !ST1->getMemoryVT().isScalableVector() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
- const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
- const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
- unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
- unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
- // If this is a store who's preceding store to a subset of the current
- // location and no one other node is chained to that store we can
- // effectively drop the store. Do not remove stores to undef as they may
- // be used as data sinks.
- if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
- CombineTo(ST1, ST1->getChain());
- return SDValue();
+      // If one of the two stores has a scalable vector type and the other a
+      // larger fixed-size type, we cannot remove the scalable store: its
+      // final size is unknown at compile time, so we cannot prove the later
+      // store fully covers it.
+ if (ST->getMemoryVT().isScalableVector() ||
+ ST1->getMemoryVT().isScalableVector()) {
+ if (ST1->getBasePtr() == Ptr &&
+ TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
+ ST->getMemoryVT().getStoreSize())) {
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
+ } else {
+ const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
+ const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
+        // If the preceding store writes to a subset of the current store's
+        // location and no other node is chained to that store, we can
+        // effectively drop the preceding store. Do not remove stores to
+        // undef as they may be used as data sinks.
+ if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
+ ChainBase,
+ ST1->getMemoryVT().getFixedSizeInBits())) {
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
}
}
}
@@ -20183,6 +21086,99 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
return DAG.getBitcast(VT, Shuf);
}
+// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
+// possible and the new load will be fast. We use more loads but fewer
+// shuffles and inserts.
+SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
+ EVT VT = N->getValueType(0);
+
+  // InsIndex is expected to be the first or last lane.
+ if (!VT.isFixedLengthVector() ||
+ (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
+ return SDValue();
+
+ // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
+ // depending on the InsIndex.
+ auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
+ SDValue Scalar = N->getOperand(1);
+ if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
+ return InsIndex == P.index() || P.value() < 0 ||
+ (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
+ (InsIndex == VT.getVectorNumElements() - 1 &&
+ P.value() == (int)P.index() + 1);
+ }))
+ return SDValue();
+
+ // We optionally skip over an extend so long as both loads are extended in the
+ // same way from the same type.
+ unsigned Extend = 0;
+ if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
+ Scalar.getOpcode() == ISD::SIGN_EXTEND ||
+ Scalar.getOpcode() == ISD::ANY_EXTEND) {
+ Extend = Scalar.getOpcode();
+ Scalar = Scalar.getOperand(0);
+ }
+
+ auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
+ if (!ScalarLoad)
+ return SDValue();
+
+ SDValue Vec = Shuffle->getOperand(0);
+ if (Extend) {
+ if (Vec.getOpcode() != Extend)
+ return SDValue();
+ Vec = Vec.getOperand(0);
+ }
+ auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
+ if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
+ return SDValue();
+
+ int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
+ if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
+ !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
+ ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
+ ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
+ return SDValue();
+
+  // Check that the offset between the pointers would produce a single
+  // contiguous load.
+ if (InsIndex == 0) {
+ if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
+ -1))
+ return SDValue();
+ } else {
+ if (!DAG.areNonVolatileConsecutiveLoads(
+ VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
+ return SDValue();
+ }
+
+ // And that the new unaligned load will be fast.
+ unsigned IsFast = 0;
+ Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ Vec.getValueType(), VecLoad->getAddressSpace(),
+ NewAlign, VecLoad->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+
+ // Calculate the new Ptr and create the new load.
+ SDLoc DL(N);
+ SDValue Ptr = ScalarLoad->getBasePtr();
+ if (InsIndex != 0)
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
+ DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
+ MachinePointerInfo PtrInfo =
+ InsIndex == 0 ? ScalarLoad->getPointerInfo()
+ : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
+
+ SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
+ ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
+ DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
+ DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
+ return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
+}
+
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
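A memory-level picture of the combineInsertEltToLoad fold above (a sketch assuming 32-bit elements and a 4-element vector): the shuffled vector load plus the inserted scalar load cover one contiguous run of bytes, so a single load starting one element past the original base replaces the load/shuffle/insert sequence.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  int32_t x[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  int32_t vec[4];
  // One load at base + 4 bytes yields lanes {x[1], x[2], x[3], x[4]}, the same
  // result as shuffle(load x[0..3], <1,2,3,u>) with x[4] inserted last.
  std::memcpy(vec, &x[1], sizeof vec);
  std::printf("%d %d %d %d\n", vec[0], vec[1], vec[2], vec[3]);  // 1 2 3 4
  return 0;
}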
@@ -20254,6 +21250,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
return Shuf;
+ if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
+ return Shuf;
+
// Attempt to convert an insert_vector_elt chain into a legal build_vector.
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
// vXi1 vector - we don't need to recurse.
@@ -20349,6 +21348,20 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return NewShuffle;
}
+ // If all insertions are zero value, try to convert to AND mask.
+ // TODO: Do this for -1 with OR mask?
+ if (!LegalOperations && llvm::isNullConstant(InVal) &&
+ all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
+ count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
+ SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
+ SmallVector<SDValue, 8> Mask(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I)
+ Mask[I] = Ops[I] ? Zero : AllOnes;
+ return DAG.getNode(ISD::AND, DL, VT, CurVec,
+ DAG.getBuildVector(VT, DL, Mask));
+ }
+
// Failed to find a match in the chain - bail.
break;
}
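A scalar sketch of the zero-insertion fold above (illustrative only): inserting constant zeros into some lanes is a blend with zero, which is an AND with a mask holding all-ones in the untouched lanes and zero in the overwritten ones.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t v[4]    = {0x11111111, 0x22222222, 0x33333333, 0x44444444};
  uint32_t mask[4] = {0xffffffff, 0x00000000, 0xffffffff, 0x00000000};
  for (int i = 0; i < 4; ++i)
    v[i] &= mask[i];  // lanes 1 and 3 become zero, the others are unchanged
  std::printf("%08x %08x %08x %08x\n", v[0], v[1], v[2], v[3]);
  return 0;
}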
@@ -20701,8 +21714,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// extract_vector_elt (build_vector x, y), 1 -> y
if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
- TLI.isTypeLegal(VecVT) &&
- (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
+ TLI.isTypeLegal(VecVT)) {
assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
VecVT.isFixedLengthVector()) &&
"BUILD_VECTOR used for scalable vectors");
@@ -20711,12 +21723,15 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue Elt = VecOp.getOperand(IndexVal);
EVT InEltVT = Elt.getValueType();
- // Sometimes build_vector's scalar input types do not match result type.
- if (ScalarVT == InEltVT)
- return Elt;
+ if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
+ isNullConstant(Elt)) {
+ // Sometimes build_vector's scalar input types do not match result type.
+ if (ScalarVT == InEltVT)
+ return Elt;
- // TODO: It may be useful to truncate if free if the build_vector implicitly
- // converts.
+ // TODO: It may be useful to truncate if free if the build_vector
+ // implicitly converts.
+ }
}
if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
@@ -21025,9 +22040,10 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
EVT OutScalarTy = VT.getScalarType();
- bool ValidTypes = SourceType != MVT::Other &&
- isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
- isPowerOf2_32(SourceType.getSizeInBits());
+ bool ValidTypes =
+ SourceType != MVT::Other &&
+ llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
+ llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
// Create a new simpler BUILD_VECTOR sequence which other optimizations can
// turn into a single shuffle instruction.
@@ -21157,7 +22173,7 @@ SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
}
// Only cast if the size is the same
- if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
+ if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
return SDValue();
return DAG.getBitcast(VT, Src);
@@ -21359,10 +22375,9 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
// the source vector. The high bits map to zero. We will use a zero vector
// as the 2nd source operand of the shuffle, so use the 1st element of
// that vector (mask value is number-of-elements) for the high bits.
- if (i % ZextRatio == 0)
- ShufMask[i] = Extract.getConstantOperandVal(1);
- else
- ShufMask[i] = NumMaskElts;
+ int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
+ ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
+ : NumMaskElts;
}
// Undef elements of the build vector remain undef because we initialize
@@ -21917,7 +22932,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
EVT OpVT = N->getOperand(0).getValueType();
// If the operands are legal vectors, leave them alone.
- if (TLI.isTypeLegal(OpVT))
+ if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
return SDValue();
SDLoc DL(N);
@@ -22273,7 +23288,13 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If the input is a concat_vectors, just make a larger concat by padding
// with smaller undefs.
- if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
+ //
+  // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
+  // here could cause an infinite loop. That legalization happens when
+  // LegalDAG is true and the input of
+  // AArch64TargetLowering::LowerCONCAT_VECTORS() is scalable.
+ if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
+ !(LegalDAG && In.getValueType().isScalableVector())) {
unsigned NumOps = N->getNumOperands() * In.getNumOperands();
SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
@@ -22767,10 +23788,6 @@ static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
continue;
}
- // Profitability check: only deal with extractions from the first subvector.
- if (OpSubvecIdx != 0)
- return SDValue();
-
const std::pair<SDValue, int> DemandedSubvector =
std::make_pair(Op, OpSubvecIdx);
@@ -22800,6 +23817,14 @@ static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
if (DemandedSubvectors.empty())
return DAG.getUNDEF(NarrowVT);
+ // Profitability check: only deal with extractions from the first subvector
+ // unless the mask becomes an identity mask.
+ if (!ShuffleVectorInst::isIdentityMask(NewMask) ||
+ any_of(NewMask, [](int M) { return M < 0; }))
+ for (auto &DemandedSubvector : DemandedSubvectors)
+ if (DemandedSubvector.second != 0)
+ return SDValue();
+
// We still perform the exact same EXTRACT_SUBVECTOR, just on different
// operand[s]/index[es], so there is no point in checking for it's legality.
@@ -22975,7 +24000,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if (NumElems == 1) {
SDValue Src = V->getOperand(IdxVal);
if (EltVT != Src.getValueType())
- Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
+ Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
return DAG.getBitcast(NVT, Src);
}
@@ -23450,9 +24475,7 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
unsigned Opcode = N0.getOpcode();
- if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
- Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
- Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
+ if (!ISD::isExtVecInRegOpcode(Opcode))
return SDValue();
SDValue N00 = N0.getOperand(0);
@@ -23518,7 +24541,7 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
DemandedElts.setBit(Idx);
}
- assert(DemandedElts.countPopulation() > 1 && "Is a splat shuffle already?");
+ assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
APInt UndefElts;
if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
// Even if all demanded elements are splat, some of them could be undef.
@@ -24072,8 +25095,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
bool IsInLaneMask = true;
ArrayRef<int> Mask = SVN->getMask();
SmallVector<int, 16> ClearMask(NumElts, -1);
- APInt DemandedLHS = APInt::getNullValue(NumElts);
- APInt DemandedRHS = APInt::getNullValue(NumElts);
+ APInt DemandedLHS = APInt::getZero(NumElts);
+ APInt DemandedRHS = APInt::getZero(NumElts);
for (int I = 0; I != (int)NumElts; ++I) {
int M = Mask[I];
if (M < 0)
@@ -24086,12 +25109,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
// TODO: Should we try to mask with N1 as well?
- if (!IsInLaneMask &&
- (!DemandedLHS.isNullValue() || !DemandedRHS.isNullValue()) &&
- (DemandedLHS.isNullValue() ||
- DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
- (DemandedRHS.isNullValue() ||
- DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
+ if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
+ (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
+ (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
SDLoc DL(N);
EVT IntVT = VT.changeVectorElementTypeToInteger();
EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
@@ -24771,6 +25791,17 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // FSUB -> FMA combines:
+ if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVPOp(SDNode *N) {
if (N->getOpcode() == ISD::VP_GATHER)
@@ -24792,8 +25823,17 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
// This is the only generic VP combine we support for now.
- if (!AreAllEltsDisabled)
+ if (!AreAllEltsDisabled) {
+ switch (N->getOpcode()) {
+ case ISD::VP_FADD:
+ return visitVP_FADD(N);
+ case ISD::VP_FSUB:
+ return visitVP_FSUB(N);
+ case ISD::VP_FMA:
+ return visitFMA<VPMatchContext>(N);
+ }
return SDValue();
+ }
// Binary operations can be replaced by UNDEF.
if (ISD::isVPBinaryOp(N->getOpcode()))
@@ -24814,6 +25854,97 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
+
+  // Check that the memory the FP state is written to is used only in a single
+  // load operation.
+ LoadSDNode *LdNode = nullptr;
+ for (auto *U : Ptr->uses()) {
+ if (U == N)
+ continue;
+ if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
+ if (LdNode && LdNode != Ld)
+ return SDValue();
+ LdNode = Ld;
+ continue;
+ }
+ return SDValue();
+ }
+ if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
+ !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
+ !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
+ return SDValue();
+
+ // Check if the loaded value is used only in a store operation.
+ StoreSDNode *StNode = nullptr;
+ for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
+ SDUse &U = I.getUse();
+ if (U.getResNo() == 0) {
+ if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
+ if (StNode)
+ return SDValue();
+ StNode = St;
+ } else {
+ return SDValue();
+ }
+ }
+ }
+ if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
+ !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
+ !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
+ return SDValue();
+
+ // Create new node GET_FPENV_MEM, which uses the store address to write FP
+ // environment.
+ SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
+ StNode->getMemOperand());
+ CombineTo(StNode, Res, false);
+ return Res;
+}
+
+SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
+
+  // Check that the FP state address is otherwise used only in a single store.
+ StoreSDNode *StNode = nullptr;
+ for (auto *U : Ptr->uses()) {
+ if (U == N)
+ continue;
+ if (auto *St = dyn_cast<StoreSDNode>(U)) {
+ if (StNode && StNode != St)
+ return SDValue();
+ StNode = St;
+ continue;
+ }
+ return SDValue();
+ }
+ if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
+ !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
+ !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
+ return SDValue();
+
+ // Check if the stored value is loaded from some location and the loaded
+ // value is used only in the store operation.
+ SDValue StValue = StNode->getValue();
+ auto *LdNode = dyn_cast<LoadSDNode>(StValue);
+ if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
+ !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
+ !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
+ return SDValue();
+
+ // Create new node SET_FPENV_MEM, which uses the load address to read FP
+ // environment.
+ SDValue Res =
+ DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
+ LdNode->getMemOperand());
+ return Res;
+}
+
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
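At the C level, the pattern collapsed by the GET_FPENV_MEM/SET_FPENV_MEM combines above looks roughly like the following (an assumed illustration using the standard <cfenv> interface): the environment is written to a temporary and then copied, and the combine instead makes the node read or write the final location directly.

#include <cfenv>
#include <cstring>

// Conceptual shape of the folded pattern: get_fpenv_mem into a temporary,
// followed by a load/store pair that only copies it to its real destination.
void save_env(fenv_t *dst) {
  fenv_t tmp;
  fegetenv(&tmp);                      // GET_FPENV_MEM writes the env to tmp
  std::memcpy(dst, &tmp, sizeof tmp);  // copy the combine folds into the node
}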
@@ -24960,8 +26091,6 @@ SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
- EVT SrcVT = N0->getValueType(0);
- EVT SrcEltVT = SrcVT.getVectorElementType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// TODO: promote operation might be also good here?
@@ -24971,7 +26100,9 @@ SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
(N0.getOpcode() == ISD::SPLAT_VECTOR ||
TLI.isExtractVecEltCheap(VT, Index0)) &&
TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
- TLI.preferScalarizeSplat(Opcode)) {
+ TLI.preferScalarizeSplat(N)) {
+ EVT SrcVT = N0.getValueType();
+ EVT SrcEltVT = SrcVT.getVectorElementType();
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
SDValue Elt =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
@@ -25588,14 +26719,14 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
- if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
unsigned ShCt = AndMask.getBitWidth() - 1;
if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
SDValue ShlAmt =
- DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
- getShiftAmountTy(AndLHS.getValueType()));
+ DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
+ getShiftAmountTy(AndLHS.getValueType()));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
// Now arithmetic right shift it all the way over, so the result is
@@ -25991,7 +27122,7 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
Reciprocal)) {
AddToWorklist(Est.getNode());
- if (Iterations)
+ if (Iterations > 0)
Est = UseOneConstNR
? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
@@ -26334,7 +27465,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// BaseIndexOffset assumes that offsets are fixed-size, which
// is not valid for scalable vectors where the offsets are
// scaled by `vscale`, so bail out early.
- if (St->getMemoryVT().isScalableVector())
+ if (St->getMemoryVT().isScalableVT())
return false;
// Add ST's interval.
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 2f2ae6e29855..f0affce7b6b8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -59,6 +59,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -95,7 +96,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -454,8 +454,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
if (!TLI.isTypeLegal(VT)) {
// MVT::i1 is special. Allow AND, OR, or XOR because they
// don't require additional zeroing, which makes them easy.
- if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
- ISDOpcode == ISD::XOR))
+ if (VT == MVT::i1 && ISD::isBitwiseLogicOp(ISDOpcode))
VT = TLI.getTypeToTransformTo(I->getContext(), VT);
else
return false;
@@ -894,7 +893,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
bool FastISel::selectXRayCustomEvent(const CallInst *I) {
const auto &Triple = TM.getTargetTriple();
- if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ if (Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
return true; // don't do anything to this instruction.
SmallVector<MachineOperand, 8> Ops;
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
@@ -913,7 +912,7 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
bool FastISel::selectXRayTypedEvent(const CallInst *I) {
const auto &Triple = TM.getTargetTriple();
- if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ if (Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
return true; // don't do anything to this instruction.
SmallVector<MachineOperand, 8> Ops;
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
@@ -1209,6 +1208,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
}
+ if (FuncInfo.PreprocessedDbgDeclares.contains(DI))
+ return true;
+
const Value *Address = DI->getAddress();
if (!Address || isa<UndefValue>(Address)) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
@@ -1216,13 +1218,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
}
- // Byval arguments with frame indices were already handled after argument
- // lowering and before isel.
- const auto *Arg =
- dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
- if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
- return true;
-
std::optional<MachineOperand> Op;
if (Register Reg = lookUpRegForValue(Address))
Op = MachineOperand::CreateReg(Reg, false);
@@ -1277,60 +1272,85 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const DbgValueInst *DI = cast<DbgValueInst>(II);
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
const Value *V = DI->getValue();
- assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
+ DIExpression *Expr = DI->getExpression();
+ DILocalVariable *Var = DI->getVariable();
+ assert(Var->isValidLocationForIntrinsic(MIMD.getDL()) &&
"Expected inlined-at fields to agree");
if (!V || isa<UndefValue>(V) || DI->hasArgList()) {
// DI is either undef or cannot produce a valid DBG_VALUE, so produce an
// undef DBG_VALUE to terminate any prior location.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, false, 0U,
- DI->getVariable(), DI->getExpression());
- } else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ Var, Expr);
+ return true;
+ }
+ if (const auto *CI = dyn_cast<ConstantInt>(V)) {
// See if there's an expression to constant-fold.
- DIExpression *Expr = DI->getExpression();
if (Expr)
std::tie(Expr, CI) = Expr->constantFold(CI);
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addCImm(CI)
.addImm(0U)
- .addMetadata(DI->getVariable())
+ .addMetadata(Var)
.addMetadata(Expr);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addImm(CI->getZExtValue())
.addImm(0U)
- .addMetadata(DI->getVariable())
+ .addMetadata(Var)
.addMetadata(Expr);
- } else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
+ return true;
+ }
+ if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addFPImm(CF)
.addImm(0U)
- .addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
- } else if (Register Reg = lookUpRegForValue(V)) {
+ .addMetadata(Var)
+ .addMetadata(Expr);
+ return true;
+ }
+ if (const auto *Arg = dyn_cast<Argument>(V);
+ Arg && Expr && Expr->isEntryValue()) {
+ // As per the Verifier, this case is only valid for swift async Args.
+ assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
+
+ Register Reg = getRegForValue(Arg);
+ for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
+ if (Reg == VirtReg || Reg == PhysReg) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II,
+ false /*IsIndirect*/, PhysReg, Var, Expr);
+ return true;
+ }
+
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n"
+ << *DI << "\n");
+ return true;
+ }
+ if (Register Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
if (!FuncInfo.MF->useDebugInstrRef()) {
bool IsIndirect = false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect,
- Reg, DI->getVariable(), DI->getExpression());
- } else {
- // If using instruction referencing, produce this as a DBG_INSTR_REF,
- // to be later patched up by finalizeDebugInstrRefs.
- SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
- /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
- /* isKill */ false, /* isDead */ false,
- /* isUndef */ false, /* isEarlyClobber */ false,
- /* SubReg */ 0, /* isDebug */ true)});
- SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
- auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
- TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, MOs,
- DI->getVariable(), NewExpr);
+ Reg, Var, Expr);
+ return true;
}
- } else {
- // We don't know how to handle other cases, so we drop.
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ // If using instruction referencing, produce this as a DBG_INSTR_REF,
+ // to be later patched up by finalizeDebugInstrRefs.
+ SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
+ /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true)});
+ SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
+ auto *NewExpr = DIExpression::prependOpcodes(Expr, Ops);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
+ TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, MOs,
+ Var, NewExpr);
+ return true;
}
+ // We don't know how to handle other cases, so we drop.
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}
case Intrinsic::dbg_label: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index c18cd39ed296..1d0a03ccfcdc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,7 +13,7 @@
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -83,7 +83,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
- DA = DAG->getDivergenceAnalysis();
+ UA = DAG->getUniformityInfo();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
@@ -128,20 +128,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
for (const Instruction &I : BB) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
Type *Ty = AI->getAllocatedType();
- Align TyPrefAlign = MF->getDataLayout().getPrefTypeAlign(Ty);
- // The "specified" alignment is the alignment written on the alloca,
- // or the preferred alignment of the type if none is specified.
- //
- // (Unspecified alignment on allocas will be going away soon.)
- Align SpecifiedAlign = AI->getAlign();
-
- // If the preferred alignment of the type is higher than the specified
- // alignment of the alloca, promote the alignment, as long as it doesn't
- // require realigning the stack.
- //
- // FIXME: Do we really want to second-guess the IR in isel?
- Align Alignment =
- std::max(std::min(TyPrefAlign, StackAlign), SpecifiedAlign);
+ Align Alignment = AI->getAlign();
// Static allocas can be folded into the initial stack frame
// adjustment. For targets that don't realign the stack, don't
@@ -165,9 +152,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
false, AI);
}
- // Scalable vectors may need a special StackID to distinguish
- // them from other (fixed size) stack objects.
- if (isa<ScalableVectorType>(Ty))
+ // Scalable vectors and structures that contain scalable vectors may
+ // need a special StackID to distinguish them from other (fixed size)
+ // stack objects.
+ if (Ty->isScalableTy())
MF->getFrameInfo().setStackID(FrameIndex,
TFI->getStackIDForScalableVectors());
@@ -305,18 +293,18 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
for (WinEHHandlerType &H : TBME.HandlerArray) {
if (H.Handler)
- H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
+ H.Handler = MBBMap[cast<const BasicBlock *>(H.Handler)];
}
}
for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
if (UME.Cleanup)
- UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()];
+ UME.Cleanup = MBBMap[cast<const BasicBlock *>(UME.Cleanup)];
for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
- const auto *BB = UME.Handler.get<const BasicBlock *>();
+ const auto *BB = cast<const BasicBlock *>(UME.Handler);
UME.Handler = MBBMap[BB];
}
for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
- const auto *BB = CME.Handler.get<const BasicBlock *>();
+ const auto *BB = cast<const BasicBlock *>(CME.Handler);
CME.Handler = MBBMap[BB];
}
} else if (Personality == EHPersonality::Wasm_CXX) {
@@ -326,18 +314,18 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Map all BB references in the Wasm EH data to MBBs.
DenseMap<BBOrMBB, BBOrMBB> SrcToUnwindDest;
for (auto &KV : EHInfo.SrcToUnwindDest) {
- const auto *Src = KV.first.get<const BasicBlock *>();
- const auto *Dest = KV.second.get<const BasicBlock *>();
+ const auto *Src = cast<const BasicBlock *>(KV.first);
+ const auto *Dest = cast<const BasicBlock *>(KV.second);
SrcToUnwindDest[MBBMap[Src]] = MBBMap[Dest];
}
EHInfo.SrcToUnwindDest = std::move(SrcToUnwindDest);
DenseMap<BBOrMBB, SmallPtrSet<BBOrMBB, 4>> UnwindDestToSrcs;
for (auto &KV : EHInfo.UnwindDestToSrcs) {
- const auto *Dest = KV.first.get<const BasicBlock *>();
+ const auto *Dest = cast<const BasicBlock *>(KV.first);
UnwindDestToSrcs[MBBMap[Dest]] = SmallPtrSet<BBOrMBB, 4>();
for (const auto P : KV.second)
UnwindDestToSrcs[MBBMap[Dest]].insert(
- MBBMap[P.get<const BasicBlock *>()]);
+ MBBMap[cast<const BasicBlock *>(P)]);
}
EHInfo.UnwindDestToSrcs = std::move(UnwindDestToSrcs);
}
@@ -361,6 +349,7 @@ void FunctionLoweringInfo::clear() {
StatepointStackSlots.clear();
StatepointRelocationMaps.clear();
PreferredExtendType.clear();
+ PreprocessedDbgDeclares.clear();
}
/// CreateReg - Allocate a single virtual register for the given type.
@@ -394,8 +383,8 @@ Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
}
Register FunctionLoweringInfo::CreateRegs(const Value *V) {
- return CreateRegs(V->getType(), DA && DA->isDivergent(V) &&
- !TLI->requiresUniformRegister(*MF, V));
+ return CreateRegs(V->getType(), UA && UA->isDivergent(V) &&
+ !TLI->requiresUniformRegister(*MF, V));
}
/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
@@ -517,7 +506,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
return;
}
DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
- DestLOI.Known = KnownBits::commonBits(DestLOI.Known, SrcLOI->Known);
+ DestLOI.Known = DestLOI.Known.intersectWith(SrcLOI->Known);
}
}
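The commonBits-to-intersectWith change above is a rename with identical semantics; a minimal standalone model of that intersection (an assumed simplification, not the real llvm::KnownBits class) is:

#include <cassert>
#include <cstdint>

// A bit stays known only if both inputs agree on it; everything else
// becomes unknown after merging PHI operands.
struct Known {
  uint64_t Zero = 0, One = 0;
};

Known intersectWith(Known A, Known B) {
  return {A.Zero & B.Zero, A.One & B.One};
}

int main() {
  Known A{0xF0, 0x0F};  // low nibble known one, high nibble known zero
  Known B{0xF0, 0x0C};  // only bits 2-3 known one, high nibble known zero
  Known C = intersectWith(A, B);
  assert(C.Zero == 0xF0 && C.One == 0x0C);
  return 0;
}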
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 338172e4e10a..4e7895c0b3cf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1078,6 +1078,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Flags.hasNoFPExcept())
MI->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
+ if (Flags.hasUnpredictable())
+ MI->setFlag(MachineInstr::MIFlag::Unpredictable);
}
// Emit all of the actual operands of this instruction, adding them to the
@@ -1161,6 +1164,13 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
+  // Add rounding control registers as implicit defs for function calls.
+ if (II.isCall() && MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
+ ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+ for (MCPhysReg Reg : RCRegs)
+ UsedRegs.push_back(Reg);
+ }
+
// Finally mark unused registers as dead.
if (!UsedRegs.empty() || !II.implicit_defs().empty() || II.hasOptionalDef())
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c3106216a060..61fc31715d71 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -41,7 +42,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -133,8 +133,11 @@ private:
SDValue N1, SDValue N2,
ArrayRef<int> Mask) const;
- SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ TargetLowering::ArgListTy &&Args, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ void ExpandFrexpLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall LC,
SmallVectorImpl<SDValue> &Results);
void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
@@ -172,6 +175,9 @@ private:
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
SDValue ExpandFNEG(SDNode *Node) const;
+ SDValue expandLdexp(SDNode *Node) const;
+ SDValue expandFrexp(SDNode *Node) const;
+
SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
@@ -880,8 +886,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// If the source type is not legal, see if there is a legal extload to
// an intermediate type that we can then extend further.
EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
- if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
- TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) {
+ if ((LoadVT.isFloatingPoint() == SrcVT.isFloatingPoint()) &&
+ (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
+ TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT))) {
// If we are loading a legal type, this is a non-extload followed by a
// full extend.
ISD::LoadExtType MidExtType =
@@ -999,6 +1006,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action != TargetLowering::Promote)
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::SET_FPENV:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType());
+ break;
case ISD::FP_TO_FP16:
case ISD::FP_TO_BF16:
case ISD::SINT_TO_FP:
@@ -1199,6 +1210,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
case ISD::IS_FPCLASS:
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(0).getValueType());
@@ -1546,7 +1559,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
auto &DataLayout = DAG.getDataLayout();
// Store the float to memory, then load the sign part out as an integer.
- MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8);
+ MVT LoadTy = TLI.getRegisterType(MVT::i8);
// First create a temporary that is aligned for both the load and store.
SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
@@ -2015,23 +2028,14 @@ SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) {
return DAG.getSplatBuildVector(VT, DL, SplatVal);
}
-// Expand a node into a call to a libcall. If the result value
-// does not fit into a register, return the lo part and set the hi part to the
-// by-reg argument. If it does fit into a single register, return the result
-// and leave the Hi part unset.
-SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+// Expand a node into a call to a libcall, returning the value as the first
+// element of the pair and the chain as the second. If the result value does
+// not fit into a register, the first element holds the lo part and the hi
+// part is returned through the by-reg argument. If it does fit into a single
+// register, the first element is the result and the Hi part is left unset.
+std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ TargetLowering::ArgListTy &&Args,
bool isSigned) {
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- for (const SDValue &Op : Node->op_values()) {
- EVT ArgVT = Op.getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Op;
- Entry.Ty = ArgTy;
- Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned);
- Entry.IsZExt = !TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned);
- Args.push_back(Entry);
- }
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy(DAG.getDataLayout()));
@@ -2070,11 +2074,69 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
if (!CallInfo.second.getNode()) {
LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG));
// It's a tailcall, return the chain (which is the DAG root).
- return DAG.getRoot();
+ return {DAG.getRoot(), DAG.getRoot()};
}
LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG));
- return CallInfo.first;
+ return CallInfo;
+}
+
+std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &Op : Node->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op;
+ Entry.Ty = ArgTy;
+ Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned);
+ Entry.IsZExt = !Entry.IsSExt;
+ Args.push_back(Entry);
+ }
+
+ return ExpandLibCall(LC, Node, std::move(Args), isSigned);
+}
+
+void SelectionDAGLegalize::ExpandFrexpLibCall(
+ SDNode *Node, SmallVectorImpl<SDValue> &Results) {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ExpVT = Node->getValueType(1);
+
+ SDValue FPOp = Node->getOperand(0);
+
+ EVT ArgVT = FPOp.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListEntry FPArgEntry;
+ FPArgEntry.Node = FPOp;
+ FPArgEntry.Ty = ArgTy;
+
+ SDValue StackSlot = DAG.CreateStackTemporary(ExpVT);
+ TargetLowering::ArgListEntry PtrArgEntry;
+ PtrArgEntry.Node = StackSlot;
+ PtrArgEntry.Ty = PointerType::get(*DAG.getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace());
+
+ TargetLowering::ArgListTy Args = {FPArgEntry, PtrArgEntry};
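+  // Editorial note (illustration, not part of the original change): the call
+  // built below is effectively 'fract = frexp(x, &slot)'; the integer exponent
+  // is written through the stack temporary and reloaded once the call chain is
+  // known.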
+
+ RTLIB::Libcall LC = RTLIB::getFREXP(VT);
+ auto [Call, Chain] = ExpandLibCall(LC, Node, std::move(Args), false);
+
+ // FIXME: Get type of int for libcall declaration and cast
+
+ int FrameIdx = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ auto PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+
+ SDValue LoadExp = DAG.getLoad(ExpVT, dl, Chain, StackSlot, PtrInfo);
+ SDValue OutputChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ LoadExp.getValue(1), DAG.getRoot());
+ DAG.setRoot(OutputChain);
+
+ Results.push_back(Call);
+ Results.push_back(LoadExp);
}
void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
@@ -2095,7 +2157,7 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
Results.push_back(Tmp.first);
Results.push_back(Tmp.second);
} else {
- SDValue Tmp = ExpandLibCall(LC, Node, false);
+ SDValue Tmp = ExpandLibCall(LC, Node, false).first;
Results.push_back(Tmp);
}
}
@@ -2129,7 +2191,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
case MVT::i64: LC = Call_I64; break;
case MVT::i128: LC = Call_I128; break;
}
- return ExpandLibCall(LC, Node, isSigned);
+ return ExpandLibCall(LC, Node, isSigned).first;
}
/// Expand the node to a libcall based on first argument type (for instance
@@ -2309,6 +2371,237 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo()));
}
+SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue X = Node->getOperand(0);
+ SDValue N = Node->getOperand(1);
+ EVT ExpVT = N.getValueType();
+ EVT AsIntVT = VT.changeTypeToInteger();
+ if (AsIntVT == EVT()) // TODO: How to handle f80?
+ return SDValue();
+
+ if (Node->getOpcode() == ISD::STRICT_FLDEXP) // TODO
+ return SDValue();
+
+ SDNodeFlags NSW;
+ NSW.setNoSignedWrap(true);
+ SDNodeFlags NUW_NSW;
+ NUW_NSW.setNoUnsignedWrap(true);
+ NUW_NSW.setNoSignedWrap(true);
+
+ EVT SetCCVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ExpVT);
+ const fltSemantics &FltSem = SelectionDAG::EVTToAPFloatSemantics(VT);
+
+ const APFloat::ExponentType MaxExpVal = APFloat::semanticsMaxExponent(FltSem);
+ const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem);
+ const int Precision = APFloat::semanticsPrecision(FltSem);
+
+ const SDValue MaxExp = DAG.getConstant(MaxExpVal, dl, ExpVT);
+ const SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT);
+
+ const SDValue DoubleMaxExp = DAG.getConstant(2 * MaxExpVal, dl, ExpVT);
+
+ const APFloat One(FltSem, "1.0");
+ APFloat ScaleUpK = scalbn(One, MaxExpVal, APFloat::rmNearestTiesToEven);
+
+ // Offset by precision to avoid denormal range.
+ APFloat ScaleDownK =
+ scalbn(One, MinExpVal + Precision, APFloat::rmNearestTiesToEven);
+
+ // TODO: Should really introduce control flow and use a block for the >
+ // MaxExp, < MinExp cases
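+  // Illustration (added note, not from the source): for f64 MaxExp is 1023, so
+  // e.g. ldexp(x, 2000) is evaluated as (x * 0x1p+1023) * 0x1p+977, keeping
+  // every intermediate product representable.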
+
+ // First, handle exponents Exp > MaxExp and scale down.
+ SDValue NGtMaxExp = DAG.getSetCC(dl, SetCCVT, N, MaxExp, ISD::SETGT);
+
+ SDValue DecN0 = DAG.getNode(ISD::SUB, dl, ExpVT, N, MaxExp, NSW);
+ SDValue ClampMaxVal = DAG.getConstant(3 * MaxExpVal, dl, ExpVT);
+ SDValue ClampN_Big = DAG.getNode(ISD::SMIN, dl, ExpVT, N, ClampMaxVal);
+ SDValue DecN1 =
+ DAG.getNode(ISD::SUB, dl, ExpVT, ClampN_Big, DoubleMaxExp, NSW);
+
+ SDValue ScaleUpTwice =
+ DAG.getSetCC(dl, SetCCVT, N, DoubleMaxExp, ISD::SETUGT);
+
+ const SDValue ScaleUpVal = DAG.getConstantFP(ScaleUpK, dl, VT);
+ SDValue ScaleUp0 = DAG.getNode(ISD::FMUL, dl, VT, X, ScaleUpVal);
+ SDValue ScaleUp1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleUp0, ScaleUpVal);
+
+ SDValue SelectN_Big =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleUpTwice, DecN1, DecN0);
+ SDValue SelectX_Big =
+ DAG.getNode(ISD::SELECT, dl, VT, ScaleUpTwice, ScaleUp1, ScaleUp0);
+
+ // Now handle exponents Exp < MinExp
+ SDValue NLtMinExp = DAG.getSetCC(dl, SetCCVT, N, MinExp, ISD::SETLT);
+
+ SDValue Increment0 = DAG.getConstant(-(MinExpVal + Precision), dl, ExpVT);
+ SDValue Increment1 = DAG.getConstant(-2 * (MinExpVal + Precision), dl, ExpVT);
+
+ SDValue IncN0 = DAG.getNode(ISD::ADD, dl, ExpVT, N, Increment0, NUW_NSW);
+
+ SDValue ClampMinVal =
+ DAG.getConstant(3 * MinExpVal + 2 * Precision, dl, ExpVT);
+ SDValue ClampN_Small = DAG.getNode(ISD::SMAX, dl, ExpVT, N, ClampMinVal);
+ SDValue IncN1 =
+ DAG.getNode(ISD::ADD, dl, ExpVT, ClampN_Small, Increment1, NSW);
+
+ const SDValue ScaleDownVal = DAG.getConstantFP(ScaleDownK, dl, VT);
+ SDValue ScaleDown0 = DAG.getNode(ISD::FMUL, dl, VT, X, ScaleDownVal);
+ SDValue ScaleDown1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleDown0, ScaleDownVal);
+
+ SDValue ScaleDownTwice = DAG.getSetCC(
+ dl, SetCCVT, N, DAG.getConstant(2 * MinExpVal + Precision, dl, ExpVT),
+ ISD::SETULT);
+
+ SDValue SelectN_Small =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleDownTwice, IncN1, IncN0);
+ SDValue SelectX_Small =
+ DAG.getNode(ISD::SELECT, dl, VT, ScaleDownTwice, ScaleDown1, ScaleDown0);
+
+ // Now combine the two out of range exponent handling cases with the base
+ // case.
+ SDValue NewX = DAG.getNode(
+ ISD::SELECT, dl, VT, NGtMaxExp, SelectX_Big,
+ DAG.getNode(ISD::SELECT, dl, VT, NLtMinExp, SelectX_Small, X));
+
+ SDValue NewN = DAG.getNode(
+ ISD::SELECT, dl, ExpVT, NGtMaxExp, SelectN_Big,
+ DAG.getNode(ISD::SELECT, dl, ExpVT, NLtMinExp, SelectN_Small, N));
+
+ SDValue BiasedN = DAG.getNode(ISD::ADD, dl, ExpVT, NewN, MaxExp, NSW);
+
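+  // Clarifying note (added): the remaining power of two is materialized by
+  // shifting the biased exponent into the exponent field, bitcasting back to
+  // FP, and folding it in with the final multiply.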
+ SDValue ExponentShiftAmt =
+ DAG.getShiftAmountConstant(Precision - 1, ExpVT, dl);
+ SDValue CastExpToValTy = DAG.getZExtOrTrunc(BiasedN, dl, AsIntVT);
+
+ SDValue AsInt = DAG.getNode(ISD::SHL, dl, AsIntVT, CastExpToValTy,
+ ExponentShiftAmt, NUW_NSW);
+ SDValue AsFP = DAG.getNode(ISD::BITCAST, dl, VT, AsInt);
+ return DAG.getNode(ISD::FMUL, dl, VT, NewX, AsFP);
+}
+
+SDValue SelectionDAGLegalize::expandFrexp(SDNode *Node) const {
+ SDLoc dl(Node);
+ SDValue Val = Node->getOperand(0);
+ EVT VT = Val.getValueType();
+ EVT ExpVT = Node->getValueType(1);
+ EVT AsIntVT = VT.changeTypeToInteger();
+ if (AsIntVT == EVT()) // TODO: How to handle f80?
+ return SDValue();
+
+ const fltSemantics &FltSem = SelectionDAG::EVTToAPFloatSemantics(VT);
+ const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem);
+ const unsigned Precision = APFloat::semanticsPrecision(FltSem);
+ const unsigned BitSize = VT.getScalarSizeInBits();
+
+ // TODO: Could introduce control flow and skip over the denormal handling.
+
+ // scale_up = fmul value, scalbn(1.0, precision + 1)
+ // extracted_exp = (bitcast value to uint) >> precision - 1
+ // biased_exp = extracted_exp + min_exp
+ // extracted_fract = (bitcast value to uint) & (fract_mask | sign_mask)
+ //
+ // is_denormal = val < smallest_normalized
+ // computed_fract = is_denormal ? scale_up : extracted_fract
+ // computed_exp = is_denormal ? biased_exp + (-precision - 1) : biased_exp
+ //
+ // result_0 = (!isfinite(val) || iszero(val)) ? val : computed_fract
+ // result_1 = (!isfinite(val) || iszero(val)) ? 0 : computed_exp
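+  //
+  // Worked example (editorial, not from the source): frexp(8.0) yields
+  // fract = 0.5 and exp = 4, since 8.0 == 0.5 * 2^4; denormal inputs are first
+  // scaled by 2^(precision + 1) so their exponent is visible in the bits.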
+
+ SDValue NegSmallestNormalizedInt = DAG.getConstant(
+ APFloat::getSmallestNormalized(FltSem, true).bitcastToAPInt(), dl,
+ AsIntVT);
+
+ SDValue SmallestNormalizedInt = DAG.getConstant(
+ APFloat::getSmallestNormalized(FltSem, false).bitcastToAPInt(), dl,
+ AsIntVT);
+
+ // Masks out the exponent bits.
+ SDValue ExpMask =
+ DAG.getConstant(APFloat::getInf(FltSem).bitcastToAPInt(), dl, AsIntVT);
+
+ // Mask out the exponent part of the value.
+ //
+  // e.g., for f32 FractSignMaskVal = 0x807fffff
+ APInt FractSignMaskVal = APInt::getBitsSet(BitSize, 0, Precision - 1);
+ FractSignMaskVal.setBit(BitSize - 1); // Set the sign bit
+
+ APInt SignMaskVal = APInt::getSignedMaxValue(BitSize);
+ SDValue SignMask = DAG.getConstant(SignMaskVal, dl, AsIntVT);
+
+ SDValue FractSignMask = DAG.getConstant(FractSignMaskVal, dl, AsIntVT);
+
+ const APFloat One(FltSem, "1.0");
+ // Scale a possible denormal input.
+ // e.g., for f64, 0x1p+54
+ APFloat ScaleUpKVal =
+ scalbn(One, Precision + 1, APFloat::rmNearestTiesToEven);
+
+ SDValue ScaleUpK = DAG.getConstantFP(ScaleUpKVal, dl, VT);
+ SDValue ScaleUp = DAG.getNode(ISD::FMUL, dl, VT, Val, ScaleUpK);
+
+ EVT SetCCVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ SDValue AsInt = DAG.getNode(ISD::BITCAST, dl, AsIntVT, Val);
+
+ SDValue Abs = DAG.getNode(ISD::AND, dl, AsIntVT, AsInt, SignMask);
+
+ SDValue AddNegSmallestNormal =
+ DAG.getNode(ISD::ADD, dl, AsIntVT, Abs, NegSmallestNormalizedInt);
+ SDValue DenormOrZero = DAG.getSetCC(dl, SetCCVT, AddNegSmallestNormal,
+ NegSmallestNormalizedInt, ISD::SETULE);
+
+ SDValue IsDenormal =
+ DAG.getSetCC(dl, SetCCVT, Abs, SmallestNormalizedInt, ISD::SETULT);
+
+ SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT);
+ SDValue Zero = DAG.getConstant(0, dl, ExpVT);
+
+ SDValue ScaledAsInt = DAG.getNode(ISD::BITCAST, dl, AsIntVT, ScaleUp);
+ SDValue ScaledSelect =
+ DAG.getNode(ISD::SELECT, dl, AsIntVT, IsDenormal, ScaledAsInt, AsInt);
+
+ SDValue ExpMaskScaled =
+ DAG.getNode(ISD::AND, dl, AsIntVT, ScaledAsInt, ExpMask);
+
+ SDValue ScaledValue =
+ DAG.getNode(ISD::SELECT, dl, AsIntVT, IsDenormal, ExpMaskScaled, Abs);
+
+ // Extract the exponent bits.
+ SDValue ExponentShiftAmt =
+ DAG.getShiftAmountConstant(Precision - 1, AsIntVT, dl);
+ SDValue ShiftedExp =
+ DAG.getNode(ISD::SRL, dl, AsIntVT, ScaledValue, ExponentShiftAmt);
+ SDValue Exp = DAG.getSExtOrTrunc(ShiftedExp, dl, ExpVT);
+
+ SDValue NormalBiasedExp = DAG.getNode(ISD::ADD, dl, ExpVT, Exp, MinExp);
+ SDValue DenormalOffset = DAG.getConstant(-Precision - 1, dl, ExpVT);
+ SDValue DenormalExpBias =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, IsDenormal, DenormalOffset, Zero);
+
+ SDValue MaskedFractAsInt =
+ DAG.getNode(ISD::AND, dl, AsIntVT, ScaledSelect, FractSignMask);
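+  // Note (added for clarity): ORing in the bit pattern of 0.5 rebuilds the
+  // cleared exponent field, so the fraction result has magnitude in [0.5, 1).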
+ const APFloat Half(FltSem, "0.5");
+ SDValue FPHalf = DAG.getConstant(Half.bitcastToAPInt(), dl, AsIntVT);
+ SDValue Or = DAG.getNode(ISD::OR, dl, AsIntVT, MaskedFractAsInt, FPHalf);
+ SDValue MaskedFract = DAG.getNode(ISD::BITCAST, dl, VT, Or);
+
+ SDValue ComputedExp =
+ DAG.getNode(ISD::ADD, dl, ExpVT, NormalBiasedExp, DenormalExpBias);
+
+ SDValue Result0 =
+ DAG.getNode(ISD::SELECT, dl, VT, DenormOrZero, Val, MaskedFract);
+
+ SDValue Result1 =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, DenormOrZero, Zero, ComputedExp);
+
+ return DAG.getMergeValues({Result0, Result1}, dl);
+}
+
/// This function is responsible for legalizing a
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
@@ -2365,10 +2658,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
SDValue Load =
DAG.getLoad(MVT::f64, dl, MemChain, StackSlot, MachinePointerInfo());
// FP constant to bias correct the final result
- SDValue Bias = DAG.getConstantFP(isSigned ?
- BitsToDouble(0x4330000080000000ULL) :
- BitsToDouble(0x4330000000000000ULL),
- dl, MVT::f64);
+ SDValue Bias = DAG.getConstantFP(
+ isSigned ? llvm::bit_cast<double>(0x4330000080000000ULL)
+ : llvm::bit_cast<double>(0x4330000000000000ULL),
+ dl, MVT::f64);
// Subtract the bias and get the final result.
SDValue Sub;
SDValue Result;
@@ -2696,6 +2989,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if ((Tmp1 = TLI.expandABS(Node, DAG)))
Results.push_back(Tmp1);
break;
+ case ISD::ABDS:
+ case ISD::ABDU:
+ if ((Tmp1 = TLI.expandABD(Node, DAG)))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTPOP:
if ((Tmp1 = TLI.expandCTPOP(Node, DAG)))
Results.push_back(Tmp1);
@@ -3241,6 +3539,36 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: {
+ EVT VT = Node->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getLDEXP(VT);
+    // Use the LibCall instead; it is very likely faster
+ // FIXME: Use separate LibCall action.
+ if (TLI.getLibcallName(LC))
+ break;
+
+ if (SDValue Expanded = expandLdexp(Node)) {
+ Results.push_back(Expanded);
+ if (Node->getOpcode() == ISD::STRICT_FLDEXP)
+ Results.push_back(Expanded.getValue(1));
+ }
+
+ break;
+ }
+ case ISD::FFREXP: {
+ RTLIB::Libcall LC = RTLIB::getFREXP(Node->getValueType(0));
+    // Use the LibCall instead; it is very likely faster
+ // FIXME: Use separate LibCall action.
+ if (TLI.getLibcallName(LC))
+ break;
+
+ if (SDValue Expanded = expandFrexp(Node)) {
+ Results.push_back(Expanded);
+ Results.push_back(Expanded.getValue(1));
+ }
+ break;
+ }
case ISD::FMAD:
llvm_unreachable("Illegal fmad should never be formed");
@@ -3477,13 +3805,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// if we were allowed to generate libcalls to division functions of illegal
// type. But we cannot do that.
llvm_unreachable("Cannot expand DIVFIX!");
- case ISD::ADDCARRY:
- case ISD::SUBCARRY: {
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: {
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDValue Carry = Node->getOperand(2);
- bool IsAdd = Node->getOpcode() == ISD::ADDCARRY;
+ bool IsAdd = Node->getOpcode() == ISD::UADDO_CARRY;
// Initial add of the 2 operands.
unsigned Op = IsAdd ? ISD::ADD : ISD::SUB;
@@ -3628,9 +3956,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
} else {
// We test only the i1 bit. Skip the AND if UNDEF or another AND.
if (Tmp2.isUndef() ||
- (Tmp2.getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(Tmp2.getOperand(1)) &&
- cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1))
+ (Tmp2.getOpcode() == ISD::AND && isOneConstant(Tmp2.getOperand(1))))
Tmp3 = Tmp2;
else
Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
@@ -3864,6 +4190,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Results.push_back(TLI.expandVecReduce(Node, DAG));
break;
case ISD::GLOBAL_OFFSET_TABLE:
@@ -4029,6 +4357,9 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::FMIN_F80, RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128, Results);
break;
+ // FIXME: We do not have libcalls for FMAXIMUM and FMINIMUM. So, we cannot use
+  // libcall legalization for these nodes, but there is no default expansion for
+ // these nodes either (see PR63267 for example).
case ISD::FMAXNUM:
case ISD::STRICT_FMAXNUM:
ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
@@ -4135,6 +4466,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::ROUNDEVEN_F128,
RTLIB::ROUNDEVEN_PPCF128, Results);
break;
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP:
+ ExpandFPLibCall(Node, RTLIB::LDEXP_F32, RTLIB::LDEXP_F64, RTLIB::LDEXP_F80,
+ RTLIB::LDEXP_F128, RTLIB::LDEXP_PPCF128, Results);
+ break;
+ case ISD::FFREXP: {
+ ExpandFrexpLibCall(Node, Results);
+ break;
+ }
case ISD::FPOWI:
case ISD::STRICT_FPOWI: {
RTLIB::Libcall LC = RTLIB::getPOWI(Node->getSimpleValueType(0));
@@ -4241,7 +4581,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
case ISD::FP16_TO_FP:
if (Node->getValueType(0) == MVT::f32) {
- Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
}
break;
case ISD::STRICT_FP16_TO_FP: {
@@ -4259,14 +4599,14 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::Libcall LC =
RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
- Results.push_back(ExpandLibCall(LC, Node, false));
+ Results.push_back(ExpandLibCall(LC, Node, false).first);
break;
}
case ISD::FP_TO_BF16: {
RTLIB::Libcall LC =
RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::bf16);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_bf16");
- Results.push_back(ExpandLibCall(LC, Node, false));
+ Results.push_back(ExpandLibCall(LC, Node, false).first);
break;
}
case ISD::STRICT_SINT_TO_FP:
@@ -4381,7 +4721,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(
ExpandLibCall(RTLIB::getFPEXT(Node->getOperand(0).getValueType(),
Node->getValueType(0)),
- Node, false));
+ Node, false).first);
break;
}
case ISD::STRICT_FP_EXTEND:
@@ -4447,16 +4787,39 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
default:
llvm_unreachable("LibCall explicitly requested, but not available");
case MVT::i32:
- Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false));
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false).first);
break;
case MVT::i64:
- Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false));
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false).first);
break;
case MVT::i128:
- Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false));
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false).first);
break;
}
break;
+ case ISD::RESET_FPENV: {
+ // It is legalized to call 'fesetenv(FE_DFL_ENV)'. On most targets
+ // FE_DFL_ENV is defined as '((const fenv_t *) -1)' in glibc.
+ SDValue Ptr = DAG.getIntPtrConstant(-1LL, dl);
+ SDValue Chain = Node->getOperand(0);
+ Results.push_back(
+ DAG.makeStateFunctionCall(RTLIB::FESETENV, Ptr, Chain, dl));
+ break;
+ }
+ case ISD::GET_FPENV_MEM: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue EnvPtr = Node->getOperand(1);
+ Results.push_back(
+ DAG.makeStateFunctionCall(RTLIB::FEGETENV, EnvPtr, Chain, dl));
+ break;
+ }
+ case ISD::SET_FPENV_MEM: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue EnvPtr = Node->getOperand(1);
+ Results.push_back(
+ DAG.makeStateFunctionCall(RTLIB::FESETENV, EnvPtr, Chain, dl));
+ break;
+ }
}
// Replace the original node with the legalized result.
@@ -4785,6 +5148,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FREM:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::FPOW:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
@@ -4841,6 +5206,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(Tmp4.getValue(1));
break;
case ISD::FCOPYSIGN:
+ case ISD::FLDEXP:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = Node->getOperand(1);
@@ -4867,6 +5233,17 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(Tmp3);
Results.push_back(Tmp3.getValue(1));
break;
+ case ISD::FFREXP: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FFREXP, dl, {NVT, Node->getValueType(1)}, Tmp1);
+
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+
+ Results.push_back(Tmp2.getValue(1));
+ break;
+ }
case ISD::FFLOOR:
case ISD::FCEIL:
case ISD::FRINT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index f1e80ce7e037..7e035d21ef71 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -59,7 +59,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
dbgs() << "SoftenFloatResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to soften the result of this operator!");
+ report_fatal_error("Do not know how to soften the result of this "
+ "operator!");
case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
@@ -107,10 +108,16 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
+ case ISD::BF16_TO_FP: R = SoftenFloatRes_BF16_TO_FP(N); break;
case ISD::STRICT_FPOW:
case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
case ISD::STRICT_FPOWI:
- case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::FPOWI:
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
+ case ISD::FFREXP:
+ R = SoftenFloatRes_FFREXP(N);
+ break;
case ISD::STRICT_FREM:
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::STRICT_FRINT:
@@ -142,6 +149,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
R = SoftenFloatRes_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -510,10 +519,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
return BitConvertToInteger(Op);
}
- // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
- // entirely possible for both f16 and f32 to be legal, so use the fully
- // hard-float FP_EXTEND rather than FP16_TO_FP.
- if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
+ // There's only a libcall for f16 -> f32 and shifting is only valid for bf16
+ // -> f32, so proceed in two stages. Also, it's entirely possible for both
+ // f16 and f32 to be legal, so use the fully hard-float FP_EXTEND rather
+ // than FP16_TO_FP.
+ if ((Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16) &&
+ N->getValueType(0) != MVT::f32) {
if (IsStrict) {
Op = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N),
{ MVT::f32, MVT::Other }, { Chain, Op });
@@ -523,6 +534,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
}
}
+ if (Op.getValueType() == MVT::bf16)
+ return SoftenFloatRes_BF16_TO_FP(N);
+
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -555,6 +569,21 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first;
}
+// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
+// nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_BF16_TO_FP(SDNode *N) {
+ assert(N->getValueType(0) == MVT::f32 &&
+ "Can only soften BF16_TO_FP with f32 result");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
+ SDValue Op = N->getOperand(0);
+ SDLoc DL(N);
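+  // Note (added): a bf16 value occupies the upper 16 bits of the equivalent
+  // f32, so widening reduces to a 16-bit left shift of the raw bits.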
+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, NVT,
+ DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op));
+ SDValue Res = DAG.getNode(ISD::SHL, DL, NVT, Op,
+ DAG.getShiftAmountConstant(16, NVT, DL));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -582,13 +611,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
RTLIB::POW_PPCF128));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_ExpOp(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
unsigned Offset = IsStrict ? 1 : 0;
assert((N->getOperand(1 + Offset).getValueType() == MVT::i16 ||
N->getOperand(1 + Offset).getValueType() == MVT::i32) &&
"Unsupported power type!");
- RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
+ bool IsPowI =
+ N->getOpcode() == ISD::FPOWI || N->getOpcode() == ISD::STRICT_FPOWI;
+
+ RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0))
+ : RTLIB::getLDEXP(N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
if (!TLI.getLibcallName(LC)) {
// Some targets don't have a powi libcall; use pow instead.
@@ -621,6 +654,45 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
return Tmp.first;
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
+ assert(!N->isStrictFPOpcode() && "strictfp not implemented for frexp");
+ EVT VT0 = N->getValueType(0);
+ EVT VT1 = N->getValueType(1);
+ RTLIB::Libcall LC = RTLIB::getFREXP(VT0);
+
+ if (DAG.getLibInfo().getIntSize() != VT1.getSizeInBits()) {
+ // If the exponent does not match with sizeof(int) a libcall would use the
+ // wrong type for the argument.
+ // TODO: Should be able to handle mismatches.
+ DAG.getContext()->emitError("ffrexp exponent does not match sizeof(int)");
+ return DAG.getUNDEF(N->getValueType(0));
+ }
+
+ EVT NVT0 = TLI.getTypeToTransformTo(*DAG.getContext(), VT0);
+ SDValue StackSlot = DAG.CreateStackTemporary(VT1);
+
+ SDLoc DL(N);
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SDValue Ops[2] = {GetSoftenedFloat(N->getOperand(0)), StackSlot};
+ EVT OpsVT[2] = {VT0, StackSlot.getValueType()};
+
+ // TODO: setTypeListBeforeSoften can't properly express multiple return types,
+ // but we only really need to handle the 0th one for softening anyway.
+ CallOptions.setTypeListBeforeSoften({OpsVT}, VT0, true);
+
+ auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT0, Ops, CallOptions, DL,
+ /*Chain=*/SDValue());
+ int FrameIdx = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ auto PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+
+ SDValue LoadExp = DAG.getLoad(VT1, DL, Chain, StackSlot, PtrInfo);
+
+ ReplaceValueWith(SDValue(N, 1), LoadExp);
+ return ReturnVal;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::REM_F32,
@@ -828,7 +900,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to soften this operator's operand!");
+ report_fatal_error("Do not know how to soften this operator's operand!");
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
@@ -1199,7 +1271,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
dbgs() << "ExpandFloatResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to expand the result of this operator!");
+ report_fatal_error("Do not know how to expand the result of this "
+ "operator!");
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
@@ -1253,6 +1326,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
case ISD::STRICT_FPOWI:
case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: ExpandFloatRes_FLDEXP(N, Lo, Hi); break;
case ISD::FREEZE: ExpandFloatRes_FREEZE(N, Lo, Hi); break;
case ISD::STRICT_FRINT:
case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
@@ -1548,6 +1623,11 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
ExpandFloatRes_Binary(N, RTLIB::getPOWI(N->getValueType(0)), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FLDEXP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Binary(N, RTLIB::getLDEXP(N->getValueType(0)), Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FREEZE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
assert(N->getValueType(0) == MVT::ppcf128 &&
@@ -1785,7 +1865,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to expand this operator's operand!");
+ report_fatal_error("Do not know how to expand this operator's operand!");
case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
@@ -2106,7 +2186,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
dbgs() << "PromoteFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to promote this operator's operand!");
+ report_fatal_error("Do not know how to promote this operator's operand!");
case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break;
case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
@@ -2245,7 +2325,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
dbgs() << "PromoteFloatResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to promote this operator's result!");
+ report_fatal_error("Do not know how to promote this operator's result!");
case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break;
case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break;
@@ -2289,7 +2369,9 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: // FMA is same as FMAD
case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break;
- case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break;
+ case ISD::FPOWI:
+ case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break;
+ case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break;
case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
@@ -2304,6 +2386,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
R = PromoteFloatRes_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -2458,7 +2542,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
}
// Promote the Float (first) operand and retain the Integer (second) operand
-SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteFloatRes_ExpOp(SDNode *N) {
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Op0 = GetPromotedFloat(N->getOperand(0));
@@ -2467,6 +2551,17 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) {
return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
}
+SDValue DAGTypeLegalizer::PromoteFloatRes_FFREXP(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), SDLoc(N), {NVT, N->getValueType(1)}, Op);
+
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
// Explicit operation to reduce precision. Reduce the value to half precision
// and promote it back to the legal type.
SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
@@ -2608,7 +2703,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
dbgs() << "SoftPromoteHalfResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to soft promote this operator's result!");
+ report_fatal_error("Do not know how to soft promote this operator's "
+ "result!");
case ISD::BITCAST: R = SoftPromoteHalfRes_BITCAST(N); break;
case ISD::ConstantFP: R = SoftPromoteHalfRes_ConstantFP(N); break;
@@ -2655,7 +2751,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: // FMA is same as FMAD
case ISD::FMAD: R = SoftPromoteHalfRes_FMAD(N); break;
- case ISD::FPOWI: R = SoftPromoteHalfRes_FPOWI(N); break;
+ case ISD::FPOWI:
+ case ISD::FLDEXP: R = SoftPromoteHalfRes_ExpOp(N); break;
case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
@@ -2668,6 +2765,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
R = SoftPromoteHalfRes_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -2767,7 +2866,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
-SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ExpOp(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
@@ -2916,7 +3015,8 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
dbgs() << "SoftPromoteHalfOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to soft promote this operator's operand!");
+ report_fatal_error("Do not know how to soft promote this operator's "
+ "operand!");
case ISD::BITCAST: Res = SoftPromoteHalfOp_BITCAST(N); break;
case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index c9ce9071a25d..df5878fcdf2e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -20,6 +20,7 @@
#include "LegalizeTypes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -54,7 +55,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to promote this operator!");
+ report_fatal_error("Do not know how to promote this operator!");
case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
@@ -115,6 +116,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
case ISD::VECTOR_SPLICE:
Res = PromoteIntRes_VECTOR_SPLICE(N); break;
+ case ISD::VECTOR_INTERLEAVE:
+ case ISD::VECTOR_DEINTERLEAVE:
+ Res = PromoteIntRes_VECTOR_INTERLEAVE_DEINTERLEAVE(N);
+ return;
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
case ISD::BUILD_VECTOR:
@@ -134,7 +139,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break;
case ISD::SIGN_EXTEND:
+ case ISD::VP_SIGN_EXTEND:
case ISD::ZERO_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
case ISD::VP_FP_TO_SINT:
@@ -191,8 +198,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADDE:
case ISD::SUBE:
- case ISD::ADDCARRY:
- case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: Res = PromoteIntRes_UADDSUBO_CARRY(N, ResNo); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break;
@@ -279,6 +286,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::IS_FPCLASS:
Res = PromoteIntRes_IS_FPCLASS(N);
break;
+ case ISD::FFREXP:
+ Res = PromoteIntRes_FFREXP(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -753,8 +763,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
// If the result and operand types are the same after promotion, simplify
- // to an in-register extension.
- if (NVT == Res.getValueType()) {
+ // to an in-register extension. Unless this is a VP_*_EXTEND.
+ if (NVT == Res.getValueType() && N->getNumOperands() == 1) {
// The high bits are not guaranteed to be anything. Insert an extend.
if (N->getOpcode() == ISD::SIGN_EXTEND)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
@@ -767,6 +777,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
}
// Otherwise, just extend the original operand all the way to the larger type.
+ if (N->getNumOperands() != 1) {
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+ }
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
@@ -1023,14 +1039,8 @@ static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
if (VT.isVector())
WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
VT.getVectorElementCount());
- if (Signed) {
- LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT);
- RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT);
- } else {
- LHS = DAG.getZExtOrTrunc(LHS, dl, WideVT);
- RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT);
- }
-
+ LHS = DAG.getExtOrTrunc(Signed, LHS, dl, WideVT);
+ RHS = DAG.getExtOrTrunc(Signed, RHS, dl, WideVT);
SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
DAG);
assert(Res && "Expanding DIVFIX with wide type failed?");
@@ -1177,16 +1187,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
// Get the SETCC result using the canonical SETCC type.
SDValue SetCC;
if (N->isStrictFPOpcode()) {
- EVT VTs[] = {SVT, MVT::Other};
+ SDVTList VTs = DAG.getVTList({SVT, MVT::Other});
SDValue Opers[] = {N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3)};
- SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers);
+ SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers, N->getFlags());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1));
} else
SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
- N->getOperand(1), N->getOperand(2));
+ N->getOperand(1), N->getOperand(2), N->getFlags());
// Convert to the expected type.
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
@@ -1200,6 +1210,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_IS_FPCLASS(SDNode *N) {
return DAG.getNode(ISD::IS_FPCLASS, DL, NResVT, Arg, Test);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_FFREXP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT VT = N->getValueType(0);
+
+ SDLoc dl(N);
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, NVT), N->getOperand(0));
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return Res.getValue(1);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
@@ -1445,23 +1467,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
-// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that
+// Handle promotion for the ADDE/SUBE/UADDO_CARRY/USUBO_CARRY nodes. Notice that
// the third operand of ADDE/SUBE nodes is carry flag, which differs from
-// the ADDCARRY/SUBCARRY nodes in that the third operand is carry Boolean.
-SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
+// the UADDO_CARRY/USUBO_CARRY nodes in that the third operand is carry Boolean.
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO_CARRY(SDNode *N,
+ unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
// We need to sign-extend the operands so the carry value computed by the
// wide operation will be equivalent to the carry value computed by the
// narrow operation.
- // An ADDCARRY can generate carry only if any of the operands has its
+ // An UADDO_CARRY can generate carry only if any of the operands has its
// most significant bit set. Sign extension propagates the most significant
// bit into the higher bits which means the extra bit that the narrow
// addition would need (i.e. the carry) will be propagated through the higher
// bits of the wide addition.
- // A SUBCARRY can generate borrow only if LHS < RHS and this property will be
- // preserved by sign extension.
+ // A USUBO_CARRY can generate borrow only if LHS < RHS and this property will
+ // be preserved by sign extension.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
@@ -1629,7 +1652,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to promote this operator's operand!");
+ report_fatal_error("Do not know how to promote this operator's operand!");
case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
case ISD::ATOMIC_STORE:
@@ -1655,6 +1678,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_SETCC:
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::VP_SIGN_EXTEND: Res = PromoteIntOp_VP_SIGN_EXTEND(N); break;
case ISD::VP_SINT_TO_FP:
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
@@ -1676,6 +1700,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+ case ISD::VP_ZERO_EXTEND: Res = PromoteIntOp_VP_ZERO_EXTEND(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break;
@@ -1690,8 +1715,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY:
- case ISD::ADDCARRY:
- case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: Res = PromoteIntOp_ADDSUBO_CARRY(N, OpNo); break;
case ISD::FRAMEADDR:
case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
@@ -1706,10 +1731,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break;
-
case ISD::FPOWI:
- case ISD::STRICT_FPOWI: Res = PromoteIntOp_FPOWI(N); break;
-
+ case ISD::STRICT_FPOWI:
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: Res = PromoteIntOp_ExpOp(N); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
@@ -2005,6 +2030,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
Op, DAG.getValueType(N->getOperand(0).getValueType()));
}
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_SIGN_EXTEND(SDNode *N) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ // FIXME: There is no VP_ANY_EXTEND yet.
+ Op = DAG.getNode(ISD::VP_ZERO_EXTEND, dl, VT, Op, N->getOperand(1),
+ N->getOperand(2));
+ unsigned Diff =
+ VT.getScalarSizeInBits() - N->getOperand(0).getScalarValueSizeInBits();
+ SDValue ShAmt = DAG.getShiftAmountConstant(Diff, VT, dl);
+ // FIXME: There is no VP_SIGN_EXTEND_INREG so use a pair of shifts.
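+  // E.g. (illustration only): promoting i8 lanes into i32 lanes gives
+  // Diff = 24, i.e. a VP_SHL by 24 followed by a VP_ASHR by 24.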
+ SDValue Shl = DAG.getNode(ISD::VP_SHL, dl, VT, Op, ShAmt, N->getOperand(1),
+ N->getOperand(2));
+ return DAG.getNode(ISD::VP_ASHR, dl, VT, Shl, ShAmt, N->getOperand(1),
+ N->getOperand(2));
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
if (N->getOpcode() == ISD::VP_SINT_TO_FP)
return SDValue(DAG.UpdateNodeOperands(N,
@@ -2156,7 +2198,20 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
}
-SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_ZERO_EXTEND(SDNode *N) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ // FIXME: There is no VP_ANY_EXTEND yet.
+ Op = DAG.getNode(ISD::VP_ZERO_EXTEND, dl, VT, Op, N->getOperand(1),
+ N->getOperand(2));
+ APInt Imm = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
+ N->getOperand(0).getScalarValueSizeInBits());
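+  // E.g. (illustration only): i8 lanes promoted into i32 lanes are
+  // re-zero-extended by masking each lane with 0xFF.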
+ return DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(Imm, dl, VT),
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo) {
assert(OpNo == 2 && "Don't know how to promote this operand!");
SDValue LHS = N->getOperand(0);
@@ -2193,26 +2248,29 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
- // The integer operand is the last operand in FPOWI (so the result and
- // floating point operand is already type legalized).
+ bool IsPowI =
+ N->getOpcode() == ISD::FPOWI || N->getOpcode() == ISD::STRICT_FPOWI;
+
+ // The integer operand is the last operand in FPOWI (or FLDEXP) (so the result
+ // and floating point operand is already type legalized).
+ RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0))
+ : RTLIB::getLDEXP(N->getValueType(0));
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ SDValue Op = SExtPromotedInteger(N->getOperand(1));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
+ }
// We can't just promote the exponent type in FPOWI, since we want to lower
  // the node to a libcall, and if we promote to a type larger than
  // sizeof(int) the libcall might not conform to the target's ABI. Instead
// we rewrite to a libcall here directly, letting makeLibCall handle promotion
// if the target accepts it according to shouldSignExtendTypeInLibCall.
- RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
- if (!TLI.getLibcallName(LC)) {
- // Some targets don't have a powi libcall; use pow instead.
- // FIXME: Implement this if some target needs it.
- DAG.getContext()->emitError("Don't know how to promote fpowi to fpow");
- return DAG.getUNDEF(N->getValueType(0));
- }
+
unsigned OpOffset = IsStrict ? 1 : 0;
// The exponent should fit in a sizeof(int) type for the libcall to be valid.
assert(DAG.getLibInfo().getIntSize() ==
@@ -2290,16 +2348,40 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
// An i1 vecreduce_or is equivalent to vecreduce_umax, use that instead if
// vecreduce_or is not legal
else if (Opcode == ISD::VECREDUCE_OR && OrigEltVT == MVT::i1 &&
- !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
- TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT))
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT)) {
Opcode = ISD::VECREDUCE_UMAX;
+ // Can't use promoteTargetBoolean here because we still need
+ // to either sign_ext or zero_ext in the undefined case.
+ switch (TLI.getBooleanContents(InVT)) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ Op = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ Op = SExtPromotedInteger(N->getOperand(0));
+ break;
+ }
+ }
// An i1 vecreduce_and is equivalent to vecreduce_umin, use that instead if
// vecreduce_and is not legal
else if (Opcode == ISD::VECREDUCE_AND && OrigEltVT == MVT::i1 &&
- !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
- TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT))
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT)) {
Opcode = ISD::VECREDUCE_UMIN;
+ // Can't use promoteTargetBoolean here because we still need
+ // to either sign_ext or zero_ext in the undefined case.
+ switch (TLI.getBooleanContents(InVT)) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ Op = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ Op = SExtPromotedInteger(N->getOperand(0));
+ break;
+ }
+ }
if (ResVT.bitsGE(EltVT))
return DAG.getNode(Opcode, SDLoc(N), ResVT, Op);
@@ -2512,8 +2594,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADDE:
case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
- case ISD::ADDCARRY:
- case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: ExpandIntRes_UADDSUBO_CARRY(N, Lo, Hi); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break;
@@ -2874,48 +2956,118 @@ static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) {
void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc DL(N);
- ISD::NodeType LoOpc;
- ISD::CondCode CondC;
- std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- // Expand the subcomponents.
- SDValue LHSL, LHSH, RHSL, RHSH;
- GetExpandedInteger(LHS, LHSL, LHSH);
- GetExpandedInteger(RHS, RHSL, RHSH);
-
- // Value types
- EVT NVT = LHSL.getValueType();
- EVT CCT = getSetCCResultType(NVT);
-
// If the upper halves are all sign bits, then we can perform the MINMAX on
// the lower half and sign-extend the result to the upper half.
- unsigned NumHalfBits = NVT.getScalarSizeInBits();
+ unsigned NumBits = N->getValueType(0).getScalarSizeInBits();
+ unsigned NumHalfBits = NumBits / 2;
if (DAG.ComputeNumSignBits(LHS) > NumHalfBits &&
DAG.ComputeNumSignBits(RHS) > NumHalfBits) {
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ EVT NVT = LHSL.getValueType();
+
Lo = DAG.getNode(N->getOpcode(), DL, NVT, LHSL, RHSL);
Hi = DAG.getNode(ISD::SRA, DL, NVT, Lo,
DAG.getShiftAmountConstant(NumHalfBits - 1, NVT, DL));
return;
}
- // Hi part is always the same op
- Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
+ // The Lo of smin(X, -1) is LHSL if X is negative. Otherwise it's -1.
+ // The Lo of smax(X, 0) is 0 if X is negative. Otherwise it's LHSL.
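+  // (Illustration, added note: for an i64 smax(X, 0) split into i32 halves,
+  // Hi becomes smax(XHi, 0) and Lo selects 0 when XHi is negative, else XLo.)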
+ if ((N->getOpcode() == ISD::SMAX && isNullConstant(RHS)) ||
+ (N->getOpcode() == ISD::SMIN && isAllOnesConstant(RHS))) {
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ EVT NVT = LHSL.getValueType();
+ EVT CCT = getSetCCResultType(NVT);
- // We need to know whether to select Lo part that corresponds to 'winning'
- // Hi part or if Hi parts are equal.
- SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC);
- SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ);
+ SDValue HiNeg =
+ DAG.getSetCC(DL, CCT, LHSH, DAG.getConstant(0, DL, NVT), ISD::SETLT);
+ if (N->getOpcode() == ISD::SMIN) {
+ Lo = DAG.getSelect(DL, NVT, HiNeg, LHSL, DAG.getConstant(-1, DL, NVT));
+ } else {
+ Lo = DAG.getSelect(DL, NVT, HiNeg, DAG.getConstant(0, DL, NVT), LHSL);
+ }
+ Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
+ return;
+ }
- // Lo part corresponding to the 'winning' Hi part
- SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
+ const APInt *RHSVal = nullptr;
+ if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS))
+ RHSVal = &RHSConst->getAPIntValue();
- // Recursed Lo part if Hi parts are equal, this uses unsigned version
- SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
+  // The high half of MIN/MAX is always just the MIN/MAX of the
+ // high halves of the operands. Expand this way if it appears profitable.
+ if (RHSVal && (N->getOpcode() == ISD::UMIN || N->getOpcode() == ISD::UMAX) &&
+ (RHSVal->countLeadingOnes() >= NumHalfBits ||
+ RHSVal->countLeadingZeros() >= NumHalfBits)) {
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ EVT NVT = LHSL.getValueType();
+ EVT CCT = getSetCCResultType(NVT);
+
+ ISD::NodeType LoOpc;
+ ISD::CondCode CondC;
+ std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
+
+ Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
+ // We need to know whether to select Lo part that corresponds to 'winning'
+ // Hi part or if Hi parts are equal.
+ SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC);
+ SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ);
- Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
+ // Lo part corresponding to the 'winning' Hi part
+ SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
+
+ // Recursed Lo part if Hi parts are equal, this uses unsigned version
+ SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
+
+ Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
+ return;
+ }
+
+ // Expand to "a < b ? a : b" etc. Prefer ge/le if that simplifies
+ // the compare.
+ ISD::CondCode Pred;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("How did we get here?");
+ case ISD::SMAX:
+ if (RHSVal && RHSVal->countTrailingZeros() >= NumHalfBits)
+ Pred = ISD::SETGE;
+ else
+ Pred = ISD::SETGT;
+ break;
+ case ISD::SMIN:
+ if (RHSVal && RHSVal->countTrailingOnes() >= NumHalfBits)
+ Pred = ISD::SETLE;
+ else
+ Pred = ISD::SETLT;
+ break;
+ case ISD::UMAX:
+ if (RHSVal && RHSVal->countTrailingZeros() >= NumHalfBits)
+ Pred = ISD::SETUGE;
+ else
+ Pred = ISD::SETUGT;
+ break;
+ case ISD::UMIN:
+ if (RHSVal && RHSVal->countTrailingOnes() >= NumHalfBits)
+ Pred = ISD::SETULE;
+ else
+ Pred = ISD::SETULT;
+ break;
+ }
+ EVT VT = N->getValueType(0);
+ EVT CCT = getSetCCResultType(VT);
+ SDValue Cond = DAG.getSetCC(DL, CCT, LHS, RHS, Pred);
+ SDValue Result = DAG.getSelect(DL, VT, Cond, LHS, RHS);
+ SplitInteger(Result, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
@@ -2931,7 +3083,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue HiOps[3] = { LHSH, RHSH };
bool HasOpCarry = TLI.isOperationLegalOrCustom(
- N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
+ N->getOpcode() == ISD::ADD ? ISD::UADDO_CARRY : ISD::USUBO_CARRY,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasOpCarry) {
SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
@@ -2940,13 +3092,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
HiOps[2] = Lo.getValue(1);
Hi = DAG.computeKnownBits(HiOps[2]).isZero()
? DAG.getNode(ISD::UADDO, dl, VTList, ArrayRef(HiOps, 2))
- : DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
+ : DAG.getNode(ISD::UADDO_CARRY, dl, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.computeKnownBits(HiOps[2]).isZero()
? DAG.getNode(ISD::USUBO, dl, VTList, ArrayRef(HiOps, 2))
- : DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
+ : DAG.getNode(ISD::USUBO_CARRY, dl, VTList, HiOps);
}
return;
}
@@ -3014,8 +3166,22 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps);
Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2));
- SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
- ISD::SETULT);
+ SDValue Cmp;
+ // Special case: X+1 has a carry out if X+1==0. This may reduce the live
+ // range of X. We assume comparing with 0 is cheap.
+ if (isOneConstant(LoOps[1]))
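+    // Added note: adding all-ones (-1) to the low half carries out iff the
+    // low half of X is nonzero; when the whole RHS is -1 the compare below is
+    // inverted (SETEQ) and paired with the 'Hi - Carry' form further on.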
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, dl, NVT), ISD::SETEQ);
+ else if (isAllOnesConstant(LoOps[1])) {
+ if (isAllOnesConstant(HiOps[1]))
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0],
+ DAG.getConstant(0, dl, NVT), ISD::SETEQ);
+ else
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0],
+ DAG.getConstant(0, dl, NVT), ISD::SETNE);
+ } else
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
SDValue Carry;
if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
@@ -3024,7 +3190,10 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
Carry = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
+ if (isAllOnesConstant(LoOps[1]) && isAllOnesConstant(HiOps[1]))
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps[0], Carry);
+ else
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
} else {
Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps);
Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2));
@@ -3101,12 +3270,12 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
ISD::CondCode Cond;
switch(N->getOpcode()) {
case ISD::UADDO:
- CarryOp = ISD::ADDCARRY;
+ CarryOp = ISD::UADDO_CARRY;
NoCarryOp = ISD::ADD;
Cond = ISD::SETULT;
break;
case ISD::USUBO:
- CarryOp = ISD::SUBCARRY;
+ CarryOp = ISD::USUBO_CARRY;
NoCarryOp = ISD::SUB;
Cond = ISD::SETUGT;
break;
@@ -3137,9 +3306,22 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
SDValue Sum = DAG.getNode(NoCarryOp, dl, LHS.getValueType(), LHS, RHS);
SplitInteger(Sum, Lo, Hi);
- // Calculate the overflow: addition overflows iff a + b < a, and subtraction
- // overflows iff a - b > a.
- Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
+ if (N->getOpcode() == ISD::UADDO && isOneConstant(RHS)) {
+    // Special case: uaddo X, 1 overflows if X+1 == 0. We can detect this
+ // with (Lo | Hi) == 0.
+ SDValue Or = DAG.getNode(ISD::OR, dl, Lo.getValueType(), Lo, Hi);
+ Ovf = DAG.getSetCC(dl, N->getValueType(1), Or,
+ DAG.getConstant(0, dl, Lo.getValueType()), ISD::SETEQ);
+ } else if (N->getOpcode() == ISD::UADDO && isAllOnesConstant(RHS)) {
+ // Special case: uaddo X, -1 overflows if X == 0.
+ Ovf =
+ DAG.getSetCC(dl, N->getValueType(1), LHS,
+ DAG.getConstant(0, dl, LHS.getValueType()), ISD::SETNE);
+ } else {
+ // Calculate the overflow: addition overflows iff a + b < a, and
+ // subtraction overflows iff a - b > a.
+ Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
+ }
}
// Legalized the flag result - switch anything that used the old flag to
@@ -3147,8 +3329,8 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Ovf);
}
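The two overflow special cases above rest on simple identities: adding 1 wraps exactly when the full-width result is zero, so OR-ing the already-split halves and testing against zero gives the overflow bit; adding -1 wraps exactly when the original operand is non-zero. A small self-contained check (plain C++, illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t X : {0ull, 1ull, 0xFFFFFFFFull, ~0ull - 1, ~0ull}) {
    uint64_t Sum1 = X + 1;
    uint32_t Lo = (uint32_t)Sum1, Hi = (uint32_t)(Sum1 >> 32);
    bool OvfAdd1 = Sum1 < X;                  // uaddo X, 1
    assert(OvfAdd1 == ((Lo | Hi) == 0));      // the (Lo | Hi) == 0 test
    bool OvfAddM1 = X + ~0ull < X;            // uaddo X, -1
    assert(OvfAddM1 == (X != 0));             // the X != 0 test
  }
  return 0;
}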
-void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
- SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO_CARRY(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
SDLoc dl(N);
@@ -3177,8 +3359,8 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N,
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
// We need to use an unsigned carry op for the lo part.
- unsigned CarryOp = N->getOpcode() == ISD::SADDO_CARRY ? ISD::ADDCARRY
- : ISD::SUBCARRY;
+ unsigned CarryOp =
+ N->getOpcode() == ISD::SADDO_CARRY ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) });
Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
@@ -3308,14 +3490,14 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
return;
}
- // If we have SUBCARRY, use the expanded form of the sra+xor+sub sequence we
- // use in LegalizeDAG. The SUB part of the expansion is based on
- // ExpandIntRes_ADDSUB which also uses SUBCARRY/USUBO after checking that
- // SUBCARRY is LegalOrCustom. Each of the pieces here can be further expanded
- // if needed. Shift expansion has a special case for filling with sign bits
- // so that we will only end up with one SRA.
+ // If we have USUBO_CARRY, use the expanded form of the sra+xor+sub sequence
+ // we use in LegalizeDAG. The SUB part of the expansion is based on
+ // ExpandIntRes_ADDSUB which also uses USUBO_CARRY/USUBO after checking that
+ // USUBO_CARRY is LegalOrCustom. Each of the pieces here can be further
+ // expanded if needed. Shift expansion has a special case for filling with
+ // sign bits so that we will only end up with one SRA.
bool HasSubCarry = TLI.isOperationLegalOrCustom(
- ISD::SUBCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ ISD::USUBO_CARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasSubCarry) {
SDValue Sign = DAG.getNode(
ISD::SRA, dl, NVT, Hi,
@@ -3324,7 +3506,7 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
Lo = DAG.getNode(ISD::USUBO, dl, VTList, Lo, Sign);
- Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
+ Hi = DAG.getNode(ISD::USUBO_CARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
return;
}
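The sra+xor+sub sequence referenced above is the standard branch-free absolute value: the arithmetic shift smears the sign bit across the whole width (0 or -1), the XOR conditionally complements, and the subtract adds back the 1 needed for two's-complement negation. A scalar sketch (plain C++, illustrative only; it assumes the usual arithmetic right shift on signed values and, like the node expansion, leaves the most-negative input wrapped):

#include <cassert>
#include <cstdint>

static int64_t abs_via_sra_xor_sub(int64_t X) {
  int64_t Sign = X >> 63;        // SRA: 0 for non-negative, -1 for negative
  return (X ^ Sign) - Sign;      // XOR flips the bits; subtracting -1 adds 1
}

int main() {
  assert(abs_via_sra_xor_sub(5) == 5);
  assert(abs_via_sra_xor_sub(-5) == 5);
  assert(abs_via_sra_xor_sub(0) == 0);
  return 0;
}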
@@ -4956,8 +5138,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
ConstantSDNode *LoCmpC = dyn_cast<ConstantSDNode>(LoCmp.getNode());
ConstantSDNode *HiCmpC = dyn_cast<ConstantSDNode>(HiCmp.getNode());
- bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
- CCCode == ISD::SETUGE || CCCode == ISD::SETULE);
+ bool EqAllowed = ISD::isTrueWhenEqual(CCCode);
// FIXME: Is the HiCmpC->isOne() here correct for
// ZeroOrNegativeOneBooleanContent.
@@ -5088,9 +5269,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {
GetExpandedInteger(LHS, LHSLo, LHSHi);
GetExpandedInteger(RHS, RHSLo, RHSHi);
- // Expand to a SUBE for the low part and a smaller SETCCCARRY for the high.
+ // Expand to a USUBO_CARRY for the low part and a SETCCCARRY for the high.
SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType());
- SDValue LowCmp = DAG.getNode(ISD::SUBCARRY, dl, VTList, LHSLo, RHSLo, Carry);
+ SDValue LowCmp =
+ DAG.getNode(ISD::USUBO_CARRY, dl, VTList, LHSLo, RHSLo, Carry);
return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi,
LowCmp.getValue(1), Cond);
}
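The USUBO_CARRY emitted here exists only to produce a borrow; its numeric result is unused, and the high halves plus that borrow decide the comparison. Ignoring the incoming carry for simplicity, an unsigned less-than on a split value is equivalent to the following scalar check (plain C++, illustrative only; names are ours):

#include <cassert>
#include <cstdint>

// "A < B" for a 128-bit value held in two 64-bit halves, phrased as the
// underflow of the high-half subtract once the low-half borrow is known.
static bool ult_via_borrow(uint64_t AHi, uint64_t ALo, uint64_t BHi,
                           uint64_t BLo) {
  bool Borrow = ALo < BLo;                     // borrow out of the low USUBO
  return AHi < BHi || (AHi == BHi && Borrow);  // high compare consumes it
}

int main() {
  assert(ult_via_borrow(0, 5, 0, 9));
  assert(!ult_via_borrow(1, 0, 0, ~0ull));
  assert(ult_via_borrow(7, 1, 7, 2));
  return 0;
}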
@@ -5293,6 +5475,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) {
return DAG.getNode(ISD::VECTOR_SPLICE, dl, OutVT, V0, V1, N->getOperand(2));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_INTERLEAVE_DEINTERLEAVE(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT ResVT = V0.getValueType();
+ SDValue Res = DAG.getNode(N->getOpcode(), dl,
+ DAG.getVTList(ResVT, ResVT), V0, V1);
+ SetPromotedInteger(SDValue(N, 0), Res.getValue(0));
+ SetPromotedInteger(SDValue(N, 1), Res.getValue(1));
+ return SDValue();
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 5e0349593139..328939e44dcb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -245,8 +245,7 @@ bool DAGTypeLegalizer::run() {
// types are illegal.
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
EVT ResultVT = N->getValueType(i);
- LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT.getEVTString()
- << "\n");
+ LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT << "\n");
switch (getTypeAction(ResultVT)) {
case TargetLowering::TypeLegal:
LLVM_DEBUG(dbgs() << "Legal result type\n");
@@ -716,7 +715,6 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
assert((OpIdEntry == 0) && "Node is already promoted!");
OpIdEntry = getTableId(Result);
- Result->setFlags(Op->getFlags());
DAG.transferDbgValues(Op, Result);
}
@@ -989,10 +987,7 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(Pair);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
- Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
- DAG.getIntPtrConstant(0, dl));
- Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
- DAG.getIntPtrConstant(1, dl));
+ std::tie(Lo, Hi) = DAG.SplitScalar(Pair, dl, NVT, NVT);
}
/// Build an integer with low bits Lo and high bits Hi.
@@ -1005,7 +1000,7 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
LVT.getSizeInBits() + HVT.getSizeInBits());
- EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout(), false);
+ EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b97e44a01319..db8f61eee606 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -307,6 +307,7 @@ private:
SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N);
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_INTERLEAVE_DEINTERLEAVE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_ScalarOp(SDNode *N);
SDValue PromoteIntRes_STEP_VECTOR(SDNode *N);
@@ -331,6 +332,7 @@ private:
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_FFREXP(SDNode *N);
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_Select(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
@@ -345,7 +347,7 @@ private:
SDValue PromoteIntRes_SRL(SDNode *N);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
- SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UADDSUBO_CARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_SADDSUBO_CARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
@@ -383,6 +385,7 @@ private:
SDValue PromoteIntOp_Shift(SDNode *N);
SDValue PromoteIntOp_FunnelShift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_VP_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -390,15 +393,16 @@ private:
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_VP_ZERO_EXTEND(SDNode *N);
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FIX(SDNode *N);
- SDValue PromoteIntOp_FPOWI(SDNode *N);
+ SDValue PromoteIntOp_ExpOp(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
@@ -447,7 +451,7 @@ private:
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -558,9 +562,11 @@ private:
SDValue SoftenFloatRes_FNEG(SDNode *N);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
+ SDValue SoftenFloatRes_BF16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
SDValue SoftenFloatRes_FPOW(SDNode *N);
- SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_ExpOp(SDNode *N);
+ SDValue SoftenFloatRes_FFREXP(SDNode *N);
SDValue SoftenFloatRes_FREEZE(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
@@ -638,6 +644,7 @@ private:
void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLDEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -687,7 +694,8 @@ private:
SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N);
SDValue PromoteFloatRes_FMAD(SDNode *N);
- SDValue PromoteFloatRes_FPOWI(SDNode *N);
+ SDValue PromoteFloatRes_ExpOp(SDNode *N);
+ SDValue PromoteFloatRes_FFREXP(SDNode *N);
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_LOAD(SDNode *N);
SDValue PromoteFloatRes_SELECT(SDNode *N);
@@ -728,7 +736,7 @@ private:
SDValue SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N);
SDValue SoftPromoteHalfRes_FMAD(SDNode *N);
- SDValue SoftPromoteHalfRes_FPOWI(SDNode *N);
+ SDValue SoftPromoteHalfRes_ExpOp(SDNode *N);
SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
@@ -781,7 +789,7 @@ private:
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
- SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+ SDValue ScalarizeVecRes_ExpOp(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
@@ -795,6 +803,7 @@ private:
SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N);
SDValue ScalarizeVecRes_FIX(SDNode *N);
+ SDValue ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -843,6 +852,7 @@ private:
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -857,8 +867,7 @@ private:
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
@@ -875,6 +884,8 @@ private:
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
void SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N);
+ void SplitVecRes_VECTOR_INTERLEAVE(SDNode *N);
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -901,7 +912,7 @@ private:
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
- SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
+ SDValue SplitVecOp_FPOpDifferentTypes(SDNode *N);
SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N);
//===--------------------------------------------------------------------===//
@@ -942,6 +953,7 @@ private:
// Widen Vector Result Promotion.
void WidenVectorResult(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
+ SDValue WidenVecRes_AssertZext(SDNode* N);
SDValue WidenVecRes_BITCAST(SDNode* N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
@@ -976,7 +988,7 @@ private:
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
- SDValue WidenVecRes_POWI(SDNode *N);
+ SDValue WidenVecRes_ExpOp(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
@@ -1001,11 +1013,12 @@ private:
SDValue WidenVecOp_Convert(SDNode *N);
SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N);
- SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecOp_UnrollVectorOp(SDNode *N);
SDValue WidenVecOp_IS_FPCLASS(SDNode *N);
SDValue WidenVecOp_VECREDUCE(SDNode *N);
SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
SDValue WidenVecOp_VP_REDUCE(SDNode *N);
+ SDValue WidenVecOp_ExpOp(SDNode *N);
/// Helper function to generate a set of operations to perform
/// a vector operation for a wider type.
@@ -1071,6 +1084,7 @@ private:
// Generic Result Splitting.
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_Select (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 21b5255c8f72..296242c00401 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -571,6 +571,16 @@ void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getUNDEF(HiVT);
}
+void DAGTypeLegalizer::SplitVecRes_AssertZext(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue L, H;
+ SDLoc dl(N);
+ GetSplitOp(N->getOperand(0), L, H);
+
+ Lo = DAG.getNode(ISD::AssertZext, dl, L.getValueType(), L, N->getOperand(1));
+ Hi = DAG.getNode(ISD::AssertZext, dl, H.getValueType(), H, N->getOperand(1));
+}
+
void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue L, H;
SDLoc dl(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index e245b3cb4c6d..3862fd241897 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -38,7 +39,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -296,7 +296,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
ValVT = Node->getOperand(1).getValueType();
- Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
+ if (Op.getOpcode() == ISD::STRICT_FSETCC ||
+ Op.getOpcode() == ISD::STRICT_FSETCCS) {
+ MVT OpVT = Node->getOperand(1).getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal)
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
+ }
// If we're asked to expand a strict vector floating-point operation,
// by default we're going to simply unroll it. That is usually the
// best approach, except in the case where the resulting strict (scalar)
@@ -368,6 +377,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
+ case ISD::FLDEXP:
case ISD::FPOWI:
case ISD::FPOW:
case ISD::FLOG:
@@ -402,6 +412,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SMULO:
case ISD::UMULO:
case ISD::FCANONICALIZE:
+ case ISD::FFREXP:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -441,6 +452,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
@@ -454,7 +467,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
Action = TLI.getCondCodeAction(CCCode, OpVT);
if (Action == TargetLowering::Legal)
- Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
break;
}
@@ -785,6 +798,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::ABDS:
+ case ISD::ABDU:
+ if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::BITREVERSE:
ExpandBITREVERSE(Node, Results);
return;
@@ -943,6 +963,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Results.push_back(TLI.expandVecReduce(Node, DAG));
return;
case ISD::VECREDUCE_SEQ_FADD:
@@ -958,7 +980,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
- Results.push_back(DAG.UnrollVectorOp(Node));
+ SDValue Unrolled = DAG.UnrollVectorOp(Node);
+ for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
+ Results.push_back(Unrolled.getValue(I));
}
SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
@@ -1304,11 +1328,11 @@ SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
return DAG.UnrollVectorOp(Node);
SDValue Ones = DAG.getAllOnesConstant(DL, VT);
- SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Mask, EVL);
+ SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);
- Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Mask, EVL);
- Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Mask, EVL);
- return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Mask, EVL);
+ Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
+ Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
+ return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
}
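The expansion body is the usual mask-select identity, (Op1 & Mask) | (Op2 & ~Mask); the fix above only changes which mask the VP logic ops themselves execute under (all-ones instead of Mask). The identity on plain bitmasks (plain C++, illustrative only):

#include <cassert>
#include <cstdint>

static uint64_t select_via_and_or(uint64_t Mask, uint64_t Op1, uint64_t Op2) {
  uint64_t NotMask = Mask ^ ~0ull;         // the VP_XOR with all-ones above
  return (Op1 & Mask) | (Op2 & NotMask);   // VP_AND, VP_AND, VP_OR
}

int main() {
  // Per bit: take Op1 where Mask is set, Op2 where it is clear.
  assert(select_via_and_or(0xF0, 0xAA, 0x55) == 0xA5);
  return 0;
}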
SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
@@ -1516,39 +1540,54 @@ void VectorLegalizer::ExpandSETCC(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
bool NeedInvert = false;
bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
- SDLoc dl(Node);
- MVT OpVT = Node->getOperand(0).getSimpleValueType();
- ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS;
+ bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
+ unsigned Offset = IsStrict ? 1 : 0;
+
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ SDValue LHS = Node->getOperand(0 + Offset);
+ SDValue RHS = Node->getOperand(1 + Offset);
+ SDValue CC = Node->getOperand(2 + Offset);
+
+ MVT OpVT = LHS.getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
+ if (IsStrict) {
+ UnrollStrictFPOp(Node, Results);
+ return;
+ }
Results.push_back(UnrollVSETCC(Node));
return;
}
- SDValue Chain;
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
- SDValue CC = Node->getOperand(2);
SDValue Mask, EVL;
if (IsVP) {
- Mask = Node->getOperand(3);
- EVL = Node->getOperand(4);
+ Mask = Node->getOperand(3 + Offset);
+ EVL = Node->getOperand(4 + Offset);
}
+ SDLoc dl(Node);
bool Legalized =
TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
- EVL, NeedInvert, dl, Chain);
+ EVL, NeedInvert, dl, Chain, IsSignaling);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
// condition code, create a new SETCC node.
if (CC.getNode()) {
- if (!IsVP)
- LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
- Node->getFlags());
- else
+ if (IsStrict) {
+ LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
+ {Chain, LHS, RHS, CC}, Node->getFlags());
+ Chain = LHS.getValue(1);
+ } else if (IsVP) {
LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
{LHS, RHS, CC, Mask, EVL}, Node->getFlags());
+ } else {
+ LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
+ Node->getFlags());
+ }
}
// If we expanded the SETCC by inverting the condition code, then wrap
@@ -1560,6 +1599,8 @@ void VectorLegalizer::ExpandSETCC(SDNode *Node,
LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
}
} else {
+ assert(!IsStrict && "Don't know how to expand for strict nodes.");
+
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
@@ -1571,6 +1612,8 @@ void VectorLegalizer::ExpandSETCC(SDNode *Node,
}
Results.push_back(LHS);
+ if (IsStrict)
+ Results.push_back(Chain);
}
void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
@@ -1618,6 +1661,12 @@ void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
return;
}
+ if (Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS) {
+ ExpandSETCC(Node, Results);
+ return;
+ }
+
UnrollStrictFPOp(Node, Results);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index af5ea1ce5f45..8c117c1c74dc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -57,7 +57,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
- case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
@@ -113,7 +113,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCANONICALIZE:
R = ScalarizeVecRes_UnaryOp(N);
break;
-
+ case ISD::FFREXP:
+ R = ScalarizeVecRes_FFREXP(N, ResNo);
+ break;
case ISD::ADD:
case ISD::AND:
case ISD::FADD:
@@ -126,6 +128,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FLDEXP:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -142,6 +145,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FREM:
case ISD::FSUB:
case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
case ISD::OR:
case ISD::SDIV:
case ISD::SREM:
@@ -221,6 +226,34 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
Op2, N->getFlags());
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+
+ EVT VT0 = N->getValueType(0);
+ EVT VT1 = N->getValueType(1);
+ SDLoc dl(N);
+
+ SDNode *ScalarNode =
+ DAG.getNode(N->getOpcode(), dl,
+ {VT0.getScalarType(), VT1.getScalarType()}, Elt)
+ .getNode();
+
+ // Replace the other vector result not being explicitly scalarized here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
+ SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
+ } else {
+ SDValue OtherVal = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, OtherVT,
+ SDValue(ScalarNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(ScalarNode, ResNo);
+}
+
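FFREXP is a two-result node (fraction and exponent per element), which is why the scalarized node above has to place both values rather than only the one being asked for. The scalar semantics match the libm frexp the intrinsic is modelled on: x == fraction * 2^exp with |fraction| in [0.5, 1) for finite, non-zero x. For example (plain C++):

#include <cassert>
#include <cmath>

int main() {
  int Exp = 0;
  double Frac = std::frexp(24.0, &Exp);  // 24 == 0.75 * 2^5, exactly
  assert(Frac == 0.75 && Exp == 5);
  return 0;
}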
SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
EVT VT = N->getValueType(0).getVectorElementType();
unsigned NumOpers = N->getNumOperands();
@@ -348,10 +381,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
N->getOperand(1));
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ExpOp(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(ISD::FPOWI, SDLoc(N),
- Op.getValueType(), Op, N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
+ N->getOperand(1));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
@@ -695,6 +728,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Res = ScalarizeVecOp_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -948,6 +983,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
"operator!\n");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::AssertZext: SplitVecRes_AssertZext(N, Lo, Hi); break;
case ISD::VSELECT:
case ISD::SELECT:
case ISD::VP_MERGE:
@@ -959,8 +995,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
- case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
- case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::FPOWI:
+ case ISD::FLDEXP:
+ case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break;
case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SPLAT_VECTOR:
@@ -1000,6 +1037,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VECTOR_SPLICE:
SplitVecRes_VECTOR_SPLICE(N, Lo, Hi);
break;
+ case ISD::VECTOR_DEINTERLEAVE:
+ SplitVecRes_VECTOR_DEINTERLEAVE(N);
+ return;
+ case ISD::VECTOR_INTERLEAVE:
+ SplitVecRes_VECTOR_INTERLEAVE(N);
+ return;
case ISD::VAARG:
SplitVecRes_VAARG(N, Lo, Hi);
break;
@@ -1069,6 +1112,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
+ case ISD::FFREXP:
+ SplitVecRes_FFREXP(N, ResNo, Lo, Hi);
+ break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
@@ -1456,16 +1502,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign);
}
-void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- SDLoc dl(N);
- GetSplitVector(N->getOperand(0), Lo, Hi);
- Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
- Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
-}
-
-void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+// Handle splitting an FP node where the second operand does not match the
+// first type. The second operand may be a scalar, or a vector that has
+// exactly as many elements as the first.
+void DAGTypeLegalizer::SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDLoc DL(N);
@@ -1473,14 +1514,18 @@ void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
SDValue RHSLo, RHSHi;
SDValue RHS = N->getOperand(1);
EVT RHSVT = RHS.getValueType();
- if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
- GetSplitVector(RHS, RHSLo, RHSHi);
- else
- std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
-
+ if (RHSVT.isVector()) {
+ if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(RHS, RHSLo, RHSHi);
+ else
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
- Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
- Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
+ Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHSHi);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHS);
+ Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHS);
+ }
}
void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo,
@@ -2284,6 +2329,42 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags);
}
+void DAGTypeLegalizer::SplitVecRes_FFREXP(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
+ auto [LoVT1, HiVT1] = DAG.GetSplitDestVTs(N->getValueType(1));
+
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
+ EVT InVT = N->getOperand(0).getValueType();
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ else
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, {LoVT, LoVT1}, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, {HiVT, HiVT1}, Hi);
+ Lo->setFlags(N->getFlags());
+ Hi->setFlags(N->getFlags());
+
+ SDNode *HiNode = Hi.getNode();
+ SDNode *LoNode = Lo.getNode();
+
+ // Replace the other vector result not being explicitly split here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
+ SetSplitVector(SDValue(N, OtherNo), SDValue(LoNode, OtherNo),
+ SDValue(HiNode, OtherNo));
+ } else {
+ SDValue OtherVal =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, OtherVT, SDValue(LoNode, OtherNo),
+ SDValue(HiNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+}
+
void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
@@ -2377,7 +2458,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
EVT EltVT = NewVT.getVectorElementType();
SmallVector<SDValue> Ops(NewElts, DAG.getUNDEF(EltVT));
for (unsigned I = 0; I < NewElts; ++I) {
- if (Mask[I] == UndefMaskElem)
+ if (Mask[I] == PoisonMaskElem)
continue;
unsigned Idx = Mask[I];
if (Idx >= NewElts)
@@ -2417,11 +2498,11 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Use shuffles operands instead of shuffles themselves.
// 1. Adjust mask.
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (Inputs[SrcRegIdx].isUndef()) {
- Idx = UndefMaskElem;
+ Idx = PoisonMaskElem;
continue;
}
auto *Shuffle =
@@ -2429,8 +2510,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
if (!Shuffle || !is_contained(P.second, SrcRegIdx))
continue;
int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
- if (MaskElt == UndefMaskElem) {
- Idx = UndefMaskElem;
+ if (MaskElt == PoisonMaskElem) {
+ Idx = PoisonMaskElem;
continue;
}
Idx = MaskElt % NewElts +
@@ -2449,11 +2530,11 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Check if any concat_vectors can be simplified.
SmallBitVector UsedSubVector(2 * std::size(Inputs));
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (Inputs[SrcRegIdx].isUndef()) {
- Idx = UndefMaskElem;
+ Idx = PoisonMaskElem;
continue;
}
TargetLowering::LegalizeTypeAction TypeAction =
@@ -2483,7 +2564,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
if (!Pairs.empty() && Pairs.front().size() > 1) {
// Adjust mask.
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
auto *It = find_if(
@@ -2525,14 +2606,14 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
!Shuffle->getOperand(1).isUndef()) {
// Find the only used operand, if possible.
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (SrcRegIdx != I)
continue;
int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
- if (MaskElt == UndefMaskElem) {
- Idx = UndefMaskElem;
+ if (MaskElt == PoisonMaskElem) {
+ Idx = PoisonMaskElem;
continue;
}
int OpIdx = MaskElt / NewElts;
@@ -2558,14 +2639,14 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Found that operand is used already.
// 1. Fix the mask for the reused operand.
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (SrcRegIdx != I)
continue;
int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
- if (MaskElt == UndefMaskElem) {
- Idx = UndefMaskElem;
+ if (MaskElt == PoisonMaskElem) {
+ Idx = PoisonMaskElem;
continue;
}
int MaskIdx = MaskElt / NewElts;
@@ -2582,7 +2663,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
Inputs[I] = Shuffle->getOperand(Op);
// Adjust mask.
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (SrcRegIdx != I)
@@ -2616,11 +2697,11 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
auto &&UniqueConstantVec = UniqueConstantInputs.takeVector();
unsigned ConstNum = UniqueConstantVec.size();
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (Inputs[SrcRegIdx].isUndef()) {
- Idx = UndefMaskElem;
+ Idx = PoisonMaskElem;
continue;
}
const auto It = find(UniqueConstantVec, Inputs[SrcRegIdx]);
@@ -2649,7 +2730,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Build a shuffle mask for the output, discovering on the fly which
// input vectors to use as shuffle operands.
unsigned FirstMaskIdx = High * NewElts;
- SmallVector<int> Mask(NewElts * std::size(Inputs), UndefMaskElem);
+ SmallVector<int> Mask(NewElts * std::size(Inputs), PoisonMaskElem);
copy(ArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
assert(!Output && "Expected default initialized initial value.");
TryPeekThroughShufflesInputs(Mask);
@@ -2768,6 +2849,37 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
}
+void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) {
+
+ SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ EVT VT = Op0Lo.getValueType();
+ SDLoc DL(N);
+ SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op0Lo, Op0Hi);
+ SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op1Lo, Op1Hi);
+
+ SetSplitVector(SDValue(N, 0), ResLo.getValue(0), ResHi.getValue(0));
+ SetSplitVector(SDValue(N, 1), ResLo.getValue(1), ResHi.getValue(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_INTERLEAVE(SDNode *N) {
+ SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ EVT VT = Op0Lo.getValueType();
+ SDLoc DL(N);
+ SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op0Lo, Op1Lo),
+ DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op0Hi, Op1Hi)};
+
+ SetSplitVector(SDValue(N, 0), Res[0].getValue(0), Res[0].getValue(1));
+ SetSplitVector(SDValue(N, 1), Res[1].getValue(0), Res[1].getValue(1));
+}
+
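Both split routines lean on simple reshuffling identities: deinterleaving a concatenation can be done on each operand separately with the even/odd streams concatenated afterwards, and interleaving split operands yields the low and high halves of the full interleave. A container-level check of the deinterleave identity (plain C++, illustrative only; it assumes even-length pieces, as type splitting produces):

#include <cassert>
#include <utility>
#include <vector>

static std::pair<std::vector<int>, std::vector<int>>
deinterleave(const std::vector<int> &V) {
  std::pair<std::vector<int>, std::vector<int>> R;
  for (size_t I = 0; I < V.size(); ++I)
    (I % 2 ? R.second : R.first).push_back(V[I]);  // evens, then odds
  return R;
}

int main() {
  std::vector<int> Op0 = {0, 1, 2, 3}, Op1 = {4, 5, 6, 7};
  std::vector<int> Whole = Op0;
  Whole.insert(Whole.end(), Op1.begin(), Op1.end());

  auto [WEven, WOdd] = deinterleave(Whole);  // what the original node returns
  auto [AEven, AOdd] = deinterleave(Op0);    // per-operand deinterleave...
  auto [BEven, BOdd] = deinterleave(Op1);
  AEven.insert(AEven.end(), BEven.begin(), BEven.end());  // ...then concat
  AOdd.insert(AOdd.end(), BOdd.begin(), BOdd.end());

  assert(WEven == AEven && WOdd == AOdd);
  return 0;
}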
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
//===----------------------------------------------------------------------===//
@@ -2808,7 +2920,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FP_ROUND:
case ISD::VP_FP_ROUND:
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
- case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FPOpDifferentTypes(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -2862,6 +2974,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FTRUNC:
Res = SplitVecOp_UnaryOp(N);
break;
+ case ISD::FLDEXP:
+ Res = SplitVecOp_FPOpDifferentTypes(N);
+ break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
@@ -2882,6 +2997,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Res = SplitVecOp_VECREDUCE(N, OpNo);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -3807,10 +3924,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
-SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
- // The result (and the first input) has a legal vector type, but the second
- // input needs splitting.
-
+// Split a vector type in an FP binary operation where the second operand has a
+// different type from the first.
+//
+// The result (and the first input) has a legal vector type, but the second
+// input needs splitting.
+SDValue DAGTypeLegalizer::SplitVecOp_FPOpDifferentTypes(SDNode *N) {
SDLoc DL(N);
EVT LHSLoVT, LHSHiVT;
@@ -3826,8 +3945,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
SDValue RHSLo, RHSHi;
std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL);
- SDValue Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLoVT, LHSLo, RHSLo);
- SDValue Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHiVT, LHSHi, RHSHi);
+ SDValue Lo = DAG.getNode(N->getOpcode(), DL, LHSLoVT, LHSLo, RHSLo);
+ SDValue Hi = DAG.getNode(N->getOpcode(), DL, LHSHiVT, LHSHi, RHSHi);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi);
}
@@ -3885,9 +4004,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to widen the result of this operator!");
+ report_fatal_error("Do not know how to widen the result of this operator!");
case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::AssertZext: Res = WidenVecRes_AssertZext(N); break;
case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
@@ -4036,8 +4156,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_IS_FPCLASS(N);
break;
+ case ISD::FLDEXP:
case ISD::FPOWI:
- Res = WidenVecRes_POWI(N);
+ if (!unrollExpandedOp())
+ Res = WidenVecRes_ExpOp(N);
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
@@ -4394,10 +4516,18 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
for (unsigned i = 1; i < NumOpers; ++i) {
SDValue Oper = N->getOperand(i);
- if (Oper.getValueType().isVector()) {
- assert(Oper.getValueType() == N->getValueType(0) &&
- "Invalid operand type to widen!");
- Oper = GetWidenedVector(Oper);
+ EVT OpVT = Oper.getValueType();
+ if (OpVT.isVector()) {
+ if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector)
+ Oper = GetWidenedVector(Oper);
+ else {
+ EVT WideOpVT =
+ EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
+ WidenVT.getVectorElementCount());
+ Oper = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ DAG.getUNDEF(WideOpVT), Oper,
+ DAG.getVectorIdxConstant(0, dl));
+ }
}
InOps.push_back(Oper);
@@ -4415,9 +4545,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
for (unsigned i = 0; i < NumOpers; ++i) {
SDValue Op = InOps[i];
- if (Op.getValueType().isVector())
- Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
+ EVT OpVT = Op.getValueType();
+ if (OpVT.isVector()) {
+ EVT OpExtractVT =
+ EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
+ VT.getVectorElementCount());
+ Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpExtractVT, Op,
DAG.getVectorIdxConstant(Idx, dl));
+ }
EOps.push_back(Op);
}
@@ -4441,8 +4576,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
for (unsigned i = 0; i < NumOpers; ++i) {
SDValue Op = InOps[i];
- if (Op.getValueType().isVector())
- Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op,
+ EVT OpVT = Op.getValueType();
+ if (OpVT.isVector())
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OpVT.getVectorElementType(), Op,
DAG.getVectorIdxConstant(Idx, dl));
EOps.push_back(Op);
@@ -4751,11 +4888,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
N->getFlags());
}
-SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
- SDValue ShOp = N->getOperand(1);
- return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
+ SDValue RHS = N->getOperand(1);
+ SDValue ExpOp = RHS.getValueType().isVector() ? GetWidenedVector(RHS) : RHS;
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp);
}
SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
@@ -4763,7 +4902,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
if (N->getNumOperands() == 1)
- return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, N->getFlags());
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
assert(N->isVPOpcode() && "Expected VP opcode");
@@ -4863,7 +5002,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
WidenSize / InEltVT.getSizeInBits());
} else {
- NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumParts);
+ // For big endian systems, using the promoted input scalar type
+ // to produce the scalar_to_vector would put the desired bits into
+ // the least significant byte(s) of the wider element zero. This
+ // will mean that the users of the result vector are using incorrect
+ // bits. Use the original input type instead. Although either input
+ // type can be used on little endian systems, for consistency we
+ // use the original type there as well.
+ EVT OrigInVT = N->getOperand(0).getValueType();
+ NewNumParts = WidenSize / OrigInVT.getSizeInBits();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), OrigInVT, NewNumParts);
}
if (TLI.isTypeLegal(NewInVT)) {
@@ -5080,6 +5228,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getBuildVector(WidenVT, dl, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_AssertZext(SDNode *N) {
+ SDValue InOp = ModifyToType(
+ N->getOperand(0),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), true);
+ return DAG.getNode(ISD::AssertZext, SDLoc(N), InOp.getValueType(), InOp,
+ N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
@@ -5105,30 +5261,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
return SDValue();
}
- SDValue Result;
- SmallVector<SDValue, 16> LdChain; // Chain for the series of load
- if (ExtType != ISD::NON_EXTLOAD)
- Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
- else
- Result = GenWidenVectorLoads(LdChain, LD);
-
- if (Result) {
- // If we generate a single load, we can use that for the chain. Otherwise,
- // build a factor node to remember the multiple loads are independent and
- // chain to that.
- SDValue NewChain;
- if (LdChain.size() == 1)
- NewChain = LdChain[0];
- else
- NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
-
- // Modified the chain - switch anything that used the old chain to use
- // the new one.
- ReplaceValueWith(SDValue(N, 1), NewChain);
-
- return Result;
- }
-
// Generate a vector-predicated load if it is custom/legal on the target. To
// avoid possible recursion, only do this if the widened mask type is legal.
// FIXME: Not all targets may support EVL in VP_LOAD. These will have been
@@ -5138,15 +5270,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WideVT.getVectorElementCount());
- if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() &&
+ if (ExtType == ISD::NON_EXTLOAD &&
TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
TLI.isTypeLegal(WideMaskVT)) {
SDLoc DL(N);
SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
- MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
- unsigned NumVTElts = LdVT.getVectorMinNumElements();
- SDValue EVL =
- DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
+ LdVT.getVectorElementCount());
const auto *MMO = LD->getMemOperand();
SDValue NewLoad =
DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL,
@@ -5160,6 +5290,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
return NewLoad;
}
+ SDValue Result;
+  SmallVector<SDValue, 16> LdChain; // Chain for the series of loads
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ if (Result) {
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+ }
+
report_fatal_error("Unable to widen vector load");
}
@@ -5780,7 +5934,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to widen this operator's operand!");
+ report_fatal_error("Do not know how to widen this operator's operand!");
case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
@@ -5800,7 +5954,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
- case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
+ case ISD::FLDEXP:
+ case ISD::FCOPYSIGN: Res = WidenVecOp_UnrollVectorOp(N); break;
case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break;
case ISD::ANY_EXTEND:
@@ -5843,6 +5998,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Res = WidenVecOp_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -5947,7 +6104,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
}
}
-SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecOp_UnrollVectorOp(SDNode *N) {
// The result (and first input) is legal, but the second input is illegal.
// We can't do much to fix that, so just unroll and let the extracts off of
// the second input be widened as needed later.
@@ -6192,14 +6349,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
if (ST->isTruncatingStore())
return TLI.scalarizeVectorStore(ST, DAG);
- SmallVector<SDValue, 16> StChain;
- if (GenWidenVectorStores(StChain, ST)) {
- if (StChain.size() == 1)
- return StChain[0];
-
- return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
- }
-
// Generate a vector-predicated store if it is custom/legal on the target.
// To avoid possible recursion, only do this if the widened mask type is
// legal.
@@ -6211,23 +6360,29 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WideVT.getVectorElementCount());
- if (WideVT.isScalableVector() &&
- TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
+
+ if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
TLI.isTypeLegal(WideMaskVT)) {
// Widen the value.
SDLoc DL(N);
StVal = GetWidenedVector(StVal);
SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
- MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
- unsigned NumVTElts = StVT.getVectorMinNumElements();
- SDValue EVL =
- DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
+ StVT.getVectorElementCount());
return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(),
DAG.getUNDEF(ST->getBasePtr().getValueType()), Mask,
- EVL, StVal.getValueType(), ST->getMemOperand(),
+ EVL, StVT, ST->getMemOperand(),
ST->getAddressingMode());
}
+ SmallVector<SDValue, 16> StChain;
+ if (GenWidenVectorStores(StChain, ST)) {
+ if (StChain.size() == 1)
+ return StChain[0];
+
+ return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+ }
+
report_fatal_error("Unable to widen vector store");
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 9fcf692babdc..c31b971e7fc3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -230,7 +230,7 @@ public:
bool isEmitted() const { return Emitted; }
/// clearIsEmitted - Reset Emitted flag, for certain special cases where
- /// dbg.addr is emitted twice.
+ /// SDDbgValue is emitted twice. DBG_INSTR_REF depends on this behaviour.
void clearIsEmitted() { Emitted = false; }
LLVM_DUMP_METHOD void dump() const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 2d93adea6b9b..5b01743d23e0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -69,7 +69,7 @@ private:
/// LiveRegDefs - A set of physical registers and their definition
/// that are "live". These nodes must be scheduled before any other nodes that
/// modifies the registers can be scheduled.
- unsigned NumLiveRegs;
+ unsigned NumLiveRegs = 0u;
std::vector<SUnit*> LiveRegDefs;
std::vector<unsigned> LiveRegCycles;
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index c252046ef10b..458f50c54824 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -45,7 +46,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -156,16 +156,16 @@ private:
unsigned CurCycle = 0;
/// MinAvailableCycle - Cycle of the soonest available instruction.
- unsigned MinAvailableCycle;
+ unsigned MinAvailableCycle = ~0u;
/// IssueCount - Count instructions issued in this cycle
/// Currently valid only for bottom-up scheduling.
- unsigned IssueCount;
+ unsigned IssueCount = 0u;
/// LiveRegDefs - A set of physical registers and their definition
/// that are "live". These nodes must be scheduled before any other nodes that
/// modifies the registers can be scheduled.
- unsigned NumLiveRegs;
+ unsigned NumLiveRegs = 0u;
std::unique_ptr<SUnit*[]> LiveRegDefs;
std::unique_ptr<SUnit*[]> LiveRegGens;
@@ -1744,12 +1744,12 @@ protected:
bool SrcOrder;
// SUnits - The SUnits for the current graph.
- std::vector<SUnit> *SUnits;
+ std::vector<SUnit> *SUnits = nullptr;
MachineFunction &MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const TargetLowering *TLI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetLowering *TLI = nullptr;
ScheduleDAGRRList *scheduleDAG = nullptr;
// SethiUllmanNumbers - The SethiUllman number for each node.
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 2e1fd1e8a758..0579c1664d5c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -667,7 +667,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
// This copy is a liveout value. It is likely coalesced, so reduce the
// latency so not to penalize the def.
// FIXME: need target specific adjustment here?
- Latency = (Latency > 1) ? Latency - 1 : 1;
+ Latency = Latency - 1;
}
if (Latency >= 0)
dep.setLatency(Latency);
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 99bbaeb19182..439ccfdc3275 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -16,10 +16,10 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MachineValueType.h"
#include <cassert>
#include <string>
#include <vector>
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9a3609bc183b..5c1b19eba1c1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -17,11 +17,11 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -61,12 +62,12 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
@@ -200,10 +201,10 @@ bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {
SDValue NotZero = N->getOperand(i);
unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
- if (CN->getAPIntValue().countTrailingOnes() < EltSize)
+ if (CN->getAPIntValue().countr_one() < EltSize)
return false;
} else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
- if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
+ if (CFPN->getValueAPF().bitcastToAPInt().countr_one() < EltSize)
return false;
} else
return false;
@@ -244,10 +245,10 @@ bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {
// constants are.
unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
- if (CN->getAPIntValue().countTrailingZeros() < EltSize)
+ if (CN->getAPIntValue().countr_zero() < EltSize)
return false;
} else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) {
- if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize)
+ if (CFPN->getValueAPF().bitcastToAPInt().countr_zero() < EltSize)
return false;
} else
return false;
@@ -454,6 +455,10 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
case ISD::VECREDUCE_FMIN:
case ISD::VP_REDUCE_FMIN:
return ISD::FMINNUM;
+ case ISD::VECREDUCE_FMAXIMUM:
+ return ISD::FMAXIMUM;
+ case ISD::VECREDUCE_FMINIMUM:
+ return ISD::FMINIMUM;
}
}
@@ -516,6 +521,31 @@ std::optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
}
}
+std::optional<unsigned> ISD::getBaseOpcodeForVP(unsigned VPOpcode,
+ bool hasFPExcept) {
+ // FIXME: Return strict opcodes in case of fp exceptions.
+ switch (VPOpcode) {
+ default:
+ return std::nullopt;
+#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) case ISD::VPOPC:
+#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) return ISD::SDOPC;
+#define END_REGISTER_VP_SDNODE(VPOPC) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return std::nullopt;
+}
+
+unsigned ISD::getVPForBaseOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("can not translate this Opcode to VP.");
+#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) break;
+#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) case ISD::SDOPC:
+#define END_REGISTER_VP_SDNODE(VPOPC) return ISD::VPOPC;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
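// A minimal standalone sketch (not part of the upstream diff) of the X-macro
// technique used by getBaseOpcodeForVP/getVPForBaseOpcode above: including
// VPIntrinsics.def with the BEGIN/END macros defined expands into one switch
// case per opcode. The enum and the HANDLE_VP entries below are illustrative
// placeholders, not real ISD opcodes.
#include <optional>

enum class Opc { VP_ADD, VP_FSUB, ADD, FSUB };

std::optional<Opc> baseOpcodeForVPModel(Opc VPOpc) {
  switch (VPOpc) {
#define HANDLE_VP(VPOPC, SDOPC)                                                \
  case Opc::VPOPC:                                                             \
    return Opc::SDOPC;
    HANDLE_VP(VP_ADD, ADD)
    HANDLE_VP(VP_FSUB, FSUB)
#undef HANDLE_VP
  default:
    break;
  }
  return std::nullopt;
}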
+
ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
@@ -866,12 +896,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(AT->getMemOperand()->getFlags());
break;
}
- case ISD::PREFETCH: {
- const MemSDNode *PF = cast<MemSDNode>(N);
- ID.AddInteger(PF->getPointerInfo().getAddrSpace());
- ID.AddInteger(PF->getMemOperand()->getFlags());
- break;
- }
case ISD::VECTOR_SHUFFLE: {
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
@@ -890,14 +914,20 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::AssertAlign:
ID.AddInteger(cast<AssertAlignSDNode>(N)->getAlign().value());
break;
+ case ISD::PREFETCH:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ // Handled by MemIntrinsicSDNode check after the switch.
+ break;
} // end switch (N->getOpcode())
- // Target specific memory nodes could also have address spaces and flags
+ // MemIntrinsic nodes could also have subclass data, address spaces, and flags
// to check.
- if (N->isTargetMemoryOpcode()) {
- const MemSDNode *MN = cast<MemSDNode>(N);
+ if (auto *MN = dyn_cast<MemIntrinsicSDNode>(N)) {
+ ID.AddInteger(MN->getRawSubclassData());
ID.AddInteger(MN->getPointerInfo().getAddrSpace());
ID.AddInteger(MN->getMemOperand()->getFlags());
+ ID.AddInteger(MN->getMemoryVT().getRawBits());
}
}
@@ -1285,8 +1315,8 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE, Pass *PassPtr,
const TargetLibraryInfo *LibraryInfo,
- LegacyDivergenceAnalysis *Divergence,
- ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin,
+ UniformityInfo *NewUA, ProfileSummaryInfo *PSIin,
+ BlockFrequencyInfo *BFIin,
FunctionVarLocs const *VarLocs) {
MF = &NewMF;
SDAGISelPass = PassPtr;
@@ -1295,7 +1325,7 @@ void SelectionDAG::init(MachineFunction &NewMF,
TSI = getSubtarget().getSelectionDAGInfo();
LibInfo = LibraryInfo;
Context = &MF->getFunction().getContext();
- DA = Divergence;
+ UA = NewUA;
PSI = PSIin;
BFI = BFIin;
FnVarLocs = VarLocs;
@@ -1910,6 +1940,34 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
return SDValue(CondCodeNodes[Cond], 0);
}
+SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
+ bool ConstantFold) {
+ assert(MulImm.getBitWidth() == VT.getSizeInBits() &&
+ "APInt size does not match type size!");
+
+ if (ConstantFold) {
+ const MachineFunction &MF = getMachineFunction();
+ auto Attr = MF.getFunction().getFnAttribute(Attribute::VScaleRange);
+ if (Attr.isValid()) {
+ unsigned VScaleMin = Attr.getVScaleRangeMin();
+ if (std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax())
+ if (*VScaleMax == VScaleMin)
+ return getConstant(MulImm * VScaleMin, DL, VT);
+ }
+ }
+
+ return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
+}
+
+SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
+ bool ConstantFold) {
+ if (EC.isScalable())
+ return getVScale(DL, VT,
+ APInt(VT.getSizeInBits(), EC.getKnownMinValue()));
+
+ return getConstant(EC.getKnownMinValue(), DL, VT);
+}
+
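// A minimal standalone model (not part of the upstream diff) of the folding in
// getVScale/getElementCount above: when the vscale_range attribute pins vscale
// to a single value, vscale * MulImm becomes a plain constant, and a
// fixed-width element count already is one. Types and names are illustrative.
#include <cstdint>
#include <optional>

struct ECModel {
  uint64_t KnownMin; // Minimum number of elements.
  bool Scalable;     // True if the real count is KnownMin * vscale.
};

std::optional<uint64_t> foldElementCount(ECModel EC,
                                         std::optional<uint64_t> ExactVScale) {
  if (!EC.Scalable)
    return EC.KnownMin;                // Fixed vectors: already a constant.
  if (ExactVScale)
    return *ExactVScale * EC.KnownMin; // vscale_range(N, N): fold to N * min.
  return std::nullopt;                 // Otherwise a VSCALE node is needed.
}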
SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
APInt One(ResVT.getScalarSizeInBits(), 1);
return getStepVector(DL, ResVT, One);
@@ -2128,7 +2186,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
return SDValue(E, 0);
auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
- N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+ N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -2381,6 +2439,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
ISD::CondCode Cond, const SDLoc &dl) {
EVT OpVT = N1.getValueType();
+ auto GetUndefBooleanConstant = [&]() {
+ if (VT.getScalarType() == MVT::i1 ||
+ TLI->getBooleanContents(OpVT) ==
+ TargetLowering::UndefinedBooleanContent)
+ return getUNDEF(VT);
+ // ZeroOrOne / ZeroOrNegative require specific values for the high bits,
+ // so we cannot use getUNDEF(). Return zero instead.
+ return getConstant(0, dl, VT);
+ };
+
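// A minimal standalone illustration (not part of the upstream diff) of why the
// lambda above returns zero instead of undef for ZeroOrOne /
// ZeroOrNegativeOne boolean contents: those conventions require every bit
// above bit 0 to take a specific value, so an arbitrary bit pattern is not a
// valid boolean while 0 always is. The helper below is illustrative.
#include <cstdint>

// Valid encodings under "ZeroOrNegativeOne" boolean contents: all zeros or
// all ones; any other pattern (such as an undef value) breaks consumers.
bool isValidZeroOrNegOneBool(uint32_t V) { return V == 0u || V == ~0u; }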
// These setcc operations always fold.
switch (Cond) {
default: break;
@@ -2410,12 +2478,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
// icmp eq/ne X, undef -> undef.
if ((N1.isUndef() || N2.isUndef()) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE))
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
// If both operands are undef, we can return undef for int comparison.
// icmp undef, undef -> undef.
if (N1.isUndef() && N2.isUndef())
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
// icmp X, X -> true/false
// icmp X, undef -> true/false because undef could be X.
@@ -2441,34 +2509,34 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
switch (Cond) {
default: break;
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
OpVT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpLessThan, dl, VT,
OpVT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
OpVT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
VT, OpVT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
R==APFloat::cmpEqual, dl, VT,
OpVT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpEqual, dl, VT, OpVT);
@@ -2513,7 +2581,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
case 1: // Known true.
return getBoolConstant(true, dl, VT, OpVT);
case 2: // Undefined.
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
}
}
@@ -2567,7 +2635,7 @@ APInt SelectionDAG::computeVectorKnownZeroElements(SDValue Op,
unsigned NumElts = VT.getVectorNumElements();
assert(DemandedElts.getBitWidth() == NumElts && "Unexpected demanded mask.");
- APInt KnownZeroElements = APInt::getNullValue(NumElts);
+ APInt KnownZeroElements = APInt::getZero(NumElts);
for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) {
if (!DemandedElts[EltIdx])
continue; // Don't query elements that are not demanded.
@@ -2661,8 +2729,8 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
}
case ISD::VECTOR_SHUFFLE: {
// Check if this is a shuffle node doing a splat or a shuffle of a splat.
- APInt DemandedLHS = APInt::getNullValue(NumElts);
- APInt DemandedRHS = APInt::getNullValue(NumElts);
+ APInt DemandedLHS = APInt::getZero(NumElts);
+ APInt DemandedRHS = APInt::getZero(NumElts);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
for (int i = 0; i != (int)NumElts; ++i) {
int M = Mask[i];
@@ -2689,7 +2757,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
// TODO: Handle source ops splats with undefs.
auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) {
APInt SrcUndefs;
- return (SrcElts.countPopulation() == 1) ||
+ return (SrcElts.popcount() == 1) ||
(isSplatValue(Src, SrcElts, SrcUndefs, Depth + 1) &&
(SrcElts & SrcUndefs).isZero());
};
@@ -2808,7 +2876,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
SplatIdx = 0;
return getUNDEF(VT);
}
- SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
+ SplatIdx = (UndefElts & DemandedElts).countr_one();
}
return V;
}
@@ -3005,7 +3073,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
// Known bits are the values that are shared by every demanded element.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -3028,7 +3096,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -3036,10 +3104,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
break;
}
+ case ISD::VSCALE: {
+ const Function &F = getMachineFunction().getFunction();
+ const APInt &Multiplier = Op.getConstantOperandAPInt(0);
+ Known = getVScaleRange(&F, BitWidth).multiply(Multiplier).toKnownBits();
+ break;
+ }
case ISD::CONCAT_VECTORS: {
if (Op.getValueType().isScalableVector())
break;
@@ -3054,7 +3128,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedSub) {
SDValue Sub = Op.getOperand(i);
Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -3084,7 +3158,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
if (!!DemandedSrcElts) {
Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
break;
}
@@ -3174,8 +3248,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (DemandedElts[i]) {
unsigned Shifts = IsLE ? i : NumElts - 1 - i;
unsigned Offset = (Shifts % SubScale) * BitWidth;
- Known = KnownBits::commonBits(Known,
- Known2.extractBits(BitWidth, Offset));
+ Known = Known.intersectWith(Known2.extractBits(BitWidth, Offset));
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
@@ -3273,7 +3346,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
case ISD::SELECT_CC:
Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1);
@@ -3283,7 +3356,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
case ISD::SMULO:
case ISD::UMULO:
@@ -3334,7 +3407,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known = KnownBits::ashr(Known, Known2);
- // TODO: Add minimum shift high known sign bits.
break;
case ISD::FSHL:
case ISD::FSHR:
@@ -3364,8 +3436,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2.One.lshrInPlace(Amt);
Known2.Zero.lshrInPlace(Amt);
}
- Known.One |= Known2.One;
- Known.Zero |= Known2.Zero;
+ Known = Known.unionWith(Known2);
}
break;
case ISD::SHL_PARTS:
@@ -3588,9 +3659,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// All bits are zero except the low bit.
Known.Zero.setBitsFrom(1);
break;
+ case ISD::ADD:
+ case ISD::SUB: {
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
+ Flags.hasNoSignedWrap(), Known, Known2);
+ break;
+ }
case ISD::USUBO:
case ISD::SSUBO:
- case ISD::SUBCARRY:
+ case ISD::USUBO_CARRY:
case ISD::SSUBO_CARRY:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
@@ -3601,13 +3681,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
[[fallthrough]];
- case ISD::SUB:
case ISD::SUBC: {
assert(Op.getResNo() == 0 &&
"We only compute knownbits for the difference here.");
// TODO: Compute influence of the carry operand.
- if (Opcode == ISD::SUBCARRY || Opcode == ISD::SSUBO_CARRY)
+ if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY)
break;
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
@@ -3618,7 +3697,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::UADDO:
case ISD::SADDO:
- case ISD::ADDCARRY:
+ case ISD::UADDO_CARRY:
case ISD::SADDO_CARRY:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
@@ -3629,17 +3708,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
[[fallthrough]];
- case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE: {
assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here.");
- // With ADDE and ADDCARRY, a carry bit may be added in.
+ // With ADDE and UADDO_CARRY, a carry bit may be added in.
KnownBits Carry(1);
if (Opcode == ISD::ADDE)
// Can't track carry from glue, set carry to unknown.
Carry.resetAll();
- else if (Opcode == ISD::ADDCARRY || Opcode == ISD::SADDO_CARRY)
+ else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY)
// TODO: Compute known bits for the carry operand. Not sure if it is worth
// the trouble (how often will we find a known carry bit). And I haven't
// tested this very much yet, but something like this might work:
@@ -3657,7 +3735,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::UDIV: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = KnownBits::udiv(Known, Known2);
+ Known = KnownBits::udiv(Known, Known2, Op->getFlags().hasExact());
+ break;
+ }
+ case ISD::SDIV: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::sdiv(Known, Known2, Op->getFlags().hasExact());
break;
}
case ISD::SREM: {
@@ -3735,11 +3819,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setAllBits();
if (DemandedVal) {
Known2 = computeKnownBits(InVal, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2.zextOrTrunc(BitWidth));
+ Known = Known.intersectWith(Known2.zextOrTrunc(BitWidth));
}
if (!!DemandedVecElts) {
Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
break;
}
@@ -3897,38 +3981,87 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
return Known;
}
-SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
- SDValue N1) const {
+/// Convert ConstantRange OverflowResult into SelectionDAG::OverflowKind.
+static SelectionDAG::OverflowKind
+mapOverflowResult(ConstantRange::OverflowResult OR) {
+ switch (OR) {
+ case ConstantRange::OverflowResult::MayOverflow:
+ return SelectionDAG::OFK_Sometime;
+ case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+ case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
+ return SelectionDAG::OFK_Always;
+ case ConstantRange::OverflowResult::NeverOverflows:
+ return SelectionDAG::OFK_Never;
+ }
+ llvm_unreachable("Unknown OverflowResult");
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForSignedAdd(SDValue N0, SDValue N1) const {
// X + 0 never overflow
if (isNullConstant(N1))
return OFK_Never;
- KnownBits N1Known = computeKnownBits(N1);
- if (N1Known.Zero.getBoolValue()) {
- KnownBits N0Known = computeKnownBits(N0);
+ // If both operands each have at least two sign bits, the addition
+ // cannot overflow.
+ if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
+ return OFK_Never;
- bool overflow;
- (void)N0Known.getMaxValue().uadd_ov(N1Known.getMaxValue(), overflow);
- if (!overflow)
- return OFK_Never;
- }
+ // TODO: Add ConstantRange::signedAddMayOverflow handling.
+ return OFK_Sometime;
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const {
+ // X + 0 never overflow
+ if (isNullConstant(N1))
+ return OFK_Never;
// mulhi + 1 never overflow
+ KnownBits N1Known = computeKnownBits(N1);
if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
- (N1Known.getMaxValue() & 0x01) == N1Known.getMaxValue())
+ N1Known.getMaxValue().ult(2))
return OFK_Never;
- if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
- KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N0Known = computeKnownBits(N0);
+ if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1 &&
+ N0Known.getMaxValue().ult(2))
+ return OFK_Never;
- if ((N0Known.getMaxValue() & 0x01) == N0Known.getMaxValue())
- return OFK_Never;
- }
+ // Fallback to ConstantRange::unsignedAddMayOverflow handling.
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+ return mapOverflowResult(N0Range.unsignedAddMayOverflow(N1Range));
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const {
+ // X - 0 never overflow
+ if (isNullConstant(N1))
+ return OFK_Never;
+
+ // If both operands each have at least two sign bits, the subtraction
+ // cannot overflow.
+ if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
+ return OFK_Never;
+
+ // TODO: Add ConstantRange::signedSubMayOverflow handling.
+ return OFK_Sometime;
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const {
+ // X - 0 never overflow
+ if (isNullConstant(N1))
+ return OFK_Never;
+ // TODO: Add ConstantRange::unsignedSubMayOverflow handling.
return OFK_Sometime;
}
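// A minimal standalone model (not part of the upstream diff) of the
// ConstantRange-based fallback in computeOverflowForUnsignedAdd above: with
// non-wrapped operand ranges [Lo0, Hi0] and [Lo1, Hi1] in an N-bit type, the
// sum never overflows if Hi0 + Hi1 fits in N bits, always overflows if even
// Lo0 + Lo1 does not, and may overflow otherwise. The 8-bit width below is
// illustrative.
#include <cstdint>

enum class OverflowKindModel { Never, Sometime, Always };

OverflowKindModel unsignedAddOverflow8(uint8_t Lo0, uint8_t Hi0, uint8_t Lo1,
                                       uint8_t Hi1) {
  if (unsigned(Hi0) + unsigned(Hi1) <= 0xFF)
    return OverflowKindModel::Never;
  if (unsigned(Lo0) + unsigned(Lo1) > 0xFF)
    return OverflowKindModel::Always;
  return OverflowKindModel::Sometime;
}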
-bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
+bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getScalarSizeInBits();
@@ -3970,15 +4103,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// vscale(power-of-two) is a power-of-two for some targets
if (Val.getOpcode() == ISD::VSCALE &&
getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() &&
- isKnownToBeAPowerOfTwo(Val.getOperand(0)))
+ isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1))
return true;
// More could be done here, though the above checks are enough
// to handle some common cases.
-
- // Fall back to computeKnownBits to catch other known cases.
- KnownBits Known = computeKnownBits(Val);
- return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
+ return false;
}
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
@@ -4041,14 +4171,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
continue;
SDValue SrcOp = Op.getOperand(i);
- Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1);
+ // BUILD_VECTOR can implicitly truncate sources, we handle this specially
+ // for constant nodes to ensure we only look at the sign bits.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SrcOp)) {
+ APInt T = C->getAPIntValue().trunc(VTBits);
+ Tmp2 = T.getNumSignBits();
+ } else {
+ Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1);
- // BUILD_VECTOR can implicitly truncate sources, we must handle this.
- if (SrcOp.getValueSizeInBits() != VTBits) {
- assert(SrcOp.getValueSizeInBits() > VTBits &&
- "Expected BUILD_VECTOR implicit truncation");
- unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
- Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
+ if (SrcOp.getValueSizeInBits() != VTBits) {
+ assert(SrcOp.getValueSizeInBits() > VTBits &&
+ "Expected BUILD_VECTOR implicit truncation");
+ unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
+ Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
+ }
}
Tmp = std::min(Tmp, Tmp2);
}
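// A small standalone check (not part of the upstream diff) of why the
// constant path above truncates first: a BUILD_VECTOR operand may be wider
// than the element type, and the sign-bit count of the truncated value can be
// much better than the conservative subtraction. Values and widths are
// illustrative.
#include <cassert>
#include <cstdint>

// Number of leading bits equal to the sign bit in an N-bit value.
unsigned numSignBits(uint64_t V, unsigned N) {
  unsigned Count = 0;
  uint64_t Sign = (V >> (N - 1)) & 1;
  for (unsigned I = N; I-- > 0 && ((V >> I) & 1) == Sign;)
    ++Count;
  return Count;
}

int main() {
  // An i32 constant 0xFF feeding an i8 element: 24 sign bits as an i32 value,
  // but after truncation to i8 it is -1, which has all 8 sign bits.
  assert(numSignBits(0xFF, 32) == 24);
  assert(numSignBits(0xFF, 8) == 8);
  return 0;
}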
@@ -4225,11 +4361,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::SADDO:
case ISD::UADDO:
case ISD::SADDO_CARRY:
- case ISD::ADDCARRY:
+ case ISD::UADDO_CARRY:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SSUBO_CARRY:
- case ISD::SUBCARRY:
+ case ISD::USUBO_CARRY:
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
@@ -4733,6 +4869,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::AssertSext:
case ISD::AssertZext:
case ISD::FREEZE:
+ case ISD::CONCAT_VECTORS:
case ISD::INSERT_SUBVECTOR:
case ISD::AND:
case ISD::OR:
@@ -4753,6 +4890,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::ZERO_EXTEND_VECTOR_INREG:
case ISD::BITCAST:
case ISD::BUILD_VECTOR:
+ case ISD::BUILD_PAIR:
return false;
case ISD::ADD:
@@ -4771,6 +4909,13 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
Op->getFlags().hasNoUnsignedWrap());
+  case ISD::INSERT_VECTOR_ELT: {
+ // Ensure that the element index is in bounds.
+ EVT VecVT = Op.getOperand(0).getValueType();
+ KnownBits KnownIdx = computeKnownBits(Op.getOperand(2), Depth + 1);
+ return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
+ }
+
default:
// Allow the target to implement this method for its nodes.
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
@@ -4835,7 +4980,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FRINT:
- case ISD::FNEARBYINT: {
+ case ISD::FNEARBYINT:
+ case ISD::FLDEXP: {
if (SNaN)
return true;
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
@@ -4918,13 +5064,28 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
"Floating point type expected");
// If the value is a constant, we can obviously see if it is a zero or not.
- // TODO: Add BuildVector support.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->isZero();
+
+ // Return false if we find any zero in a vector.
+ if (Op->getOpcode() == ISD::BUILD_VECTOR ||
+ Op->getOpcode() == ISD::SPLAT_VECTOR) {
+ for (const SDValue &OpVal : Op->op_values()) {
+ if (OpVal.isUndef())
+ return false;
+ if (auto *C = dyn_cast<ConstantFPSDNode>(OpVal))
+ if (C->isZero())
+ return false;
+ }
+ return true;
+ }
return false;
}
-bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
assert(!Op.getValueType().isFloatingPoint() &&
"Floating point types unsupported - use isKnownNeverZeroFloat");
@@ -4933,24 +5094,105 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
[](ConstantSDNode *C) { return !C->isZero(); }))
return true;
- // TODO: Recognize more cases here.
+ // TODO: Recognize more cases here. Most of the cases are also incomplete to
+ // some degree.
switch (Op.getOpcode()) {
- default: break;
+ default:
+ break;
+
case ISD::OR:
- if (isKnownNeverZero(Op.getOperand(1)) ||
- isKnownNeverZero(Op.getOperand(0)))
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::VSELECT:
+ case ISD::SELECT:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(2), Depth + 1);
+
+ case ISD::SHL:
+ if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ // 1 << X is never zero. TODO: This can be expanded if we can bound X.
+ // The expression is really !Known.One[BitWidth-MaxLog2(Known):0].isZero()
+ if (computeKnownBits(Op.getOperand(0), Depth + 1).One[0])
return true;
break;
+
+ case ISD::UADDSAT:
+ case ISD::UMAX:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::UMIN:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTPOP:
+ case ISD::ABS:
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ // Signed >> X is never zero. TODO: This can be expanded if we can bound X.
+ // The expression is really
+ // !Known.One[SignBit:SignBit-(BitWidth-MaxLog2(Known))].isZero()
+ if (computeKnownBits(Op.getOperand(0), Depth + 1).isNegative())
+ return true;
+ break;
+
+ case ISD::UDIV:
+ case ISD::SDIV:
+ // div exact can only produce a zero if the dividend is zero.
+ // TODO: For udiv this is also true if Op1 u<= Op0
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ break;
+
+ case ISD::ADD:
+ if (Op->getFlags().hasNoUnsignedWrap())
+ if (isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ return true;
+ // TODO: There are a lot more cases we can prove for add.
+ break;
+
+ case ISD::SUB: {
+ if (isNullConstant(Op.getOperand(0)))
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1);
+
+ std::optional<bool> ne =
+ KnownBits::ne(computeKnownBits(Op.getOperand(0), Depth + 1),
+ computeKnownBits(Op.getOperand(1), Depth + 1));
+ return ne && *ne;
}
- return false;
+ case ISD::MUL:
+ if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
+ if (isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ return true;
+ break;
+
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ }
+
+ return computeKnownBits(Op, Depth).isNonZero();
}
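// A minimal standalone model (not part of the upstream diff) of the new
// ISD::SUB case above: if the tracked known bits prove the operands can never
// be equal, their difference can never be zero. The check below is a
// sufficient condition for KnownBits::ne; masks and widths are illustrative.
#include <cstdint>

struct KnownBitsModel {
  uint64_t Zero = 0; // Bits known to be 0.
  uint64_t One = 0;  // Bits known to be 1.
};

// True when some bit is known 1 in one operand and known 0 in the other,
// which proves LHS != RHS and therefore LHS - RHS != 0.
bool provablyNotEqual(const KnownBitsModel &LHS, const KnownBitsModel &RHS) {
  return (LHS.One & RHS.Zero) != 0 || (LHS.Zero & RHS.One) != 0;
}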
bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
// Check the obvious case.
if (A == B) return true;
- // For for negative and positive zero.
+ // For negative and positive zero.
if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
if (CA->isZero() && CB->isZero()) return true;
@@ -4986,6 +5228,10 @@ static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) {
SDValue Other) {
if (SDValue NotOperand =
getBitwiseNotOperand(Not, Mask, /* AllowUndefs */ true)) {
+ if (NotOperand->getOpcode() == ISD::ZERO_EXTEND ||
+ NotOperand->getOpcode() == ISD::TRUNCATE)
+ NotOperand = NotOperand->getOperand(0);
+
if (Other == NotOperand)
return true;
if (Other->getOpcode() == ISD::AND)
@@ -4994,6 +5240,13 @@ static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) {
}
return false;
};
+
+ if (A->getOpcode() == ISD::ZERO_EXTEND || A->getOpcode() == ISD::TRUNCATE)
+ A = A->getOperand(0);
+
+ if (B->getOpcode() == ISD::ZERO_EXTEND || B->getOpcode() == ISD::TRUNCATE)
+ B = B->getOperand(0);
+
if (A->getOpcode() == ISD::AND)
return MatchNoCommonBitsPattern(A->getOperand(0), A->getOperand(1), B) ||
MatchNoCommonBitsPattern(A->getOperand(1), A->getOperand(0), B);
@@ -5159,23 +5412,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDValue Operand) {
+ SDValue N1) {
SDNodeFlags Flags;
if (Inserter)
Flags = Inserter->getFlags();
- return getNode(Opcode, DL, VT, Operand, Flags);
+ return getNode(Opcode, DL, VT, N1, Flags);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDValue Operand, const SDNodeFlags Flags) {
- assert(Operand.getOpcode() != ISD::DELETED_NODE &&
- "Operand is DELETED_NODE!");
+ SDValue N1, const SDNodeFlags Flags) {
+ assert(N1.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!");
// Constant fold unary operations with an integer constant operand. Even
// opaque constant will be folded, because the folding of unary operations
// doesn't create new constants with different values. Nevertheless, the
// opaque flag is preserved during folding to prevent future folding with
// other constants.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
const APInt &Val = C->getAPIntValue();
switch (Opcode) {
default: break;
@@ -5191,7 +5443,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
C->isTargetOpcode(), C->isOpaque());
case ISD::ANY_EXTEND:
// Some targets like RISCV prefer to sign extend some types.
- if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT))
+ if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
@@ -5225,15 +5477,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTPOP:
- return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(),
+ return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(),
+ return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
+ return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::FP16_TO_FP:
case ISD::BF16_TO_FP: {
@@ -5249,7 +5501,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getConstantFP(FPV, DL, VT);
}
case ISD::STEP_VECTOR: {
- if (SDValue V = FoldSTEP_VECTOR(DL, VT, Operand, *this))
+ if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
return V;
break;
}
@@ -5257,7 +5509,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
// Constant fold unary operations with a floating point constant operand.
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) {
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N1)) {
APFloat V = C->getValueAPF(); // make copy
switch (Opcode) {
case ISD::FNEG:
@@ -5354,262 +5606,250 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
- SDValue Ops = {Operand};
+ SDValue Ops = {N1};
if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
return Fold;
}
}
- unsigned OpOpcode = Operand.getNode()->getOpcode();
+ unsigned OpOpcode = N1.getNode()->getOpcode();
switch (Opcode) {
case ISD::STEP_VECTOR:
assert(VT.isScalableVector() &&
"STEP_VECTOR can only be used with scalable types");
assert(OpOpcode == ISD::TargetConstant &&
- VT.getVectorElementType() == Operand.getValueType() &&
+ VT.getVectorElementType() == N1.getValueType() &&
"Unexpected step operand");
break;
case ISD::FREEZE:
- assert(VT == Operand.getValueType() && "Unexpected VT!");
- if (isGuaranteedNotToBeUndefOrPoison(Operand, /*PoisonOnly*/ false,
+ assert(VT == N1.getValueType() && "Unexpected VT!");
+ if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly*/ false,
/*Depth*/ 1))
- return Operand;
+ return N1;
break;
case ISD::TokenFactor:
case ISD::MERGE_VALUES:
case ISD::CONCAT_VECTORS:
- return Operand; // Factor, merge or concat of one node? No need.
+ return N1; // Factor, merge or concat of one node? No need.
case ISD::BUILD_VECTOR: {
// Attempt to simplify BUILD_VECTOR.
- SDValue Ops[] = {Operand};
+ SDValue Ops[] = {N1};
if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
return V;
break;
}
case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
case ISD::FP_EXTEND:
- assert(VT.isFloatingPoint() &&
- Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
- if (Operand.getValueType() == VT) return Operand; // noop conversion.
- assert((!VT.isVector() ||
- VT.getVectorElementCount() ==
- Operand.getValueType().getVectorElementCount()) &&
+ assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() &&
+ "Invalid FP cast!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
- assert(Operand.getValueType().bitsLT(VT) &&
- "Invalid fpext node, dst < src!");
- if (Operand.isUndef())
+ assert(N1.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!");
+ if (N1.isUndef())
return getUNDEF(VT);
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
- if (Operand.isUndef())
+ if (N1.isUndef())
return getUNDEF(VT);
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
// [us]itofp(undef) = 0, because the result value is bounded.
- if (Operand.isUndef())
+ if (N1.isUndef())
return getConstantFP(0.0, DL, VT);
break;
case ISD::SIGN_EXTEND:
- assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
- assert(VT.isVector() == Operand.getValueType().isVector() &&
+ assert(VT.isVector() == N1.getValueType().isVector() &&
"SIGN_EXTEND result type type should be vector iff the operand "
"type is vector!");
- if (Operand.getValueType() == VT) return Operand; // noop extension
- assert((!VT.isVector() ||
- VT.getVectorElementCount() ==
- Operand.getValueType().getVectorElementCount()) &&
+ if (N1.getValueType() == VT) return N1; // noop extension
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
- assert(Operand.getValueType().bitsLT(VT) &&
- "Invalid sext node, dst < src!");
+ assert(N1.getValueType().bitsLT(VT) && "Invalid sext node, dst < src!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
- return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0));
if (OpOpcode == ISD::UNDEF)
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, DL, VT);
break;
case ISD::ZERO_EXTEND:
- assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
"Invalid ZERO_EXTEND!");
- assert(VT.isVector() == Operand.getValueType().isVector() &&
+ assert(VT.isVector() == N1.getValueType().isVector() &&
"ZERO_EXTEND result type type should be vector iff the operand "
"type is vector!");
- if (Operand.getValueType() == VT) return Operand; // noop extension
- assert((!VT.isVector() ||
- VT.getVectorElementCount() ==
- Operand.getValueType().getVectorElementCount()) &&
+ if (N1.getValueType() == VT) return N1; // noop extension
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
- assert(Operand.getValueType().bitsLT(VT) &&
- "Invalid zext node, dst < src!");
- if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
- return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0));
+ assert(N1.getValueType().bitsLT(VT) && "Invalid zext node, dst < src!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0));
if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
break;
case ISD::ANY_EXTEND:
- assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
"Invalid ANY_EXTEND!");
- assert(VT.isVector() == Operand.getValueType().isVector() &&
+ assert(VT.isVector() == N1.getValueType().isVector() &&
"ANY_EXTEND result type type should be vector iff the operand "
"type is vector!");
- if (Operand.getValueType() == VT) return Operand; // noop extension
- assert((!VT.isVector() ||
- VT.getVectorElementCount() ==
- Operand.getValueType().getVectorElementCount()) &&
+ if (N1.getValueType() == VT) return N1; // noop extension
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
- assert(Operand.getValueType().bitsLT(VT) &&
- "Invalid anyext node, dst < src!");
+ assert(N1.getValueType().bitsLT(VT) && "Invalid anyext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
- return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0));
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// (ext (trunc x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
- SDValue OpOp = Operand.getOperand(0);
+ SDValue OpOp = N1.getOperand(0);
if (OpOp.getValueType() == VT) {
- transferDbgValues(Operand, OpOp);
+ transferDbgValues(N1, OpOp);
return OpOp;
}
}
break;
case ISD::TRUNCATE:
- assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
"Invalid TRUNCATE!");
- assert(VT.isVector() == Operand.getValueType().isVector() &&
+ assert(VT.isVector() == N1.getValueType().isVector() &&
"TRUNCATE result type type should be vector iff the operand "
"type is vector!");
- if (Operand.getValueType() == VT) return Operand; // noop truncate
- assert((!VT.isVector() ||
- VT.getVectorElementCount() ==
- Operand.getValueType().getVectorElementCount()) &&
+ if (N1.getValueType() == VT) return N1; // noop truncate
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
- assert(Operand.getValueType().bitsGT(VT) &&
- "Invalid truncate node, src < dst!");
+ assert(N1.getValueType().bitsGT(VT) && "Invalid truncate node, src < dst!");
if (OpOpcode == ISD::TRUNCATE)
- return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
+ return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
- if (Operand.getOperand(0).getValueType().getScalarType()
- .bitsLT(VT.getScalarType()))
- return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
- if (Operand.getOperand(0).getValueType().bitsGT(VT))
- return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
- return Operand.getOperand(0);
+ if (N1.getOperand(0).getValueType().getScalarType().bitsLT(
+ VT.getScalarType()))
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0));
+ if (N1.getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
+ return N1.getOperand(0);
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)
- return getVScale(DL, VT, Operand.getConstantOperandAPInt(0));
+ return getVScale(DL, VT,
+ N1.getConstantOperandAPInt(0).trunc(VT.getSizeInBits()));
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(Operand.getValueType().bitsLE(VT) &&
+ assert(N1.getValueType().bitsLE(VT) &&
"The input must be the same size or smaller than the result.");
assert(VT.getVectorMinNumElements() <
- Operand.getValueType().getVectorMinNumElements() &&
+ N1.getValueType().getVectorMinNumElements() &&
"The destination vector type must have fewer lanes than the input.");
break;
case ISD::ABS:
- assert(VT.isInteger() && VT == Operand.getValueType() &&
- "Invalid ABS!");
+ assert(VT.isInteger() && VT == N1.getValueType() && "Invalid ABS!");
if (OpOpcode == ISD::UNDEF)
return getConstant(0, DL, VT);
break;
case ISD::BSWAP:
- assert(VT.isInteger() && VT == Operand.getValueType() &&
- "Invalid BSWAP!");
+ assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BSWAP!");
assert((VT.getScalarSizeInBits() % 16 == 0) &&
"BSWAP types must be a multiple of 16 bits!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// bswap(bswap(X)) -> X.
if (OpOpcode == ISD::BSWAP)
- return Operand.getOperand(0);
+ return N1.getOperand(0);
break;
case ISD::BITREVERSE:
- assert(VT.isInteger() && VT == Operand.getValueType() &&
- "Invalid BITREVERSE!");
+ assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BITREVERSE!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::BITCAST:
- assert(VT.getSizeInBits() == Operand.getValueSizeInBits() &&
+ assert(VT.getSizeInBits() == N1.getValueSizeInBits() &&
"Cannot BITCAST between types of different sizes!");
- if (VT == Operand.getValueType()) return Operand; // noop conversion.
- if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
- return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
+ if (VT == N1.getValueType()) return N1; // noop conversion.
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, N1.getOperand(0));
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::SCALAR_TO_VECTOR:
- assert(VT.isVector() && !Operand.getValueType().isVector() &&
- (VT.getVectorElementType() == Operand.getValueType() ||
+ assert(VT.isVector() && !N1.getValueType().isVector() &&
+ (VT.getVectorElementType() == N1.getValueType() ||
(VT.getVectorElementType().isInteger() &&
- Operand.getValueType().isInteger() &&
- VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ N1.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(N1.getValueType()))) &&
"Illegal SCALAR_TO_VECTOR node!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(Operand.getOperand(1)) &&
- Operand.getConstantOperandVal(1) == 0 &&
- Operand.getOperand(0).getValueType() == VT)
- return Operand.getOperand(0);
+ isa<ConstantSDNode>(N1.getOperand(1)) &&
+ N1.getConstantOperandVal(1) == 0 &&
+ N1.getOperand(0).getValueType() == VT)
+ return N1.getOperand(0);
break;
case ISD::FNEG:
// Negation of an unknown bag of bits is still completely undefined.
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
- if (OpOpcode == ISD::FNEG) // --X -> X
- return Operand.getOperand(0);
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return N1.getOperand(0);
break;
case ISD::FABS:
- if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
- return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, N1.getOperand(0));
break;
case ISD::VSCALE:
- assert(VT == Operand.getValueType() && "Unexpected VT!");
+ assert(VT == N1.getValueType() && "Unexpected VT!");
break;
case ISD::CTPOP:
- if (Operand.getValueType().getScalarType() == MVT::i1)
- return Operand;
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return N1;
break;
case ISD::CTLZ:
case ISD::CTTZ:
- if (Operand.getValueType().getScalarType() == MVT::i1)
- return getNOT(DL, Operand, Operand.getValueType());
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNOT(DL, N1, N1.getValueType());
break;
case ISD::VECREDUCE_ADD:
- if (Operand.getValueType().getScalarType() == MVT::i1)
- return getNode(ISD::VECREDUCE_XOR, DL, VT, Operand);
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_XOR, DL, VT, N1);
break;
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
- if (Operand.getValueType().getScalarType() == MVT::i1)
- return getNode(ISD::VECREDUCE_OR, DL, VT, Operand);
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_OR, DL, VT, N1);
break;
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_UMIN:
- if (Operand.getValueType().getScalarType() == MVT::i1)
- return getNode(ISD::VECREDUCE_AND, DL, VT, Operand);
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_AND, DL, VT, N1);
break;
}
SDNode *N;
SDVTList VTs = getVTList(VT);
- SDValue Ops[] = {Operand};
+ SDValue Ops[] = {N1};
if (VT != MVT::Glue) { // Don't CSE flag producing nodes
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
@@ -5710,6 +5950,10 @@ static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
APInt C2Ext = C2.zext(FullWidth);
return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
}
+ case ISD::ABDS:
+ return APIntOps::smax(C1, C2) - APIntOps::smin(C1, C2);
+ case ISD::ABDU:
+ return APIntOps::umax(C1, C2) - APIntOps::umin(C1, C2);
}
return std::nullopt;
}
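// A small standalone check (not part of the upstream diff) of the ABDS/ABDU
// folds above: the absolute difference of two constants is
// max(a, b) - min(a, b), using a signed or unsigned comparison respectively.
// The example values are illustrative.
#include <algorithm>
#include <cassert>
#include <cstdint>

int64_t abds(int64_t A, int64_t B) { return std::max(A, B) - std::min(A, B); }
uint64_t abdu(uint64_t A, uint64_t B) { return std::max(A, B) - std::min(A, B); }

int main() {
  assert(abds(-4, 3) == 7); // Signed: |(-4) - 3| = 7.
  assert(abdu(3, 10) == 7); // Unsigned: |3 - 10| = 7.
  return 0;
}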
@@ -6678,7 +6922,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
case ISD::VECTOR_SPLICE: {
- if (cast<ConstantSDNode>(N3)->isNullValue())
+ if (cast<ConstantSDNode>(N3)->isZero())
return N1;
break;
}
@@ -6745,6 +6989,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N1.getValueType() == VT)
return N1;
break;
+ case ISD::VP_TRUNCATE:
+ case ISD::VP_SIGN_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
+ // Don't create noop casts.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
}
// Memoize node if it doesn't produce a flag.
@@ -7042,7 +7293,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
AAMDNodes NewAAInfo = AAInfo;
NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
- const Value *SrcVal = SrcPtrInfo.V.dyn_cast<const Value *>();
+ const Value *SrcVal = dyn_cast_if_present<const Value *>(SrcPtrInfo.V);
bool isConstant =
AA && SrcVal &&
AA->pointsToConstantMemory(MemoryLocation(SrcVal, Size, AAInfo));
@@ -7321,8 +7572,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
- bool IsZeroVal =
- isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
+ bool IsZeroVal = isNullConstant(Src);
unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize);
if (!TLI.findOptimalMemOpLowering(
@@ -7870,7 +8120,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::PREFETCH ||
- ((int)Opcode <= std::numeric_limits<int>::max() &&
+ (Opcode <= (unsigned)std::numeric_limits<int>::max() &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
@@ -7883,6 +8133,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
Opcode, dl.getIROrder(), VTList, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
ID.AddInteger(MMO->getFlags());
+ ID.AddInteger(MemVT.getRawBits());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
@@ -8307,7 +8558,7 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL};
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops);
- ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>(
dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
@@ -9051,6 +9302,60 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Ptr};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::GET_FPENV_MEM, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>(
+ ISD::GET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<FPStateAccessSDNode>(ISD::GET_FPENV_MEM, dl.getIROrder(),
+ dl.getDebugLoc(), VTs, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Ptr};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SET_FPENV_MEM, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>(
+ ISD::SET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<FPStateAccessSDNode>(ISD::SET_FPENV_MEM, dl.getIROrder(),
+ dl.getDebugLoc(), VTs, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
// select undef, T, F --> T (if T is a constant), otherwise F
// select, ?, undef, F --> F
@@ -9348,6 +9653,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
"Binary operator types must match!");
break;
}
+ case ISD::FFREXP: {
+ assert(VTList.NumVTs == 2 && Ops.size() == 1 && "Invalid ffrexp op!");
+ assert(VTList.VTs[0].isFloatingPoint() && VTList.VTs[1].isInteger() &&
+ VTList.VTs[0] == Ops[0].getValueType() && "frexp type mismatch");
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Ops[0])) {
+ int FrexpExp;
+ APFloat FrexpMant =
+ frexp(C->getValueAPF(), FrexpExp, APFloat::rmNearestTiesToEven);
+ SDValue Result0 = getConstantFP(FrexpMant, DL, VTList.VTs[0]);
+ SDValue Result1 =
+ getConstant(FrexpMant.isFinite() ? FrexpExp : 0, DL, VTList.VTs[1]);
+ return getNode(ISD::MERGE_VALUES, DL, VTList, {Result0, Result1}, Flags);
+ }
+
+ break;
+ }
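
For reference, the constant folding above applies the standard frexp decomposition; a minimal standalone sketch of that scalar identity (illustrative only, not part of this change):

#include <cmath>
#include <cstdio>

int main() {
  // frexp splits x into a mantissa in [0.5, 1) and an exponent so that
  // x == Mant * 2^Exp; e.g. 8.0 == 0.5 * 2^4.
  int Exp = 0;
  double Mant = std::frexp(8.0, &Exp);
  std::printf("8.0 = %g * 2^%d\n", Mant, Exp);
  return 0;
}
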
case ISD::STRICT_FP_EXTEND:
assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
"Invalid STRICT_FP_EXTEND!");
@@ -9357,8 +9679,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
"STRICT_FP_EXTEND result type should be vector iff the operand "
"type is vector!");
assert((!VTList.VTs[0].isVector() ||
- VTList.VTs[0].getVectorNumElements() ==
- Ops[1].getValueType().getVectorNumElements()) &&
+ VTList.VTs[0].getVectorElementCount() ==
+ Ops[1].getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) &&
"Invalid fpext node, dst <= src!");
@@ -9369,8 +9691,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
"STRICT_FP_ROUND result type should be vector iff the operand "
"type is vector!");
assert((!VTList.VTs[0].isVector() ||
- VTList.VTs[0].getVectorNumElements() ==
- Ops[1].getValueType().getVectorNumElements()) &&
+ VTList.VTs[0].getVectorElementCount() ==
+ Ops[1].getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(VTList.VTs[0].isFloatingPoint() &&
Ops[1].getValueType().isFloatingPoint() &&
@@ -10247,8 +10569,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
case ISD::ADD:
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
- if (!isConstantIntBuildVectorOrConstantInt(N0) &&
- isConstantIntBuildVectorOrConstantInt(N1)) {
+ if (!isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) {
uint64_t Offset = N.getConstantOperandVal(1);
// Rewrite an ADD constant node into a DIExpression. Since we are
@@ -10594,11 +10915,11 @@ public:
bool SelectionDAG::calculateDivergence(SDNode *N) {
if (TLI->isSDNodeAlwaysUniform(N)) {
- assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, DA) &&
+ assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, UA) &&
"Conflicting divergence information!");
return false;
}
- if (TLI->isSDNodeSourceOfDivergence(N, FLI, DA))
+ if (TLI->isSDNodeSourceOfDivergence(N, FLI, UA))
return true;
for (const auto &Op : N->ops()) {
if (Op.Val.getValueType() != MVT::Other && Op.getNode()->isDivergent())
@@ -10975,6 +11296,12 @@ SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
return V;
}
+SDValue llvm::peekThroughTruncates(SDValue V) {
+ while (V.getOpcode() == ISD::TRUNCATE)
+ V = V.getOperand(0);
+ return V;
+}
+
bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
if (V.getOpcode() != ISD::XOR)
return false;
@@ -10982,7 +11309,7 @@ bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
unsigned NumBits = V.getScalarValueSizeInBits();
ConstantSDNode *C =
isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true);
- return C && (C->getAPIntValue().countTrailingOnes() >= NumBits);
+ return C && (C->getAPIntValue().countr_one() >= NumBits);
}
ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
@@ -11394,16 +11721,11 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
}
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
- assert(N->getNumValues() == 1 &&
- "Can't unroll a vector with multiple results!");
-
EVT VT = N->getValueType(0);
- unsigned NE = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
- SDLoc dl(N);
+ unsigned NE = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Scalars;
- SmallVector<SDValue, 4> Operands(N->getNumOperands());
+ SDLoc dl(N);
// If ResNE is 0, fully unroll the vector op.
if (ResNE == 0)
@@ -11411,6 +11733,40 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
else if (NE > ResNE)
NE = ResNE;
+ if (N->getNumValues() == 2) {
+ SmallVector<SDValue, 8> Scalars0, Scalars1;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+ EVT VT1 = N->getValueType(1);
+ EVT EltVT1 = VT1.getVectorElementType();
+
+ unsigned i;
+ for (i = 0; i != NE; ++i) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT,
+ Operand, getVectorIdxConstant(i, dl));
+ }
+
+ SDValue EltOp = getNode(N->getOpcode(), dl, {EltVT, EltVT1}, Operands);
+ Scalars0.push_back(EltOp);
+ Scalars1.push_back(EltOp.getValue(1));
+ }
+
+ SDValue Vec0 = getBuildVector(VT, dl, Scalars0);
+ SDValue Vec1 = getBuildVector(VT1, dl, Scalars1);
+ return getMergeValues({Vec0, Vec1}, dl);
+ }
+
+ assert(N->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
unsigned i;
for (i= 0; i != NE; ++i) {
for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
@@ -11533,7 +11889,7 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
int64_t Offset = 0;
if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
- return (Dist * Bytes == Offset);
+ return (Dist * (int64_t)Bytes == Offset);
return false;
}
@@ -11573,6 +11929,21 @@ MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const {
return std::nullopt;
}
+/// Split the scalar node with EXTRACT_ELEMENT using the provided
+/// VTs and return the low/high part.
+std::pair<SDValue, SDValue> SelectionDAG::SplitScalar(const SDValue &N,
+ const SDLoc &DL,
+ const EVT &LoVT,
+ const EVT &HiVT) {
+ assert(!LoVT.isVector() && !HiVT.isVector() && !N.getValueType().isVector() &&
+ "Split node must be a scalar type");
+ SDValue Lo =
+ getNode(ISD::EXTRACT_ELEMENT, DL, LoVT, N, getIntPtrConstant(0, DL));
+ SDValue Hi =
+ getNode(ISD::EXTRACT_ELEMENT, DL, HiVT, N, getIntPtrConstant(1, DL));
+ return std::make_pair(Lo, Hi);
+}
+
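
For illustration only, a plain-integer analogue of the split performed by SplitScalar above, where EXTRACT_ELEMENT index 0 selects the low part and index 1 the high part (the helper name here is hypothetical):

#include <cstdint>
#include <cstdio>
#include <utility>

// Hypothetical helper mirroring SplitScalar on a 64-bit integer.
static std::pair<uint32_t, uint32_t> splitScalar64(uint64_t N) {
  return {static_cast<uint32_t>(N), static_cast<uint32_t>(N >> 32)};
}

int main() {
  auto [Lo, Hi] = splitScalar64(0x1122334455667788ULL);
  std::printf("Lo=0x%08x Hi=0x%08x\n", Lo, Hi); // Lo=0x55667788 Hi=0x11223344
  return 0;
}
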
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
/// which is split (or expanded) into two not necessarily identical pieces.
std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
@@ -11786,7 +12157,7 @@ SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
}
if (!Splatted) {
- unsigned FirstDemandedIdx = DemandedElts.countTrailingZeros();
+ unsigned FirstDemandedIdx = DemandedElts.countr_zero();
assert(getOperand(FirstDemandedIdx).isUndef() &&
"Can only have a splat without a constant for all undefs.");
return getOperand(FirstDemandedIdx);
@@ -11908,7 +12279,7 @@ bool BuildVectorSDNode::getConstantRawBits(
// Extract raw src bits.
SmallVector<APInt> SrcBitElements(NumSrcOps,
- APInt::getNullValue(SrcEltSizeInBits));
+ APInt::getZero(SrcEltSizeInBits));
BitVector SrcUndeElements(NumSrcOps, false);
for (unsigned I = 0; I != NumSrcOps; ++I) {
@@ -11946,7 +12317,7 @@ void BuildVectorSDNode::recastRawBits(bool IsLittleEndian,
unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits;
DstUndefElements.clear();
DstUndefElements.resize(NumDstOps, false);
- DstBitElements.assign(NumDstOps, APInt::getNullValue(DstEltSizeInBits));
+ DstBitElements.assign(NumDstOps, APInt::getZero(DstEltSizeInBits));
// Concatenate src elements constant bits together into dst element.
if (SrcEltSizeInBits <= DstEltSizeInBits) {
@@ -12093,7 +12464,7 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
Node->NumOperands = Vals.size();
Node->OperandList = Ops;
if (!TLI->isSDNodeAlwaysUniform(Node)) {
- IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
+ IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, UA);
Node->SDNodeBits.IsDivergent = IsDivergent;
}
checkForCycles(Node);
@@ -12147,9 +12518,53 @@ SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL,
return getConstantFP(NeutralAF, DL, VT);
}
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: {
+    // Neutral element for fminimum is +Inf or FLT_MAX, depending on FMF; for
+    // fmaximum it is the same value with the sign flipped.
+ const fltSemantics &Semantics = EVTToAPFloatSemantics(VT);
+ APFloat NeutralAF = !Flags.hasNoInfs() ? APFloat::getInf(Semantics)
+ : APFloat::getLargest(Semantics);
+ if (Opcode == ISD::FMAXIMUM)
+ NeutralAF.changeSign();
+
+ return getConstantFP(NeutralAF, DL, VT);
+ }
+
}
}
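
As a quick sanity check on the reasoning above (illustrative only): +infinity is the neutral element of a minimum reduction because min(x, +inf) == x for any x, which is why FLT_MAX may stand in only when no-infs is set.

#include <algorithm>
#include <cstdio>
#include <limits>

int main() {
  const double Inf = std::numeric_limits<double>::infinity();
  for (double X : {-1.5, 0.0, 42.0, -Inf})
    std::printf("min(%g, +inf) = %g\n", X, std::min(X, Inf));
  return 0;
}
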
+/// Helper used to make a call to a library function that has one argument of
+/// pointer type.
+///
+/// Such functions include 'fegetmode', 'fesetenv' and some others, which are
+/// used to get or set floating-point state. They have one argument of pointer
+/// type, which points to the memory region containing bits of the
+/// floating-point state. The value returned by such a function is ignored in the
+/// created call.
+///
+/// \param LibFunc Reference to library function (value of RTLIB::Libcall).
+/// \param Ptr Pointer used to save/load state.
+/// \param InChain Incoming token chain.
+/// \returns Outgoing chain token.
+SDValue SelectionDAG::makeStateFunctionCall(unsigned LibFunc, SDValue Ptr,
+ SDValue InChain,
+ const SDLoc &DLoc) {
+ assert(InChain.getValueType() == MVT::Other && "Expected token chain");
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Ptr;
+ Entry.Ty = Ptr.getValueType().getTypeForEVT(*getContext());
+ Args.push_back(Entry);
+ RTLIB::Libcall LC = static_cast<RTLIB::Libcall>(LibFunc);
+ SDValue Callee = getExternalSymbol(TLI->getLibcallName(LC),
+ TLI->getPointerTy(getDataLayout()));
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(DLoc).setChain(InChain).setLibCallee(
+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*getContext()), Callee,
+ std::move(Args));
+ return TLI->LowerCallTo(CLI).second;
+}
+
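
For orientation, the C library functions this helper is meant to reach look like the following: they take a single pointer to the saved state and their int result is ignored, matching the void return type of the call lowered above (standalone sketch, not part of the patch):

#include <cfenv>

int main() {
  std::fenv_t Env;
  (void)std::fegetenv(&Env); // save the floating-point environment via a pointer
  (void)std::fesetenv(&Env); // restore it; the returned status is ignored
  return 0;
}
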
void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
assert(From && To && "Invalid SDNode; empty source SDValue?");
auto I = SDEI.find(From);
@@ -12158,8 +12573,90 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
// Use of operator[] on the DenseMap may cause an insertion, which invalidates
// the iterator, hence the need to make a copy to prevent a use-after-free.
- NodeExtraInfo Copy = I->second;
- SDEI[To] = std::move(Copy);
+ NodeExtraInfo NEI = I->second;
+ if (LLVM_LIKELY(!NEI.PCSections)) {
+ // No deep copy required for the types of extra info set.
+ //
+ // FIXME: Investigate if other types of extra info also need deep copy. This
+ // depends on the types of nodes they can be attached to: if some extra info
+ // is only ever attached to nodes where a replacement To node is always the
+ // node where later use and propagation of the extra info has the intended
+ // semantics, no deep copy is required.
+ SDEI[To] = std::move(NEI);
+ return;
+ }
+
+ // We need to copy NodeExtraInfo to all _new_ nodes that are being introduced
+ // through the replacement of From with To. Otherwise, replacements of a node
+ // (From) with more complex nodes (To and its operands) may result in lost
+ // extra info where the root node (To) is insignificant in further propagating
+ // and using extra info when further lowering to MIR.
+ //
+ // In the first step pre-populate the visited set with the nodes reachable
+ // from the old From node. This avoids copying NodeExtraInfo to parts of the
+ // DAG that is not new and should be left untouched.
+ SmallVector<const SDNode *> Leafs{From}; // Leafs reachable with VisitFrom.
+ DenseSet<const SDNode *> FromReach; // The set of nodes reachable from From.
+ auto VisitFrom = [&](auto &&Self, const SDNode *N, int MaxDepth) {
+ if (MaxDepth == 0) {
+ // Remember this node in case we need to increase MaxDepth and continue
+ // populating FromReach from this node.
+ Leafs.emplace_back(N);
+ return;
+ }
+ if (!FromReach.insert(N).second)
+ return;
+ for (const SDValue &Op : N->op_values())
+ Self(Self, Op.getNode(), MaxDepth - 1);
+ };
+
+ // Copy extra info to To and all its transitive operands (that are new).
+ SmallPtrSet<const SDNode *, 8> Visited;
+ auto DeepCopyTo = [&](auto &&Self, const SDNode *N) {
+ if (FromReach.contains(N))
+ return true;
+ if (!Visited.insert(N).second)
+ return true;
+ if (getEntryNode().getNode() == N)
+ return false;
+ for (const SDValue &Op : N->op_values()) {
+ if (!Self(Self, Op.getNode()))
+ return false;
+ }
+ // Copy only if entry node was not reached.
+ SDEI[N] = NEI;
+ return true;
+ };
+
+ // We first try with a lower MaxDepth, assuming that the path to common
+ // operands between From and To is relatively short. This significantly
+ // improves performance in the common case. The initial MaxDepth is big
+ // enough to avoid retry in the common case; the last MaxDepth is large
+ // enough to avoid having to use the fallback below (and protects from
+ // potential stack exhaustion from recursion).
+ for (int PrevDepth = 0, MaxDepth = 16; MaxDepth <= 1024;
+ PrevDepth = MaxDepth, MaxDepth *= 2, Visited.clear()) {
+ // StartFrom is the previous (or initial) set of leafs reachable at the
+ // previous maximum depth.
+ SmallVector<const SDNode *> StartFrom;
+ std::swap(StartFrom, Leafs);
+ for (const SDNode *N : StartFrom)
+ VisitFrom(VisitFrom, N, MaxDepth - PrevDepth);
+ if (LLVM_LIKELY(DeepCopyTo(DeepCopyTo, To)))
+ return;
+ // This should happen very rarely (reached the entry node).
+ LLVM_DEBUG(dbgs() << __func__ << ": MaxDepth=" << MaxDepth << " too low\n");
+ assert(!Leafs.empty());
+ }
+
+ // This should not happen - but if it did, that means the subgraph reachable
+ // from From has depth greater or equal to maximum MaxDepth, and VisitFrom()
+ // could not visit all reachable common operands. Consequently, we were able
+ // to reach the entry node.
+ errs() << "warning: incomplete propagation of SelectionDAG::NodeExtraInfo\n";
+ assert(false && "From subgraph too complex - increase max. MaxDepth?");
+ // Best-effort fallback if assertions disabled.
+ SDEI[To] = std::move(NEI);
}
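
A standalone, much-simplified sketch of the retry scheme described in the comments above: depth-limited traversal that remembers its frontier, so each doubling of the depth limit only explores nodes not yet visited (the Node type here is hypothetical, not the SDNode graph):

#include <cstdio>
#include <set>
#include <vector>

struct Node { std::vector<const Node *> Ops; };

static void visitFrom(const Node *N, int Depth, std::set<const Node *> &Reach,
                      std::vector<const Node *> &Frontier) {
  if (Depth == 0) { Frontier.push_back(N); return; }
  if (!Reach.insert(N).second) return;
  for (const Node *Op : N->Ops)
    visitFrom(Op, Depth - 1, Reach, Frontier);
}

int main() {
  Node A, B, C;
  B.Ops = {&C};
  A.Ops = {&B};
  std::set<const Node *> Reach;
  std::vector<const Node *> Frontier{&A};
  for (int Prev = 0, Max = 2; !Frontier.empty() && Max <= 8;
       Prev = Max, Max *= 2) {
    std::vector<const Node *> Start;
    Start.swap(Frontier);
    for (const Node *N : Start)
      visitFrom(N, Max - Prev, Reach, Frontier);
  }
  std::printf("reached %zu nodes\n", Reach.size()); // prints "reached 3 nodes"
  return 0;
}
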
#ifndef NDEBUG
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0bdfdac6a65f..9595da9d0d8a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -19,21 +19,21 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -67,6 +67,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
@@ -96,6 +97,7 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
#include <iterator>
@@ -416,6 +418,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
return Val;
if (PartEVT.isInteger() && ValueVT.isFloatingPoint())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Vector/Vector bitcast (e.g. <2 x bfloat> -> <2 x half>).
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
}
// Promoted vector extract
@@ -495,7 +501,6 @@ getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
CallConv);
- unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
"Copying to an illegal type!");
@@ -511,6 +516,7 @@ getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
return;
}
+ unsigned PartBits = PartVT.getSizeInBits();
if (NumParts * PartBits > ValueVT.getSizeInBits()) {
// If the parts cover more bits than the value has, promote the value.
if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
@@ -621,6 +627,8 @@ static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
return SDValue();
EVT ValueVT = Val.getValueType();
+ EVT PartEVT = PartVT.getVectorElementType();
+ EVT ValueEVT = ValueVT.getVectorElementType();
ElementCount PartNumElts = PartVT.getVectorElementCount();
ElementCount ValueNumElts = ValueVT.getVectorElementCount();
@@ -628,9 +636,18 @@ static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
// fixed/scalable properties. If a target needs to widen a fixed-length type
// to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
- PartNumElts.isScalable() != ValueNumElts.isScalable() ||
- PartVT.getVectorElementType() != ValueVT.getVectorElementType())
+ PartNumElts.isScalable() != ValueNumElts.isScalable())
+ return SDValue();
+
+  // Special-case bf16 because some targets share its ABI with fp16.
+ if (ValueEVT == MVT::bf16 && PartEVT == MVT::f16) {
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
+ "Cannot widen to illegal type");
+ Val = DAG.getNode(ISD::BITCAST, DL,
+ ValueVT.changeVectorElementType(MVT::f16), Val);
+ } else if (PartEVT != ValueEVT) {
return SDValue();
+ }
// Widening a scalable vector to another scalable vector is done by inserting
// the vector into a larger undef one.
@@ -638,12 +655,11 @@ static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
Val, DAG.getVectorIdxConstant(0, DL));
- EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
// undef elements.
SmallVector<SDValue, 16> Ops;
DAG.ExtractVectorElements(Val, Ops);
- SDValue EltUndef = DAG.getUNDEF(ElementVT);
+ SDValue EltUndef = DAG.getUNDEF(PartEVT);
Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);
// FIXME: Use CONCAT for 2x -> 4x.
@@ -833,7 +849,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain,
- SDValue *Flag, const Value *V) const {
+ SDValue *Glue, const Value *V) const {
// A Value with type {} or [0 x %t] needs no registers.
if (ValueVTs.empty())
return SDValue();
@@ -855,11 +871,11 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue P;
- if (!Flag) {
+ if (!Glue) {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
} else {
- P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
- *Flag = P.getValue(2);
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Glue);
+ *Glue = P.getValue(2);
}
Chain = P.getValue(1);
@@ -918,7 +934,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
}
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
- const SDLoc &dl, SDValue &Chain, SDValue *Flag,
+ const SDLoc &dl, SDValue &Chain, SDValue *Glue,
const Value *V,
ISD::NodeType PreferredExtendType) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -947,18 +963,18 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
SmallVector<SDValue, 8> Chains(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue Part;
- if (!Flag) {
+ if (!Glue) {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
} else {
- Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
- *Flag = Part.getValue(1);
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Glue);
+ *Glue = Part.getValue(1);
}
Chains[i] = Part.getValue(0);
}
- if (NumRegs == 1 || Flag)
- // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+ if (NumRegs == 1 || Glue)
+ // If NumRegs > 1 && Glue is used then the use of the last CopyToReg is
// flagged to it. That is the CopyToReg nodes and the user are considered
// a single scheduling unit. If we create a TokenFactor and return it as
// chain, then the TokenFactor is both a predecessor (operand) of the
@@ -1050,6 +1066,8 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
Context = DAG.getContext();
LPadToCallSiteMap.clear();
SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
+ AssignmentTrackingEnabled = isAssignmentTrackingEnabled(
+ *DAG.getMachineFunction().getFunction().getParent());
}
void SelectionDAGBuilder::clear() {
@@ -1144,8 +1162,13 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
It != End; ++It) {
auto *Var = FnVarLocs->getDILocalVariable(It->VariableID);
dropDanglingDebugInfo(Var, It->Expr);
- if (!handleDebugValue(It->V, Var, It->Expr, It->DL, SDNodeOrder,
- /*IsVariadic=*/false))
+ if (It->Values.isKillLocation(It->Expr)) {
+ handleKillDebugValue(Var, It->Expr, It->DL, SDNodeOrder);
+ continue;
+ }
+ SmallVector<Value *> Values(It->Values.location_ops());
+ if (!handleDebugValue(Values, Var, It->Expr, It->DL, SDNodeOrder,
+ It->Values.hasArgList()))
addDanglingDebugInfo(It, SDNodeOrder);
}
}
@@ -1205,27 +1228,46 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
}
}
+static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG,
+ DILocalVariable *Variable,
+ DebugLoc DL, unsigned Order,
+ RawLocationWrapper Values,
+ DIExpression *Expression) {
+ if (!Values.hasArgList())
+ return false;
+ // For variadic dbg_values we will now insert an undef.
+ // FIXME: We can potentially recover these!
+ SmallVector<SDDbgOperand, 2> Locs;
+ for (const Value *V : Values.location_ops()) {
+ auto *Undef = UndefValue::get(V->getType());
+ Locs.push_back(SDDbgOperand::fromConst(Undef));
+ }
+ SDDbgValue *SDV = DAG.getDbgValueList(Variable, Expression, Locs, {},
+ /*IsIndirect=*/false, DL, Order,
+ /*IsVariadic=*/true);
+ DAG.AddDbgValue(SDV, /*isParameter=*/false);
+ return true;
+}
+
void SelectionDAGBuilder::addDanglingDebugInfo(const VarLocInfo *VarLoc,
unsigned Order) {
- DanglingDebugInfoMap[VarLoc->V].emplace_back(VarLoc, Order);
+ if (!handleDanglingVariadicDebugInfo(
+ DAG,
+ const_cast<DILocalVariable *>(DAG.getFunctionVarLocs()
+ ->getVariable(VarLoc->VariableID)
+ .getVariable()),
+ VarLoc->DL, Order, VarLoc->Values, VarLoc->Expr)) {
+ DanglingDebugInfoMap[VarLoc->Values.getVariableLocationOp(0)].emplace_back(
+ VarLoc, Order);
+ }
}
void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
unsigned Order) {
// We treat variadic dbg_values differently at this stage.
- if (DI->hasArgList()) {
- // For variadic dbg_values we will now insert an undef.
- // FIXME: We can potentially recover these!
- SmallVector<SDDbgOperand, 2> Locs;
- for (const Value *V : DI->getValues()) {
- auto Undef = UndefValue::get(V->getType());
- Locs.push_back(SDDbgOperand::fromConst(Undef));
- }
- SDDbgValue *SDV = DAG.getDbgValueList(
- DI->getVariable(), DI->getExpression(), Locs, {},
- /*IsIndirect=*/false, DI->getDebugLoc(), Order, /*IsVariadic=*/true);
- DAG.AddDbgValue(SDV, /*isParameter=*/false);
- } else {
+ if (!handleDanglingVariadicDebugInfo(
+ DAG, DI->getVariable(), DI->getDebugLoc(), Order,
+ DI->getWrappedLocation(), DI->getExpression())) {
// TODO: Dangling debug info will eventually either be resolved or produce
// an Undef DBG_VALUE. However in the resolution case, a gap may appear
// between the original dbg.value location and its resolved DBG_VALUE,
@@ -1382,6 +1424,17 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
<< "\n");
}
+void SelectionDAGBuilder::handleKillDebugValue(DILocalVariable *Var,
+ DIExpression *Expr,
+ DebugLoc DbgLoc,
+ unsigned Order) {
+ Value *Poison = PoisonValue::get(Type::getInt1Ty(*Context));
+ DIExpression *NewExpr =
+ const_cast<DIExpression *>(DIExpression::convertToUndefExpression(Expr));
+ handleDebugValue(Poison, Var, NewExpr, DbgLoc, Order,
+ /*IsVariadic*/ false);
+}
+
bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
DILocalVariable *Var,
DIExpression *Expr, DebugLoc DbgLoc,
@@ -1569,7 +1622,7 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
// If we already have an SDValue for this value, use it.
SDValue &N = NodeMap[V];
if (N.getNode()) {
- if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
+ if (isIntOrFPConstant(N)) {
// Remove the debug location from the node as the node is about to be used
// in a location which may differ from the original debug location. This
// is relevant to Constant and ConstantFP nodes because they can appear
@@ -1606,7 +1659,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
TLI.getPointerTy(DAG.getDataLayout(), AS));
}
- if (match(C, m_VScale(DAG.getDataLayout())))
+ if (match(C, m_VScale()))
return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1));
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
@@ -1976,8 +2029,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// registers the usual way.
SmallVector<EVT, 1> PtrValueVTs;
ComputeValueVTs(TLI, DL,
- F->getReturnType()->getPointerTo(
- DAG.getDataLayout().getAllocaAddrSpace()),
+ PointerType::get(F->getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace()),
PtrValueVTs);
SDValue RetPtr =
@@ -1987,7 +2040,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
- &Offsets);
+ &Offsets, 0);
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
@@ -2123,7 +2176,8 @@ void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
- assert(!V->use_empty() && "Unused value assigned virtual registers!");
+ assert((!V->use_empty() || isa<CallBrInst>(V)) &&
+ "Unused value assigned virtual registers!");
CopyValueToVirtualRegister(V, VMI->second);
}
}
@@ -2424,10 +2478,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
- if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
- DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
- MVT::Other, getControlRoot(),
- DAG.getBasicBlock(Succ0MBB)));
+ if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) {
+ auto Br = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(Succ0MBB));
+ setValue(&I, Br);
+ DAG.setRoot(Br);
+ }
return;
}
@@ -2901,14 +2957,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
// would need to be to shift a 1 bit in that position.
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
- ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
+ ShiftOp, DAG.getConstant(llvm::countr_zero(B.Mask), dl, VT),
ISD::SETEQ);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
- ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
- ISD::SETNE);
+ ShiftOp, DAG.getConstant(llvm::countr_one(B.Mask), dl, VT), ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
@@ -2950,6 +3005,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
// catchswitch for successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
const BasicBlock *EHPadBB = I.getSuccessor(1);
+ MachineBasicBlock *EHPadMBB = FuncInfo.MBBMap[EHPadBB];
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
@@ -2974,6 +3030,10 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
case Intrinsic::seh_scope_begin:
case Intrinsic::seh_try_end:
case Intrinsic::seh_scope_end:
+ if (EHPadMBB)
+        // This block is referenced by the EH table; mark its address as taken
+        // so optimizations do not remove the dtor funclet.
+ EHPadMBB->setMachineBlockAddressTaken();
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
@@ -3338,6 +3398,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
+ Flags.setUnpredictable(
+ cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable));
+
// Min/max matching is only viable if all output VTs are the same.
if (all_equal(ValueVTs)) {
EVT VT = ValueVTs[0];
@@ -3355,6 +3418,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
bool UseScalarMinMax = VT.isVector() &&
!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
+ // ValueTracking's select pattern matching does not account for -0.0,
+ // so we can't lower to FMINIMUM/FMAXIMUM because those nodes specify that
+ // -0.0 is less than +0.0.
Value *LHS, *RHS;
auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
ISD::NodeType Opc = ISD::DELETED_NODE;
@@ -3366,34 +3432,26 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_FMINNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
+ case SPNB_RETURNS_NAN: break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
- case SPNB_RETURNS_ANY: {
- if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
+ case SPNB_RETURNS_ANY:
+ if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType())))
Opc = ISD::FMINNUM;
- else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
- Opc = ISD::FMINIMUM;
- else if (UseScalarMinMax)
- Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
- ISD::FMINNUM : ISD::FMINIMUM;
break;
}
- }
break;
case SPF_FMAXNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
+ case SPNB_RETURNS_NAN: break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
case SPNB_RETURNS_ANY:
-
- if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
+ if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType())))
Opc = ISD::FMAXNUM;
- else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
- Opc = ISD::FMAXIMUM;
- else if (UseScalarMinMax)
- Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
- ISD::FMAXNUM : ISD::FMAXIMUM;
break;
}
break;
@@ -4123,7 +4181,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets, 0);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -4196,7 +4254,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Chains[ChainI] = L.getValue(1);
if (MemVTs[i] != ValueVTs[i])
- L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
+ L = DAG.getPtrExtOrTrunc(L, dl, ValueVTs[i]);
Values[i] = L;
}
@@ -4222,7 +4280,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SmallVector<uint64_t, 4> Offsets;
const Value *SrcV = I.getOperand(0);
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
- SrcV->getType(), ValueVTs, &Offsets);
+ SrcV->getType(), ValueVTs, &Offsets, 0);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
@@ -4258,7 +4316,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
- ValueVTs, &Offsets);
+ ValueVTs, &Offsets, 0);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
@@ -4295,7 +4353,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
- SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
+ SrcV->getType(), ValueVTs, &MemVTs, &Offsets, 0);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -4447,11 +4505,13 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
return false;
- uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
+ TypeSize ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
+ if (ScaleVal.isScalable())
+ return false;
// Target may not support the required addressing mode.
if (ScaleVal != 1 &&
- !TLI.isLegalScaleForGatherScatter(ScaleVal, ElemSize))
+ !TLI.isLegalScaleForGatherScatter(ScaleVal.getFixedValue(), ElemSize))
return false;
Base = SDB->getValue(BasePtr);
@@ -4919,8 +4979,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
Result = lowerRangeToAssertZExt(DAG, I, Result);
MaybeAlign Alignment = I.getRetAlign();
- if (!Alignment)
- Alignment = F->getAttributes().getRetAlignment();
+
// Insert `assertalign` node if there's an alignment.
if (InsertAssertAlign && Alignment) {
Result =
@@ -5504,13 +5563,8 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
} else
llvm_unreachable("Wrong VT for DIVFIX?");
- if (Signed) {
- LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT);
- RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT);
- } else {
- LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
- RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
- }
+ LHS = DAG.getExtOrTrunc(Signed, LHS, DL, PromVT);
+ RHS = DAG.getExtOrTrunc(Signed, RHS, DL, PromVT);
EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
// For saturating operations, we need to shift up the LHS to get the
// proper saturation width, and then shift down again afterwards.
@@ -5767,6 +5821,26 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Op)
return false;
+ // If the expression refers to the entry value of an Argument, use the
+ // corresponding livein physical register. As per the Verifier, this is only
+ // allowed for swiftasync Arguments.
+ if (Op->isReg() && Expr->isEntryValue()) {
+ assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
+ auto OpReg = Op->getReg();
+ for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
+ if (OpReg == VirtReg || OpReg == PhysReg) {
+ SDDbgValue *SDV = DAG.getVRegDbgValue(
+ Variable, Expr, PhysReg,
+ Kind != FuncArgumentDbgValueKind::Value /*is indirect*/, DL,
+ SDNodeOrder);
+ DAG.AddDbgValue(SDV, false /*treat as dbg.declare byval parameter*/);
+ return true;
+ }
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n");
+ return true;
+ }
+
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
MachineInstr *NewMI = nullptr;
@@ -5873,7 +5947,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
visitTargetIntrinsic(I, Intrinsic);
return;
case Intrinsic::vscale: {
- match(&I, m_VScale(DAG.getDataLayout()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1)));
return;
@@ -6092,14 +6165,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.setRoot(Res.getValue(1));
return;
}
- case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
- // Debug intrinsics are handled seperately in assignment tracking mode.
- if (isAssignmentTrackingEnabled(*I.getFunction()->getParent()))
+ const auto &DI = cast<DbgDeclareInst>(I);
+ // Debug intrinsics are handled separately in assignment tracking mode.
+ // Some intrinsics are handled right after Argument lowering.
+ if (AssignmentTrackingEnabled ||
+ FuncInfo.PreprocessedDbgDeclares.count(&DI))
return;
- // Assume dbg.addr and dbg.declare can not currently use DIArgList, i.e.
- // they are non-variadic.
- const auto &DI = cast<DbgVariableIntrinsic>(I);
+    // Assume dbg.declare cannot currently use DIArgList, i.e.
+ // it is non-variadic.
assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
@@ -6118,37 +6192,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isParameter = Variable->isParameter() || isa<Argument>(Address);
- // Check if this variable can be described by a frame index, typically
- // either as a static alloca or a byval parameter.
- int FI = std::numeric_limits<int>::max();
- if (const auto *AI =
- dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
- if (AI->isStaticAlloca()) {
- auto I = FuncInfo.StaticAllocaMap.find(AI);
- if (I != FuncInfo.StaticAllocaMap.end())
- FI = I->second;
- }
- } else if (const auto *Arg = dyn_cast<Argument>(
- Address->stripInBoundsConstantOffsets())) {
- FI = FuncInfo.getArgumentFrameIndex(Arg);
- }
-
- // llvm.dbg.addr is control dependent and always generates indirect
- // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
- // the MachineFunction variable table.
- if (FI != std::numeric_limits<int>::max()) {
- if (Intrinsic == Intrinsic::dbg_addr) {
- SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
- Variable, Expression, FI, getRoot().getNode(), /*IsIndirect*/ true,
- dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, isParameter);
- } else {
- LLVM_DEBUG(dbgs() << "Skipping " << DI
- << " (variable info stashed in MF side table)\n");
- }
- return;
- }
-
SDValue &N = NodeMap[Address];
if (!N.getNode() && isa<Argument>(Address))
// Check unused arguments map.
@@ -6198,13 +6241,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::dbg_assign: {
    // Debug intrinsics are handled separately in assignment tracking mode.
- assert(isAssignmentTrackingEnabled(*I.getFunction()->getParent()) &&
- "expected assignment tracking to be enabled");
- return;
+ if (AssignmentTrackingEnabled)
+ return;
+ // If assignment tracking hasn't been enabled then fall through and treat
+ // the dbg.assign as a dbg.value.
+ [[fallthrough]];
}
case Intrinsic::dbg_value: {
    // Debug intrinsics are handled separately in assignment tracking mode.
- if (isAssignmentTrackingEnabled(*I.getFunction()->getParent()))
+ if (AssignmentTrackingEnabled)
return;
const DbgValueInst &DI = cast<DbgValueInst>(I);
assert(DI.getVariable() && "Missing variable");
@@ -6212,11 +6257,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
- SmallVector<Value *, 4> Values(DI.getValues());
- if (Values.empty())
+
+ if (DI.isKillLocation()) {
+ handleKillDebugValue(Variable, Expression, DI.getDebugLoc(), SDNodeOrder);
return;
+ }
- if (llvm::is_contained(Values, nullptr))
+ SmallVector<Value *, 4> Values(DI.getValues());
+ if (Values.empty())
return;
bool IsVariadic = DI.hasArgList();
@@ -6413,6 +6461,20 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
+ case Intrinsic::ldexp:
+ setValue(&I, DAG.getNode(ISD::FLDEXP, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), Flags));
+ return;
+ case Intrinsic::frexp: {
+ SmallVector<EVT, 2> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+ setValue(&I,
+ DAG.getNode(ISD::FFREXP, sdl, VTs, getValue(I.getArgOperand(0))));
+ return;
+ }
case Intrinsic::arithmetic_fence: {
setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -6515,7 +6577,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
const DataLayout DLayout = DAG.getDataLayout();
EVT DestVT = TLI.getValueType(DLayout, I.getType());
EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType());
- unsigned Test = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ FPClassTest Test = static_cast<FPClassTest>(
+ cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
MachineFunction &MF = DAG.getMachineFunction();
const Function &F = MF.getFunction();
SDValue Op = getValue(I.getArgOperand(0));
@@ -6536,6 +6599,64 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, V);
return;
}
+ case Intrinsic::get_fpenv: {
+ const DataLayout DLayout = DAG.getDataLayout();
+ EVT EnvVT = TLI.getValueType(DLayout, I.getType());
+ Align TempAlign = DAG.getEVTAlign(EnvVT);
+ SDValue Chain = getRoot();
+    // Use GET_FPENV if it is legal or custom. Otherwise use the memory-based
+    // node and temporary storage on the stack.
+ if (TLI.isOperationLegalOrCustom(ISD::GET_FPENV, EnvVT)) {
+ Res = DAG.getNode(
+ ISD::GET_FPENV, sdl,
+ DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ MVT::Other),
+ Chain);
+ } else {
+ SDValue Temp = DAG.CreateStackTemporary(EnvVT, TempAlign.value());
+ int SPFI = cast<FrameIndexSDNode>(Temp.getNode())->getIndex();
+ auto MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize,
+ TempAlign);
+ Chain = DAG.getGetFPEnv(Chain, sdl, Temp, EnvVT, MMO);
+ Res = DAG.getLoad(EnvVT, sdl, Chain, Temp, MPI);
+ }
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
+ case Intrinsic::set_fpenv: {
+ const DataLayout DLayout = DAG.getDataLayout();
+ SDValue Env = getValue(I.getArgOperand(0));
+ EVT EnvVT = Env.getValueType();
+ Align TempAlign = DAG.getEVTAlign(EnvVT);
+ SDValue Chain = getRoot();
+    // If SET_FPENV is custom or legal, use it. Otherwise load the
+    // environment from memory.
+ if (TLI.isOperationLegalOrCustom(ISD::SET_FPENV, EnvVT)) {
+ Chain = DAG.getNode(ISD::SET_FPENV, sdl, MVT::Other, Chain, Env);
+ } else {
+ // Allocate space in stack, copy environment bits into it and use this
+ // memory in SET_FPENV_MEM.
+ SDValue Temp = DAG.CreateStackTemporary(EnvVT, TempAlign.value());
+ int SPFI = cast<FrameIndexSDNode>(Temp.getNode())->getIndex();
+ auto MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ Chain = DAG.getStore(Chain, sdl, Env, Temp, MPI, TempAlign,
+ MachineMemOperand::MOStore);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize,
+ TempAlign);
+ Chain = DAG.getSetFPEnv(Chain, sdl, Temp, EnvVT, MMO);
+ }
+ DAG.setRoot(Chain);
+ return;
+ }
+ case Intrinsic::reset_fpenv:
+ DAG.setRoot(DAG.getNode(ISD::RESET_FPENV, sdl, MVT::Other, getRoot()));
+ return;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
@@ -7020,6 +7141,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
llvm_unreachable("instrprof failed to lower a cover");
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
+ case Intrinsic::instrprof_timestamp:
+ llvm_unreachable("instrprof failed to lower a timestamp");
case Intrinsic::instrprof_value_profile:
llvm_unreachable("instrprof failed to lower a value profiling call");
case Intrinsic::localescape: {
@@ -7093,10 +7216,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::xray_customevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
- // specific calling convention, and only for x86_64.
- // FIXME: Support other platforms later.
+ // specific calling convention.
const auto &Triple = DAG.getTarget().getTargetTriple();
- if (Triple.getArch() != Triple::x86_64)
+ if (!Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
return;
SmallVector<SDValue, 8> Ops;
@@ -7123,10 +7245,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::xray_typedevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
- // specific calling convention, and only for x86_64.
- // FIXME: Support other platforms later.
+ // specific calling convention.
const auto &Triple = DAG.getTarget().getTargetTriple();
- if (Triple.getArch() != Triple::x86_64)
+ if (!Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
return;
SmallVector<SDValue, 8> Ops;
@@ -7174,6 +7295,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fmaximum:
+ case Intrinsic::vector_reduce_fminimum:
visitVectorReduce(I, Intrinsic);
return;
@@ -7285,6 +7408,40 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, SetCC);
return;
}
+ case Intrinsic::experimental_get_vector_length: {
+ assert(cast<ConstantInt>(I.getOperand(1))->getSExtValue() > 0 &&
+ "Expected positive VF");
+ unsigned VF = cast<ConstantInt>(I.getOperand(1))->getZExtValue();
+ bool IsScalable = cast<ConstantInt>(I.getOperand(2))->isOne();
+
+ SDValue Count = getValue(I.getOperand(0));
+ EVT CountVT = Count.getValueType();
+
+ if (!TLI.shouldExpandGetVectorLength(CountVT, VF, IsScalable)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
+ }
+
+    // Expand to a umin between the trip count and the maximum number of
+    // elements the type can hold.
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // Extend the trip count to at least the result VT.
+ if (CountVT.bitsLT(VT)) {
+ Count = DAG.getNode(ISD::ZERO_EXTEND, sdl, VT, Count);
+ CountVT = VT;
+ }
+
+ SDValue MaxEVL = DAG.getElementCount(sdl, CountVT,
+ ElementCount::get(VF, IsScalable));
+
+ SDValue UMin = DAG.getNode(ISD::UMIN, sdl, CountVT, Count, MaxEVL);
+ // Clip to the result type if needed.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, sdl, VT, UMin);
+
+ setValue(&I, Trunc);
+ return;
+ }
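
A standalone sketch of the clamp this expansion produces, using a fixed VF and ignoring the vscale factor used for scalable types (the loop bounds are made up for illustration):

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t TripCount = 13, VF = 4;
  for (uint64_t I = 0; I < TripCount;) {
    // Under the expansion above, get.vector.length(Remaining, VF, false)
    // becomes umin(Remaining, VF).
    uint64_t EVL = std::min(TripCount - I, VF);
    std::printf("iteration at %llu processes %llu lanes\n",
                (unsigned long long)I, (unsigned long long)EVL);
    I += EVL;
  }
  return 0;
}
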
case Intrinsic::vector_insert: {
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
@@ -7324,6 +7481,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_vector_splice:
visitVectorSplice(I);
return;
+ case Intrinsic::callbr_landingpad:
+ visitCallBrLandingPad(I);
+ return;
+ case Intrinsic::experimental_vector_interleave2:
+ visitVectorInterleave(I);
+ return;
+ case Intrinsic::experimental_vector_deinterleave2:
+ visitVectorDeinterleave(I);
+ return;
}
}
@@ -7442,12 +7608,12 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
std::optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
case Intrinsic::vp_ctlz: {
- bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne();
+ bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(1))->isOne();
ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ;
break;
}
case Intrinsic::vp_cttz: {
- bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne();
+ bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(1))->isOne();
ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ;
break;
}
@@ -7472,21 +7638,21 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
return *ResOPC;
}
-void SelectionDAGBuilder::visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues) {
+void SelectionDAGBuilder::visitVPLoad(
+ const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
SDValue LD;
- bool AddToChain = true;
// Do not serialize variable-length loads of constant memory with
// anything.
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
- AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
@@ -7498,8 +7664,9 @@ void SelectionDAGBuilder::visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
setValue(&VPIntrin, LD);
}
-void SelectionDAGBuilder::visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues) {
+void SelectionDAGBuilder::visitVPGather(
+ const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(0);
@@ -7539,8 +7706,8 @@ void SelectionDAGBuilder::visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
setValue(&VPIntrin, LD);
}
-void SelectionDAGBuilder::visitVPStore(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues) {
+void SelectionDAGBuilder::visitVPStore(
+ const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
@@ -7561,8 +7728,8 @@ void SelectionDAGBuilder::visitVPStore(const VPIntrinsic &VPIntrin,
setValue(&VPIntrin, ST);
}
-void SelectionDAGBuilder::visitVPScatter(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues) {
+void SelectionDAGBuilder::visitVPScatter(
+ const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(1);
@@ -7604,7 +7771,8 @@ void SelectionDAGBuilder::visitVPScatter(const VPIntrinsic &VPIntrin,
}
void SelectionDAGBuilder::visitVPStridedLoad(
- const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl<SDValue> &OpValues) {
+ const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
@@ -7629,7 +7797,7 @@ void SelectionDAGBuilder::visitVPStridedLoad(
}
void SelectionDAGBuilder::visitVPStridedStore(
- const VPIntrinsic &VPIntrin, SmallVectorImpl<SDValue> &OpValues) {
+ const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
@@ -7790,10 +7958,8 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
case ISD::VP_CTLZ_ZERO_UNDEF:
case ISD::VP_CTTZ:
case ISD::VP_CTTZ_ZERO_UNDEF: {
- // Pop is_zero_poison operand for cp.ctlz/cttz or
- // is_int_min_poison operand for vp.abs.
- OpValues.pop_back();
- SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ SDValue Result =
+ DAG.getNode(Opcode, DL, VTs, {OpValues[0], OpValues[2], OpValues[3]});
setValue(&VPIntrin, Result);
break;
}
@@ -8068,10 +8234,7 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
bool IsSigned) {
EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
- if (IsSigned)
- Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
- else
- Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
+ Value = DAG.getExtOrTrunc(IsSigned, Value, getCurSDLoc(), VT);
setValue(&I, Value);
}
@@ -8206,14 +8369,13 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// DAG::getMemcpy needs Alignment to be defined.
Align Alignment = std::min(DstAlign, SrcAlign);
- bool isVol = false;
SDLoc sdl = getCurSDLoc();
// In the mempcpy context we need to pass in a false value for isTailCall
// because the return pointer needs to be adjusted by the size of
// the copied memory.
- SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
+ SDValue Root = getMemoryRoot();
+ SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, false, false,
/*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)),
@@ -8498,6 +8660,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
+ case LibFunc_ldexp:
+ case LibFunc_ldexpf:
+ case LibFunc_ldexpl:
+ if (visitBinaryFloatCall(I, ISD::FLDEXP))
+ return;
+ break;
case LibFunc_memcmp:
if (visitMemCmpBCmpCall(I))
return;
@@ -8897,7 +9065,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// We won't need to flush pending loads if this asm doesn't touch
// memory and is nonvolatile.
- SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
+ SDValue Glue, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
bool EmitEHLabels = isa<InvokeInst>(Call);
if (EmitEHLabels) {
@@ -9124,7 +9292,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to
- MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call);
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call);
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(), dl,
DAG, AsmNodeOperands);
@@ -9202,10 +9370,6 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
}
if (OpInfo.ConstraintType == TargetLowering::C_Address) {
- assert(InOperandVal.getValueType() ==
- TLI.getPointerTy(DAG.getDataLayout()) &&
- "Address operands expect pointer values");
-
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
@@ -9258,7 +9422,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
SDLoc dl = getCurSDLoc();
- OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue,
&Call);
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
@@ -9278,12 +9442,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// Finish up input operands. Set the input chain and add the flag last.
AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
- if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+ if (Glue.getNode()) AsmNodeOperands.push_back(Glue);
unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
// Do additional work to generate outputs.
@@ -9341,11 +9505,11 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
case TargetLowering::C_Register:
case TargetLowering::C_RegisterClass:
Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
- Chain, &Flag, &Call);
+ Chain, &Glue, &Call);
break;
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
- Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+ Val = TLI.LowerAsmOutputForConstraint(Chain, Glue, getCurSDLoc(),
OpInfo, DAG);
break;
case TargetLowering::C_Memory:
@@ -9576,7 +9740,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
- SDValue Chain, InFlag, Callee;
+ SDValue Chain, InGlue, Callee;
SmallVector<SDValue, 32> Ops;
SDLoc DL = getCurSDLoc();
@@ -9593,11 +9757,11 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// chain, flag = CALLSEQ_END(chain, 0, 0, flag)
//
Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Add the STACKMAP operands, starting with DAG house-keeping.
Ops.push_back(Chain);
- Ops.push_back(InFlag);
+ Ops.push_back(InGlue);
// Add the <id>, <numShadowBytes> operands.
//
@@ -9621,9 +9785,9 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// Create the STACKMAP node.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, DL);
// Stackmaps don't generate values, so nothing goes into the NodeMap.
@@ -9847,6 +10011,12 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
case Intrinsic::vector_reduce_fmin:
Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
break;
+ case Intrinsic::vector_reduce_fmaximum:
+ Res = DAG.getNode(ISD::VECREDUCE_FMAXIMUM, dl, VT, Op1, SDFlags);
+ break;
+ case Intrinsic::vector_reduce_fminimum:
+ Res = DAG.getNode(ISD::VECREDUCE_FMINIMUM, dl, VT, Op1, SDFlags);
+ break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
}
@@ -9880,7 +10050,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SmallVector<EVT, 4> RetTys;
SmallVector<uint64_t, 4> Offsets;
auto &DL = CLI.DAG.getDataLayout();
- ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
+ ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets, 0);
if (CLI.IsPostTypeLegalization) {
// If we are lowering a libcall after legalization, split the return type.
@@ -10200,7 +10370,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// The instruction result is the result of loading from the
// hidden sret parameter.
SmallVector<EVT, 1> PVTs;
- Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
+ Type *PtrRetTy =
+ PointerType::get(OrigRetTy->getContext(), DL.getAllocaAddrSpace());
ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
@@ -10452,9 +10623,9 @@ static void tryToElideArgumentCopy(
DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
- SDValue ArgVal, bool &ArgHasUses) {
+ ArrayRef<SDValue> ArgVals, bool &ArgHasUses) {
// Check if this is a load from a fixed stack object.
- auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
+ auto *LNode = dyn_cast<LoadSDNode>(ArgVals[0]);
if (!LNode)
return;
auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
@@ -10497,7 +10668,8 @@ static void tryToElideArgumentCopy(
MFI.setIsImmutableObjectIndex(FixedIndex, false);
AllocaIndex = FixedIndex;
ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
- Chains.push_back(ArgVal.getValue(1));
+ for (SDValue ArgVal : ArgVals)
+ Chains.push_back(ArgVal.getValue(1));
// Avoid emitting code for the store implementing the copy.
const StoreInst *SI = ArgCopyIter->second.second;
@@ -10527,8 +10699,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
- F.getReturnType()->getPointerTo(
- DAG.getDataLayout().getAllocaAddrSpace()),
+ PointerType::get(F.getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
// NOTE: Assuming that a pointer will never break down to more than one VT
@@ -10721,8 +10893,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
- F.getReturnType()->getPointerTo(
- DAG.getDataLayout().getAllocaAddrSpace()),
+ PointerType::get(F.getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -10758,9 +10930,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Elide the copying store if the target loaded this argument from a
// suitable fixed stack object.
if (Ins[i].Flags.isCopyElisionCandidate()) {
+ unsigned NumParts = 0;
+ for (EVT VT : ValueVTs)
+ NumParts += TLI->getNumRegistersForCallingConv(*CurDAG->getContext(),
+ F.getCallingConv(), VT);
+
tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
- InVals[i], ArgHasUses);
+ ArrayRef(&InVals[i], NumParts), ArgHasUses);
}
// If this argument is unused then remember its value. It is used to generate
@@ -10872,12 +11049,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// If any argument copy elisions occurred and we have debug info, update the
// stale frame indices used in the dbg.declare variable info table.
- MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
- if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
- for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
- auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
+ if (!ArgCopyElisionFrameIndexMap.empty()) {
+ for (MachineFunction::VariableDbgInfo &VI :
+ MF->getInStackSlotVariableDbgInfo()) {
+ auto I = ArgCopyElisionFrameIndexMap.find(VI.getStackSlot());
if (I != ArgCopyElisionFrameIndexMap.end())
- VI.Slot = I->second;
+ VI.updateStackSlot(I->second);
}
}
@@ -11554,6 +11731,62 @@ void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
}
+void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I) {
+ auto DL = getCurSDLoc();
+ SDValue InVec = getValue(I.getOperand(0));
+ EVT OutVT =
+ InVec.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
+
+ unsigned OutNumElts = OutVT.getVectorMinNumElements();
+
+ // The ISD node needs the input vector split into two equal parts
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec,
+ DAG.getVectorIdxConstant(0, DL));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec,
+ DAG.getVectorIdxConstant(OutNumElts, DL));
+
+ // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
+ // legalisation and combines.
+ if (OutVT.isFixedLengthVector()) {
+ SDValue Even = DAG.getVectorShuffle(OutVT, DL, Lo, Hi,
+ createStrideMask(0, 2, OutNumElts));
+ SDValue Odd = DAG.getVectorShuffle(OutVT, DL, Lo, Hi,
+ createStrideMask(1, 2, OutNumElts));
+ SDValue Res = DAG.getMergeValues({Even, Odd}, getCurSDLoc());
+ setValue(&I, Res);
+ return;
+ }
+
+ SDValue Res = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+ DAG.getVTList(OutVT, OutVT), Lo, Hi);
+ setValue(&I, Res);
+}
+
+void SelectionDAGBuilder::visitVectorInterleave(const CallInst &I) {
+ auto DL = getCurSDLoc();
+ EVT InVT = getValue(I.getOperand(0)).getValueType();
+ SDValue InVec0 = getValue(I.getOperand(0));
+ SDValue InVec1 = getValue(I.getOperand(1));
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT OutVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
+ // legalisation and combines.
+ if (OutVT.isFixedLengthVector()) {
+ unsigned NumElts = InVT.getVectorMinNumElements();
+ SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, InVec0, InVec1);
+ setValue(&I, DAG.getVectorShuffle(OutVT, DL, V, DAG.getUNDEF(OutVT),
+ createInterleaveMask(NumElts, 2)));
+ return;
+ }
+
+ SDValue Res = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+ DAG.getVTList(InVT, InVT), InVec0, InVec1);
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Res.getValue(0),
+ Res.getValue(1));
+ setValue(&I, Res);
+}
+
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
@@ -11599,3 +11832,113 @@ void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
Mask.push_back(Idx + i);
setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));
}
+
+// Consider the following MIR after SelectionDAG, which produces output in
+// physregs in the first case or virtregs in the second case.
+//
+// INLINEASM_BR ..., implicit-def $ebx, ..., implicit-def $edx
+// %5:gr32 = COPY $ebx
+// %6:gr32 = COPY $edx
+// %1:gr32 = COPY %6:gr32
+// %0:gr32 = COPY %5:gr32
+//
+// INLINEASM_BR ..., def %5:gr32, ..., def %6:gr32
+// %1:gr32 = COPY %6:gr32
+// %0:gr32 = COPY %5:gr32
+//
+// Given %0, we'd like to return $ebx in the first case and %5 in the second.
+// Given %1, we'd like to return $edx in the first case and %6 in the second.
+//
+// If a callbr has outputs, it will have a single mapping in FuncInfo.ValueMap
+// to a single virtreg (such as %0). The remaining outputs monotonically
+// increase in virtreg number from there. If a callbr has no outputs, then it
+// should not have a corresponding callbr landingpad; in fact, the callbr
+// landingpad would not even be able to refer to such a callbr.
+static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) {
+ MachineInstr *MI = MRI.def_begin(Reg)->getParent();
+ // There is definitely at least one copy.
+ assert(MI->getOpcode() == TargetOpcode::COPY &&
+ "start of copy chain MUST be COPY");
+ Reg = MI->getOperand(1).getReg();
+ MI = MRI.def_begin(Reg)->getParent();
+ // There may be an optional second copy.
+ if (MI->getOpcode() == TargetOpcode::COPY) {
+ assert(Reg.isVirtual() && "expected COPY of virtual register");
+ Reg = MI->getOperand(1).getReg();
+ assert(Reg.isPhysical() && "expected COPY of physical register");
+ MI = MRI.def_begin(Reg)->getParent();
+ }
+ // The start of the chain must be an INLINEASM_BR.
+ assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
+ "end of copy chain MUST be INLINEASM_BR");
+ return Reg;
+}
+
+// We must do this walk rather than the simpler
+// setValue(&I, getCopyFromRegs(CBR, CBR->getType()));
+// otherwise we will end up with copies of virtregs only valid along direct
+// edges.
+void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) {
+ SmallVector<EVT, 8> ResultVTs;
+ SmallVector<SDValue, 8> ResultValues;
+ const auto *CBR =
+ cast<CallBrInst>(I.getParent()->getUniquePredecessor()->getTerminator());
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+
+ unsigned InitialDef = FuncInfo.ValueMap[CBR];
+ SDValue Chain = DAG.getRoot();
+
+ // Re-parse the asm constraints string.
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI.ParseConstraints(DAG.getDataLayout(), TRI, *CBR);
+ for (auto &T : TargetConstraints) {
+ SDISelAsmOperandInfo OpInfo(T);
+ if (OpInfo.Type != InlineAsm::isOutput)
+ continue;
+
+ // Pencil in OpInfo.ConstraintType and OpInfo.ConstraintVT based on the
+ // individual constraint.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+ switch (OpInfo.ConstraintType) {
+ case TargetLowering::C_Register:
+ case TargetLowering::C_RegisterClass: {
+ // Fill in OpInfo.AssignedRegs.Regs.
+ getRegistersForValue(DAG, getCurSDLoc(), OpInfo, OpInfo);
+
+ // getRegistersForValue may produce 1 to many registers based on whether
+ // the OpInfo.ConstraintVT is legal on the target or not.
+ for (size_t i = 0, e = OpInfo.AssignedRegs.Regs.size(); i != e; ++i) {
+ Register OriginalDef = FollowCopyChain(MRI, InitialDef++);
+ if (Register::isPhysicalRegister(OriginalDef))
+ FuncInfo.MBB->addLiveIn(OriginalDef);
+ // Update the assigned registers to use the original defs.
+ OpInfo.AssignedRegs.Regs[i] = OriginalDef;
+ }
+
+ SDValue V = OpInfo.AssignedRegs.getCopyFromRegs(
+ DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, CBR);
+ ResultValues.push_back(V);
+ ResultVTs.push_back(OpInfo.ConstraintVT);
+ break;
+ }
+ case TargetLowering::C_Other: {
+ SDValue Flag;
+ SDValue V = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+ OpInfo, DAG);
+ ++InitialDef;
+ ResultValues.push_back(V);
+ ResultVTs.push_back(OpInfo.ConstraintVT);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ResultVTs), ResultValues);
+ setValue(&I, V);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index bf2111013461..f2496f24973a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -30,7 +31,6 @@
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -119,25 +119,25 @@ class SelectionDAGBuilder {
: Info(VarLoc), SDNodeOrder(SDNO) {}
DILocalVariable *getVariable(const FunctionVarLocs *Locs) const {
- if (Info.is<VarLocTy>())
- return Locs->getDILocalVariable(Info.get<VarLocTy>()->VariableID);
- return Info.get<DbgValTy>()->getVariable();
+ if (isa<VarLocTy>(Info))
+ return Locs->getDILocalVariable(cast<VarLocTy>(Info)->VariableID);
+ return cast<DbgValTy>(Info)->getVariable();
}
DIExpression *getExpression() const {
- if (Info.is<VarLocTy>())
- return Info.get<VarLocTy>()->Expr;
- return Info.get<DbgValTy>()->getExpression();
+ if (isa<VarLocTy>(Info))
+ return cast<VarLocTy>(Info)->Expr;
+ return cast<DbgValTy>(Info)->getExpression();
}
Value *getVariableLocationOp(unsigned Idx) const {
assert(Idx == 0 && "Dangling variadic debug values not supported yet");
- if (Info.is<VarLocTy>())
- return Info.get<VarLocTy>()->V;
- return Info.get<DbgValTy>()->getVariableLocationOp(Idx);
+ if (isa<VarLocTy>(Info))
+ return cast<VarLocTy>(Info)->Values.getVariableLocationOp(Idx);
+ return cast<DbgValTy>(Info)->getVariableLocationOp(Idx);
}
DebugLoc getDebugLoc() const {
- if (Info.is<VarLocTy>())
- return Info.get<VarLocTy>()->DL;
- return Info.get<DbgValTy>()->getDebugLoc();
+ if (isa<VarLocTy>(Info))
+ return cast<VarLocTy>(Info)->DL;
+ return cast<DbgValTy>(Info)->getDebugLoc();
}
unsigned getSDNodeOrder() const { return SDNodeOrder; }
@@ -175,6 +175,10 @@ class SelectionDAGBuilder {
/// We defer handling these until we do see it.
MapVector<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
+ /// Cache the module flag for whether we should use debug-info assignment
+ /// tracking.
+ bool AssignmentTrackingEnabled = false;
+
public:
/// Loads are not emitted to the program immediately. We bunch them up and
/// then emit token factor nodes when possible. This allows us to get simple
@@ -243,7 +247,7 @@ public:
SelectionDAG &DAG;
AAResults *AA = nullptr;
AssumptionCache *AC = nullptr;
- const TargetLibraryInfo *LibInfo;
+ const TargetLibraryInfo *LibInfo = nullptr;
class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
public:
@@ -257,7 +261,7 @@ public:
}
private:
- SelectionDAGBuilder *SDB;
+ SelectionDAGBuilder *SDB = nullptr;
};
// Data related to deferred switch lowerings. Used to construct additional
@@ -279,7 +283,7 @@ public:
SwiftErrorValueTracking &SwiftError;
/// Garbage collection metadata for the function.
- GCFunctionInfo *GFI;
+ GCFunctionInfo *GFI = nullptr;
/// Map a landing pad to the call site indexes.
DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap;
@@ -288,7 +292,7 @@ public:
/// a tail call. In this case, no subsequent DAG nodes should be created.
bool HasTailCall = false;
- LLVMContext *Context;
+ LLVMContext *Context = nullptr;
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
@@ -372,6 +376,10 @@ public:
DIExpression *Expr, DebugLoc DbgLoc, unsigned Order,
bool IsVariadic);
+ /// Create a record for a kill location debug intrinsic.
+ void handleKillDebugValue(DILocalVariable *Var, DIExpression *Expr,
+ DebugLoc DbgLoc, unsigned Order);
+
/// Evict any dangling debug information, attempting to salvage it first.
void resolveOrClearDbgInfo();
@@ -534,6 +542,7 @@ private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
void visitCallBr(const CallBrInst &I);
+ void visitCallBrLandingPad(const CallInst &I);
void visitResume(const ResumeInst &I);
void visitUnary(const User &I, unsigned Opcode);
@@ -620,17 +629,17 @@ private:
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPStore(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPScatter(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVectorImpl<SDValue> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPStridedStore(const VPIntrinsic &VPIntrin,
- SmallVectorImpl<SDValue> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPCmp(const VPCmpIntrinsic &VPIntrin);
void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
@@ -648,6 +657,8 @@ private:
void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
void visitVectorReverse(const CallInst &I);
void visitVectorSplice(const CallInst &I);
+ void visitVectorInterleave(const CallInst &I);
+ void visitVectorDeinterleave(const CallInst &I);
void visitStepVector(const CallInst &I);
void visitUserOp1(const Instruction &I) {
@@ -669,7 +680,6 @@ private:
/// EmitFuncArgumentDbgValue.
enum class FuncArgumentDbgValueKind {
Value, // This was originally a llvm.dbg.value.
- Addr, // This was originally a llvm.dbg.addr.
Declare, // This was originally a llvm.dbg.declare.
};
@@ -760,7 +770,7 @@ struct RegsForValue {
/// updates them for the output Chain/Flag. If the Flag pointer is NULL, no
/// flag is used.
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
- const SDLoc &dl, SDValue &Chain, SDValue *Flag,
+ const SDLoc &dl, SDValue &Chain, SDValue *Glue,
const Value *V = nullptr) const;
/// Emit a series of CopyToReg nodes that copies the specified value into the
@@ -769,7 +779,7 @@ struct RegsForValue {
/// flag is used. If V is not nullptr, then it is used in printing better
/// diagnostic messages on error.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl,
- SDValue &Chain, SDValue *Flag, const Value *V = nullptr,
+ SDValue &Chain, SDValue *Glue, const Value *V = nullptr,
ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
/// Add this value to the specified inlineasm node operand list. This adds the
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index fe4261291fc5..03a1ead5bbb4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -40,7 +41,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -283,6 +283,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UMIN: return "umin";
case ISD::UMAX: return "umax";
+ case ISD::FLDEXP: return "fldexp";
+ case ISD::STRICT_FLDEXP: return "strict_fldexp";
+ case ISD::FFREXP: return "ffrexp";
case ISD::FPOWI: return "fpowi";
case ISD::STRICT_FPOWI: return "strict_fpowi";
case ISD::SETCC: return "setcc";
@@ -297,6 +300,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CONCAT_VECTORS: return "concat_vectors";
case ISD::INSERT_SUBVECTOR: return "insert_subvector";
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::VECTOR_DEINTERLEAVE: return "vector_deinterleave";
+ case ISD::VECTOR_INTERLEAVE: return "vector_interleave";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
case ISD::VECTOR_SPLICE: return "vector_splice";
@@ -307,7 +312,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CARRY_FALSE: return "carry_false";
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
- case ISD::ADDCARRY: return "addcarry";
+ case ISD::UADDO_CARRY: return "uaddo_carry";
case ISD::SADDO_CARRY: return "saddo_carry";
case ISD::SADDO: return "saddo";
case ISD::UADDO: return "uaddo";
@@ -317,7 +322,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UMULO: return "umulo";
case ISD::SUBC: return "subc";
case ISD::SUBE: return "sube";
- case ISD::SUBCARRY: return "subcarry";
+ case ISD::USUBO_CARRY: return "usubo_carry";
case ISD::SSUBO_CARRY: return "ssubo_carry";
case ISD::SHL_PARTS: return "shl_parts";
case ISD::SRA_PARTS: return "sra_parts";
@@ -429,6 +434,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
// Floating point environment manipulation
case ISD::GET_ROUNDING: return "get_rounding";
case ISD::SET_ROUNDING: return "set_rounding";
+ case ISD::GET_FPENV: return "get_fpenv";
+ case ISD::SET_FPENV: return "set_fpenv";
+ case ISD::RESET_FPENV: return "reset_fpenv";
+ case ISD::GET_FPENV_MEM: return "get_fpenv_mem";
+ case ISD::SET_FPENV_MEM: return "set_fpenv_mem";
// Bit manipulation
case ISD::ABS: return "abs";
@@ -491,6 +501,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
+ case ISD::VECREDUCE_FMAXIMUM: return "vecreduce_fmaximum";
+ case ISD::VECREDUCE_FMINIMUM: return "vecreduce_fminimum";
case ISD::STACKMAP:
return "stackmap";
case ISD::PATCHPOINT:
@@ -698,7 +710,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
else
OS << "<null>";
} else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
- OS << ":" << N->getVT().getEVTString();
+ OS << ":" << N->getVT();
}
else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
OS << "<";
@@ -713,7 +725,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
case ISD::ZEXTLOAD: OS << ", zext"; break;
}
if (doExt)
- OS << " from " << LD->getMemoryVT().getEVTString();
+ OS << " from " << LD->getMemoryVT();
const char *AM = getIndexedModeName(LD->getAddressingMode());
if (*AM)
@@ -725,7 +737,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
printMemOperand(OS, *ST->getMemOperand(), G);
if (ST->isTruncatingStore())
- OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+ OS << ", trunc to " << ST->getMemoryVT();
const char *AM = getIndexedModeName(ST->getAddressingMode());
if (*AM)
@@ -745,7 +757,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
case ISD::ZEXTLOAD: OS << ", zext"; break;
}
if (doExt)
- OS << " from " << MLd->getMemoryVT().getEVTString();
+ OS << " from " << MLd->getMemoryVT();
const char *AM = getIndexedModeName(MLd->getAddressingMode());
if (*AM)
@@ -760,7 +772,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
printMemOperand(OS, *MSt->getMemOperand(), G);
if (MSt->isTruncatingStore())
- OS << ", trunc to " << MSt->getMemoryVT().getEVTString();
+ OS << ", trunc to " << MSt->getMemoryVT();
const char *AM = getIndexedModeName(MSt->getAddressingMode());
if (*AM)
@@ -782,7 +794,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
case ISD::ZEXTLOAD: OS << ", zext"; break;
}
if (doExt)
- OS << " from " << MGather->getMemoryVT().getEVTString();
+ OS << " from " << MGather->getMemoryVT();
auto Signed = MGather->isIndexSigned() ? "signed" : "unsigned";
auto Scaled = MGather->isIndexScaled() ? "scaled" : "unscaled";
@@ -794,7 +806,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
printMemOperand(OS, *MScatter->getMemOperand(), G);
if (MScatter->isTruncatingStore())
- OS << ", trunc to " << MScatter->getMemoryVT().getEVTString();
+ OS << ", trunc to " << MScatter->getMemoryVT();
auto Signed = MScatter->isIndexSigned() ? "signed" : "unsigned";
auto Scaled = MScatter->isIndexScaled() ? "scaled" : "unscaled";
@@ -849,6 +861,12 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
Dbg->print(OS);
} else if (getHasDebugValue())
OS << " [NoOfDbgValues>0]";
+
+ if (const auto *MD = G ? G->getPCSections(this) : nullptr) {
+ OS << " [pcsections ";
+ MD->printAsOperand(OS, G->getMachineFunction().getFunction().getParent());
+ OS << ']';
+ }
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 902f46115557..35abd990f968 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -25,13 +25,12 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FastISel.h"
@@ -49,6 +48,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -60,6 +60,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -67,6 +68,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
@@ -91,7 +93,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -425,9 +426,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- CurDAG->init(*MF, *ORE, this, LibInfo,
- getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI,
- FnVarLocs);
+ UniformityInfo *UA = nullptr;
+ if (auto *UAPass = getAnalysisIfAvailable<UniformityInfoWrapperPass>())
+ UA = &UAPass->getUniformityInfo();
+ CurDAG->init(*MF, *ORE, this, LibInfo, UA, PSI, BFI, FnVarLocs);
FuncInfo->set(Fn, *MF, CurDAG);
SwiftError->setFunction(*MF);
@@ -1291,6 +1293,43 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
return true;
}
+// Mark and report IPToState for each block under IsEHa
+void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
+ MachineModuleInfo &MMI = MF->getMMI();
+ llvm::WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo();
+ if (!EHInfo)
+ return;
+ for (auto MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = &*MBBI;
+ const BasicBlock *BB = MBB->getBasicBlock();
+ int State = EHInfo->BlockToStateMap[BB];
+ if (BB->getFirstMayFaultInst()) {
+ // Report IP range only for blocks with a faulting instruction
+ auto MBBb = MBB->getFirstNonPHI();
+ MachineInstr *MIb = &*MBBb;
+ if (MIb->isTerminator())
+ continue;
+
+ // Insert EH Labels
+ MCSymbol *BeginLabel = MMI.getContext().createTempSymbol();
+ MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
+ EHInfo->addIPToStateRange(State, BeginLabel, EndLabel);
+ BuildMI(*MBB, MBBb, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL))
+ .addSym(BeginLabel);
+ auto MBBe = MBB->instr_end();
+ MachineInstr *MIe = &*(--MBBe);
+ // insert before (possibly multiple) terminators
+ while (MIe->isTerminator())
+ MIe = &*(--MBBe);
+ ++MBBe;
+ BuildMI(*MBB, MBBe, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL))
+ .addSym(EndLabel);
+ }
+ }
+}
+
/// isFoldedOrDeadInstruction - Return true if the specified instruction is
/// side-effect free and is either dead or folded into a generated instruction.
/// Return false if it needs to be emitted.
@@ -1303,9 +1342,42 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo.isExportedInst(I); // Exported instrs must be computed.
}
-static void processDbgDeclare(FunctionLoweringInfo &FuncInfo,
+static bool processIfEntryValueDbgDeclare(FunctionLoweringInfo &FuncInfo,
+ const Value *Arg, DIExpression *Expr,
+ DILocalVariable *Var,
+ DebugLoc DbgLoc) {
+ if (!Expr->isEntryValue() || !isa<Argument>(Arg))
+ return false;
+
+ auto ArgIt = FuncInfo.ValueMap.find(Arg);
+ if (ArgIt == FuncInfo.ValueMap.end())
+ return false;
+ Register ArgVReg = ArgIt->getSecond();
+
+ // Find the corresponding livein physical register to this argument.
+ for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
+ if (VirtReg == ArgVReg) {
+ FuncInfo.MF->setVariableDbgInfo(Var, Expr, PhysReg, DbgLoc);
+ LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var
+ << ", Expr=" << *Expr << ", MCRegister=" << PhysReg
+ << ", DbgLoc=" << DbgLoc << "\n");
+ return true;
+ }
+ return false;
+}
+
+static bool processDbgDeclare(FunctionLoweringInfo &FuncInfo,
const Value *Address, DIExpression *Expr,
DILocalVariable *Var, DebugLoc DbgLoc) {
+ if (!Address) {
+ LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *Var
+ << " (bad address)\n");
+ return false;
+ }
+
+ if (processIfEntryValueDbgDeclare(FuncInfo, Address, Expr, Var, DbgLoc))
+ return true;
+
MachineFunction *MF = FuncInfo.MF;
const DataLayout &DL = MF->getDataLayout();
@@ -1329,7 +1401,7 @@ static void processDbgDeclare(FunctionLoweringInfo &FuncInfo,
FI = FuncInfo.getArgumentFrameIndex(Arg);
if (FI == std::numeric_limits<int>::max())
- return;
+ return false;
if (Offset.getBoolValue())
Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
@@ -1339,24 +1411,17 @@ static void processDbgDeclare(FunctionLoweringInfo &FuncInfo,
<< ", Expr=" << *Expr << ", FI=" << FI
<< ", DbgLoc=" << DbgLoc << "\n");
MF->setVariableDbgInfo(Var, Expr, FI, DbgLoc);
+ return true;
}
/// Collect llvm.dbg.declare information. This is done after argument lowering
/// in case the declarations refer to arguments.
static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
- for (const BasicBlock &BB : *FuncInfo.Fn) {
- for (const Instruction &I : BB) {
- if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) {
- Value *Address = DI->getAddress();
- if (!Address) {
- LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI
- << " (bad address)\n");
- continue;
- }
- processDbgDeclare(FuncInfo, Address, DI->getExpression(),
- DI->getVariable(), DI->getDebugLoc());
- }
- }
+ for (const auto &I : instructions(*FuncInfo.Fn)) {
+ const auto *DI = dyn_cast<DbgDeclareInst>(&I);
+ if (DI && processDbgDeclare(FuncInfo, DI->getAddress(), DI->getExpression(),
+ DI->getVariable(), DI->getDebugLoc()))
+ FuncInfo.PreprocessedDbgDeclares.insert(DI);
}
}
@@ -1367,9 +1432,11 @@ static void processSingleLocVars(FunctionLoweringInfo &FuncInfo,
FunctionVarLocs const *FnVarLocs) {
for (auto It = FnVarLocs->single_locs_begin(),
End = FnVarLocs->single_locs_end();
- It != End; ++It)
- processDbgDeclare(FuncInfo, It->V, It->Expr,
+ It != End; ++It) {
+ assert(!It->Values.hasArgList() && "Single loc variadic ops not supported");
+ processDbgDeclare(FuncInfo, It->Values.getVariableLocationOp(0), It->Expr,
FnVarLocs->getDILocalVariable(It->VariableID), It->DL);
+ }
}
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
@@ -1408,7 +1475,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
Fn.getSubprogram(),
&Fn.getEntryBlock());
R << "FastISel didn't lower all arguments: "
- << ore::NV("Prototype", Fn.getType());
+ << ore::NV("Prototype", Fn.getFunctionType());
reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 1);
// Use SelectionDAG argument lowering
@@ -1646,6 +1713,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
ElidedArgCopyInstrs.clear();
}
+ // AsynchEH: Report Block State under -AsynchEH
+ if (Fn.getParent()->getModuleFlag("eh-asynch"))
+ reportIPToStateForBlocks(MF);
+
SP.copyToMachineFrameInfo(MF->getFrameInfo());
SwiftError->propagateVRegs();
@@ -2273,7 +2344,7 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) {
// Stash the chain and glue operands so we can move them to the end.
SDValue Chain = *It++;
- SDValue InFlag = *It++;
+ SDValue InGlue = *It++;
// <id> operand.
SDValue ID = *It++;
@@ -2290,7 +2361,7 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) {
pushStackMapLiveVariable(Ops, *It, DL);
Ops.push_back(Chain);
- Ops.push_back(InFlag);
+ Ops.push_back(InGlue);
SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue);
CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops);
@@ -3240,7 +3311,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
- LLVM_DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ LLVM_DEBUG(dbgs() << " TypeSwitch[" << CurNodeVT
<< "] from " << SwitchStart << " to " << MatcherIndex
<< '\n');
continue;
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 57bfe344dbab..5afd05648772 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -42,7 +43,6 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
@@ -258,8 +258,7 @@ static bool willLowerDirectly(SDValue Incoming) {
if (Incoming.getValueType().getSizeInBits() > 64)
return false;
- return (isa<ConstantSDNode>(Incoming) || isa<ConstantFPSDNode>(Incoming) ||
- Incoming.isUndef());
+ return isIntOrFPConstant(Incoming) || Incoming.isUndef();
}
/// Try to find existing copies of the incoming values in stack slots used for
@@ -490,7 +489,7 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
Ops.push_back(std::get<0>(Res));
if (auto *MMO = std::get<2>(Res))
MemRefs.push_back(MMO);
- Chain = std::get<1>(Res);;
+ Chain = std::get<1>(Res);
Builder.DAG.setRoot(Chain);
}
@@ -1250,7 +1249,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
// All the reloads are independent and are reading memory only modified by
// statepoints (i.e. no other aliasing stores); informing SelectionDAG of
- // this this let's CSE kick in for free and allows reordering of
+ // this lets CSE kick in for free and allows reordering of
// instructions if possible. The lowering for statepoint sets the root,
// so this is ordering all reloads with the either
// a) the statepoint node itself, or
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8d4c8802f71c..a84d35a6ea4e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -504,6 +504,11 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
+ // Early out if we've ended up calling an undemanded node; leave this to
+ // constant folding.
+ if (DemandedBits.isZero() || DemandedElts.isZero())
+ return false;
+
// Do target-specific constant optimization.
if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return TLO.New.getNode();
@@ -552,18 +557,19 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
- const APInt &Demanded,
+ const APInt &DemandedBits,
TargetLoweringOpt &TLO) const {
assert(Op.getNumOperands() == 2 &&
"ShrinkDemandedOp only supports binary operators!");
assert(Op.getNode()->getNumValues() == 1 &&
"ShrinkDemandedOp only supports nodes with one result!");
+ EVT VT = Op.getValueType();
SelectionDAG &DAG = TLO.DAG;
SDLoc dl(Op);
// Early return, as this function cannot handle vector types.
- if (Op.getValueType().isVector())
+ if (VT.isVector())
return false;
// Don't do this if the node has another user, which may require the
@@ -574,21 +580,18 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
// Search for the smallest integer type with free casts to and from
// Op's type. For expedience, just check power-of-2 integer types.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned DemandedSize = Demanded.getActiveBits();
- unsigned SmallVTBits = DemandedSize;
- if (!isPowerOf2_32(SmallVTBits))
- SmallVTBits = NextPowerOf2(SmallVTBits);
- for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ unsigned DemandedSize = DemandedBits.getActiveBits();
+ for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
+ SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
- if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
- TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
// We found a type with free casts.
SDValue X = DAG.getNode(
Op.getOpcode(), dl, SmallVT,
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
- SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
+ SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
return TLO.CombineTo(Op, Z);
}
}
@@ -773,7 +776,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
- unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return Op0;
}
@@ -805,7 +808,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExBits = ExVT.getScalarSizeInBits();
- if (DemandedBits.getActiveBits() <= ExBits)
+ if (DemandedBits.getActiveBits() <= ExBits &&
+ shouldRemoveRedundantExtend(Op))
return Op0;
// If the input is already sign extended, just drop the extension.
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
@@ -856,15 +860,6 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
// If we don't demand the inserted subvector, return the base vector.
if (DemandedSubElts == 0)
return Vec;
- // If this simply widens the lowest subvector, see if we can do it earlier.
- // TODO: REMOVE ME - SimplifyMultipleUseDemandedBits shouldn't be creating
- // general nodes like this.
- if (Idx == 0 && Vec.isUndef()) {
- if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
- Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- Op.getOperand(0), NewSub, Op.getOperand(2));
- }
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -954,33 +949,30 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
SDValue ExtOpA = Add.getOperand(0);
SDValue ExtOpB = Add.getOperand(1);
- auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Add2;
+ auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
ConstantSDNode *ConstOp;
- if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) &&
- ConstOp->isOne()) {
- ExtOpA = Op2;
- ExtOpB = Op3;
- return true;
- }
if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
ConstOp->isOne()) {
ExtOpA = Op1;
ExtOpB = Op3;
+ Add2 = A;
return true;
}
if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
ConstOp->isOne()) {
ExtOpA = Op1;
ExtOpB = Op2;
+ Add2 = A;
return true;
}
return false;
};
bool IsCeil =
(ExtOpA.getOpcode() == ISD::ADD &&
- MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) ||
+ MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
(ExtOpB.getOpcode() == ISD::ADD &&
- MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA));
+ MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
// If the shift is signed (sra):
// - Needs >= 2 sign bit for both operands.
@@ -1040,11 +1032,25 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
EVT VT = Op.getValueType();
unsigned MinWidth =
std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth));
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
if (VT.isVector())
NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
- if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT))
- return SDValue();
+ if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
+ // If we could not transform, and (both) adds are nuw/nsw, we can use the
+ // larger type size to do the transform.
+ if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
+ return SDValue();
+
+ if (DAG.computeOverflowForAdd(IsSigned, Add.getOperand(0),
+ Add.getOperand(1)) ==
+ SelectionDAG::OFK_Never &&
+ (!Add2 || DAG.computeOverflowForAdd(IsSigned, Add2.getOperand(0),
+ Add2.getOperand(1)) ==
+ SelectionDAG::OFK_Never))
+ NVT = VT;
+ else
+ return SDValue();
+ }
SDLoc DL(Op);
SDValue ResultAVG =
@@ -1198,7 +1204,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
if (!!DemandedVecElts)
- Known = KnownBits::commonBits(Known, KnownVec);
+ Known = Known.intersectWith(KnownVec);
return false;
}
@@ -1226,9 +1232,9 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedSubElts)
- Known = KnownBits::commonBits(Known, KnownSub);
+ Known = Known.intersectWith(KnownSub);
if (!!DemandedSrcElts)
- Known = KnownBits::commonBits(Known, KnownSrc);
+ Known = Known.intersectWith(KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
@@ -1290,7 +1296,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Known bits are shared by every demanded subvector element.
if (!!DemandedSubElts)
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
break;
}
@@ -1314,13 +1320,13 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
Depth + 1))
return true;
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
if (!!DemandedRHS) {
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
Depth + 1))
return true;
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
// Attempt to avoid multi-use ops if we don't need anything from them.
@@ -1622,7 +1628,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
case ISD::VSELECT:
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
@@ -1635,7 +1641,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
@@ -1652,7 +1658,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
@@ -1724,12 +1730,9 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned InnerBits = InnerVT.getScalarSizeInBits();
if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
- EVT ShTy = getShiftAmountTy(InnerVT, DL);
- if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
- ShTy = InnerVT;
- SDValue NarrowShl =
- TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
- TLO.DAG.getConstant(ShAmt, dl, ShTy));
+ SDValue NarrowShl = TLO.DAG.getNode(
+ ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
@@ -1748,7 +1751,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
DemandedBits.getActiveBits() <=
(InnerBits - InnerShAmt + ShAmt) &&
- DemandedBits.countTrailingZeros() >= ShAmt) {
+ DemandedBits.countr_zero() >= ShAmt) {
SDValue NewSA =
TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
@@ -1771,7 +1774,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero.setLowBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
@@ -1789,7 +1792,7 @@ bool TargetLowering::SimplifyDemandedBits(
// This is a variable shift, so we can't shift the demand mask by a known
// amount. But if we are not demanding high bits, then we are not
// demanding those bits from the pre-shifted operand either.
- if (unsigned CTLZ = DemandedBits.countLeadingZeros()) {
+ if (unsigned CTLZ = DemandedBits.countl_zero()) {
APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
Depth + 1)) {
@@ -1814,7 +1817,7 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
- unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return TLO.CombineTo(Op, Op0);
}
@@ -1865,6 +1868,27 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
+ // Narrow shift to lower half - similar to ShrinkDemandedOp.
+ // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
+ if ((BitWidth % 2) == 0 && !VT.isVector() &&
+ ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
+ TLO.DAG.MaskedValueIsZero(
+ Op0, APInt::getHighBitsSet(BitWidth, BitWidth / 2)))) {
+ EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
+ if (isNarrowingProfitable(VT, HalfVT) &&
+ isTypeDesirableForOp(ISD::SRL, HalfVT) &&
+ isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
+ (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
+ SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
+ ShAmt, HalfVT, dl, TLO.LegalTypes());
+ SDValue NewShift =
+ TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
+ }
+ }
+
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
@@ -1876,7 +1900,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
@@ -1884,6 +1908,10 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, NewOp);
}
}
+ } else {
+ // Use generic knownbits computation as it has support for non-uniform
+ // shift amounts.
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
}
break;
}
@@ -1894,7 +1922,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only want bits that already match the signbit then we don't need
// to shift.
- unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
NumHiDemandedBits)
return TLO.CombineTo(Op, Op0);
@@ -1926,7 +1954,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
- if (DemandedBits.countLeadingZeros() < ShAmt)
+ if (DemandedBits.countl_zero() < ShAmt)
InDemandedMask.setSignBit();
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
@@ -1939,7 +1967,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
- DemandedBits.countLeadingZeros() >= ShAmt) {
+ DemandedBits.countl_zero() >= ShAmt) {
SDNodeFlags Flags;
Flags.setExact(Op->getFlags().hasExact());
return TLO.CombineTo(
@@ -2003,8 +2031,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
- Known.One |= Known2.One;
- Known.Zero |= Known2.Zero;
+ Known = Known.unionWith(Known2);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
@@ -2059,12 +2086,12 @@ bool TargetLowering::SimplifyDemandedBits(
// See if we don't demand either half of the rotated bits.
if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
- DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
+ DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
}
if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
- DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
+ DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
}
@@ -2120,8 +2147,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If the only bits demanded come from one byte of the bswap result,
// just shift the input byte into position to eliminate the bswap.
- unsigned NLZ = DemandedBits.countLeadingZeros();
- unsigned NTZ = DemandedBits.countTrailingZeros();
+ unsigned NLZ = DemandedBits.countl_zero();
+ unsigned NTZ = DemandedBits.countr_zero();
// Round NTZ down to the next byte. If we have 11 trailing zeros, then
// we need all the bits down to bit 8. Likewise, round NLZ. If we
@@ -2557,6 +2584,15 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
+ if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
+ SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
}
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
@@ -2572,9 +2608,9 @@ bool TargetLowering::SimplifyDemandedBits(
// The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
// If we demand exactly one bit N and we have "X * (C' << N)" where C' is
// odd (has LSB set), then the left-shifted low bit of X is the answer.
- unsigned CTZ = DemandedBits.countTrailingZeros();
+ unsigned CTZ = DemandedBits.countr_zero();
ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
- if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
+ if (C && C->getAPIntValue().countr_zero() == CTZ) {
EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
@@ -2596,11 +2632,12 @@ bool TargetLowering::SimplifyDemandedBits(
// of the highest bit demanded of them.
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
SDNodeFlags Flags = Op.getNode()->getFlags();
- unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
+ unsigned DemandedBitsLZ = DemandedBits.countl_zero();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
- if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
+ KnownBits KnownOp0, KnownOp1;
+ if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
Depth + 1) ||
- SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
+ SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
@@ -2697,7 +2734,14 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- [[fallthrough]];
+ if (Op.getOpcode() == ISD::MUL) {
+ Known = KnownBits::mul(KnownOp0, KnownOp1);
+ } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
+ Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
+ Flags.hasNoSignedWrap(), KnownOp0,
+ KnownOp1);
+ }
+ break;
}
default:
// We also ask the target about intrinsics (which could be specific to it).
@@ -3914,8 +3958,7 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL) const {
- assert(isConstOrConstSplat(N1C) &&
- isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
+ assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
"Should be a comparison with 0.");
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Valid only for [in]equality comparisons.");
@@ -4081,8 +4124,12 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
- SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+  // It's not uncommon for a known-never-zero X to appear in (ctpop X) eq/ne 1,
+  // so check for that before emitting a potentially unnecessary op.
+ if (DAG.isKnownNeverZero(CTOp))
+ return RHS;
+ SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
}
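The early-out added above leans on a scalar identity: for a value already known to be non-zero, (ctpop X) == 1 is exactly the power-of-two test (X & (X - 1)) == 0, so the extra "X != 0" setcc is redundant. A minimal standalone C++ sketch (not part of the patch, using C++20 std::popcount) checking that identity exhaustively on 8-bit values:

// Standalone sketch: for non-zero X, popcount(X) == 1 iff (X & (X - 1)) == 0.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned V = 1; V <= 0xFF; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    bool CtpopIsOne = std::popcount(X) == 1;
    bool MaskTest = (X & static_cast<uint8_t>(X - 1)) == 0;
    assert(CtpopIsOne == MaskTest);
  }
  return 0;
}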
@@ -4219,12 +4266,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
bool N1ConstOrSplat =
isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
- // Ensure that the constant occurs on the RHS and fold constant comparisons.
+ // Canonicalize toward having the constant on the RHS.
// TODO: Handle non-splat vector constants. All undef causes trouble.
// FIXME: We can't yet fold constant scalable vector splats, so avoid an
// infinite loop here when we encounter one.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
- if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
+ if (N0ConstOrSplat && !N1ConstOrSplat &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -4275,7 +4322,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// zero.
if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
- isPowerOf2_32(N0.getScalarValueSizeInBits())) {
+ llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
@@ -4315,7 +4362,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// DAGCombine turns costly ZExts into ANDs
if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
if ((C->getAPIntValue()+1).isPowerOf2()) {
- MinBits = C->getAPIntValue().countTrailingOnes();
+ MinBits = C->getAPIntValue().countr_one();
PreExt = N0->getOperand(0);
}
} else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
@@ -4336,7 +4383,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
// Figure out how many bits we need to preserve this constant.
- unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();
+ unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
// Make sure we're not losing bits from the constant.
if (MinBits > 0 &&
@@ -4510,7 +4557,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the constant doesn't fit into the number of bits for the source of
// the sign extension, it is impossible for both sides to be equal.
- if (C1.getMinSignedBits() > ExtSrcTyBits)
+ if (C1.getSignificantBits() > ExtSrcTyBits)
return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
assert(ExtDstTy == N0.getOperand(0).getValueType() &&
@@ -4744,8 +4791,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// For example, when high 32-bits of i64 X are known clear:
// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
- bool CmpZero = N1C->getAPIntValue().isZero();
- bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
+ bool CmpZero = N1C->isZero();
+ bool CmpNegOne = N1C->isAllOnes();
if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
// Match or(lo,shl(hi,bw/2)) pattern.
auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
@@ -4866,7 +4913,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
- if (C1.getMinSignedBits() <= 64 &&
+ if (C1.getSignificantBits() <= 64 &&
!isLegalICmpImmediate(C1.getSExtValue())) {
EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
// (X & -256) == 256 -> (X >> 8) == 1
@@ -4875,7 +4922,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
- unsigned ShiftBits = AndRHSC.countTrailingZeros();
+ unsigned ShiftBits = AndRHSC.countr_zero();
if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift =
DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
@@ -4896,14 +4943,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt NewC = C1;
ISD::CondCode NewCond = Cond;
if (AdjOne) {
- ShiftBits = C1.countTrailingOnes();
+ ShiftBits = C1.countr_one();
NewC = NewC + 1;
NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
} else {
- ShiftBits = C1.countTrailingZeros();
+ ShiftBits = C1.countr_zero();
}
NewC.lshrInPlace(ShiftBits);
- if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
+ if (ShiftBits && NewC.getSignificantBits() <= 64 &&
isLegalICmpImmediate(NewC.getSExtValue()) &&
!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
@@ -4980,6 +5027,23 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
+ // ~X > ~Y --> Y > X
+ // ~X < ~Y --> Y < X
+ // ~X < C --> X > ~C
+ // ~X > C --> X < ~C
+ if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
+ N0.getValueType().isInteger()) {
+ if (isBitwiseNot(N0)) {
+ if (isBitwiseNot(N1))
+ return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ SDValue Not = DAG.getNOT(dl, N1, OpVT);
+ return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
+ }
+ }
+ }
+
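The folds added above work because bitwise-not is strictly order-reversing for both the signed and the unsigned orderings, so the operands (or the constant) can be un-notted while keeping the condition. A standalone sketch (not part of the patch) verifying the identities exhaustively on 8-bit values:

// Standalone sketch: ~X cmp ~Y <=> Y cmp X, and ~X cmp C <=> ~C cmp X,
// for both signed and unsigned comparisons.
#include <cassert>
#include <cstdint>

int main() {
  for (int XI = 0; XI < 256; ++XI) {
    for (int YI = 0; YI < 256; ++YI) {
      uint8_t X = (uint8_t)XI, Y = (uint8_t)YI;
      uint8_t NotX = (uint8_t)~X, NotY = (uint8_t)~Y;
      assert((NotX < NotY) == (Y < X));                  // unsigned ordering
      assert(((int8_t)NotX < (int8_t)NotY) ==
             ((int8_t)Y < (int8_t)X));                   // signed ordering
      assert((NotX < Y) == ((uint8_t)~Y < X));           // ~X < C --> ~C < X
    }
  }
  return 0;
}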
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getValueType().isInteger()) {
if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
@@ -5225,7 +5289,7 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
}
SDValue TargetLowering::LowerAsmOutputForConstraint(
- SDValue &Chain, SDValue &Flag, const SDLoc &DL,
+ SDValue &Chain, SDValue &Glue, const SDLoc &DL,
const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
return SDValue();
}
@@ -5308,10 +5372,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
-void TargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
- SmallVectorImpl<SDValue> &Ops,
- SelectionDAG &DAG) const {
- return;
+void TargetLowering::CollectTargetIntrinsicOperands(
+ const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
std::pair<unsigned, const TargetRegisterClass *>
@@ -5782,7 +5844,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
if (C->isZero())
return false;
APInt Divisor = C->getAPIntValue();
- unsigned Shift = Divisor.countTrailingZeros();
+ unsigned Shift = Divisor.countr_zero();
if (Shift) {
Divisor.ashrInPlace(Shift);
UseSRA = true;
@@ -5972,6 +6034,20 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
return SDValue(LoHi.getNode(), 1);
}
+  // If a type twice as wide is legal, widen and use a mul plus a shift.
+ unsigned Size = VT.getScalarSizeInBits();
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorElementCount());
+ if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
+ Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
+ Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
+ DAG.getShiftAmountConstant(EltBits, WideVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
+ }
return SDValue();
};
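The new fallback above (and the analogous one added to BuildUDIV below) recovers the high half of a multiply from a plain MUL on a type twice as wide: extend both operands, multiply, shift right by the element width, and truncate. A standalone sketch (not part of the patch) of that computation for 32-bit elements, using 64-bit arithmetic as the assumed wide type:

// Standalone sketch: MULHS/MULHU for 32-bit values via a 64-bit multiply.
#include <cassert>
#include <cstdint>

static int32_t MulHiS32(int32_t X, int32_t Y) {
  int64_t Wide = (int64_t)X * (int64_t)Y;     // SIGN_EXTEND + MUL
  return (int32_t)((uint64_t)Wide >> 32);     // SRL by the element width, TRUNCATE
}

static uint32_t MulHiU32(uint32_t X, uint32_t Y) {
  uint64_t Wide = (uint64_t)X * (uint64_t)Y;  // ZERO_EXTEND + MUL
  return (uint32_t)(Wide >> 32);              // SRL by the element width, TRUNCATE
}

int main() {
  assert(MulHiU32(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFEu);
  assert(MulHiS32(-1, -1) == 0);              // (-1)*(-1) = 1, high half is 0
  assert(MulHiS32(INT32_MIN, 2) == -1);       // -2^32 has high half 0xFFFFFFFF
  return 0;
}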
@@ -6045,9 +6121,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
// UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
// the dividend exceeds the leading zeros for the divisor.
- LeadingZeros =
- std::min(LeadingZeros,
- cast<ConstantSDNode>(N1)->getAPIntValue().countLeadingZeros());
+ LeadingZeros = std::min(
+ LeadingZeros, cast<ConstantSDNode>(N1)->getAPIntValue().countl_zero());
}
bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
@@ -6146,6 +6221,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
return SDValue(LoHi.getNode(), 1);
}
+  // If a type twice as wide is legal, widen and use a mul plus a shift.
+ unsigned Size = VT.getScalarSizeInBits();
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorElementCount());
+ if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
+ X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
+ Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
+ Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
+ Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
+ DAG.getShiftAmountConstant(EltBits, WideVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
+ }
return SDValue(); // No mulhu or equivalent
};
@@ -6298,7 +6387,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
- unsigned K = D.countTrailingZeros();
+ unsigned K = D.countr_zero();
assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
@@ -6540,7 +6629,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
AllDivisorsAreOnes &= D.isOne();
// Decompose D into D0 * 2^K
- unsigned K = D.countTrailingZeros();
+ unsigned K = D.countr_zero();
assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
@@ -6696,9 +6785,9 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// NOTE: we avoid letting illegal types through even if we're before legalize
// ops – legalization has a hard time producing good code for the code that
// follows.
- if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
+ if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
!isOperationLegalOrCustom(ISD::AND, VT) ||
- !isOperationLegalOrCustom(Cond, VT) ||
+ !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
!isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
return SDValue();
@@ -6748,20 +6837,23 @@ SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
EVT VT = Op.getValueType();
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
- // Testing it with denormal inputs to avoid wrong estimate.
- if (Mode.Input == DenormalMode::IEEE) {
- // This is specifically a check for the handling of denormal inputs,
- // not the result.
- // Test = fabs(X) < SmallestNormal
- const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
- APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
- SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
- SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
- return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
+ // This is specifically a check for the handling of denormal inputs, not the
+ // result.
+ if (Mode.Input == DenormalMode::PreserveSign ||
+ Mode.Input == DenormalMode::PositiveZero) {
+ // Test = X == 0.0
+ return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}
- // Test = X == 0.0
- return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+
+  // Test with denormal inputs to avoid a wrong estimate.
+ //
+ // Test = fabs(X) < SmallestNormal
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
+ SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
+ SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
+ return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
}
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
@@ -6769,7 +6861,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
NegatibleCost &Cost,
unsigned Depth) const {
// fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG) {
+ if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
Cost = NegatibleCost::Cheaper;
return Op.getOperand(0);
}
@@ -7212,7 +7304,7 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
Merge(Lo, Hi));
else
- Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
+ Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
SDValue Carry = Next.getValue(1);
@@ -7226,7 +7318,7 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
Carry);
else
- Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
+ Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
Zero, Carry);
Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
@@ -7330,7 +7422,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
// If the divisor is even, shift it until it becomes odd.
unsigned TrailingZeros = 0;
if (!Divisor[0]) {
- TrailingZeros = Divisor.countTrailingZeros();
+ TrailingZeros = Divisor.countr_zero();
Divisor.lshrInPlace(TrailingZeros);
}
@@ -7342,14 +7434,10 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
// then add in the carry.
// TODO: If we can't split it in half, we might be able to split into 3 or
// more pieces using a smaller bit width.
- if (HalfMaxPlus1.urem(Divisor).isOneValue()) {
+ if (HalfMaxPlus1.urem(Divisor).isOne()) {
assert(!LL == !LH && "Expected both input halves or no input halves!");
- if (!LL) {
- LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
- DAG.getIntPtrConstant(0, dl));
- LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
- DAG.getIntPtrConstant(1, dl));
- }
+ if (!LL)
+ std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
// Shift the input by the number of TrailingZeros in the divisor. The
// shifted out bits will be added to the remainder later.
@@ -7372,13 +7460,13 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
}
- // Use addcarry if we can, otherwise use a compare to detect overflow.
+ // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
EVT SetCCType =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
- if (isOperationLegalOrCustom(ISD::ADDCARRY, HiLoVT)) {
+ if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
- Sum = DAG.getNode(ISD::ADDCARRY, dl, VTList, Sum,
+ Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
} else {
Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
@@ -7423,10 +7511,8 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
DAG.getConstant(MulFactor, dl, VT));
// Split the quotient into low and high parts.
- SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
- DAG.getIntPtrConstant(0, dl));
- SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
- DAG.getIntPtrConstant(1, dl));
+ SDValue QuotL, QuotH;
+ std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
Result.push_back(QuotL);
Result.push_back(QuotH);
}
@@ -7915,7 +8001,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
// -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
- BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
+ llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
@@ -7988,14 +8074,19 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
}
// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
- // instead if there are no NaNs.
- if (Node->getFlags().hasNoNaNs()) {
+ // instead if there are no NaNs and there can't be an incompatible zero
+ // compare: at least one operand isn't +/-0, or there are no signed-zeros.
+ if ((Node->getFlags().hasNoNaNs() ||
+ (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
+ DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
+ (Node->getFlags().hasNoSignedZeros() ||
+ DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
+ DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
unsigned IEEE2018Op =
Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
- if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
+ if (isOperationLegalOrCustom(IEEE2018Op, VT))
return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
Node->getOperand(1), Node->getFlags());
- }
}
if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
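The widened precondition above matters because ISD::FMINIMUM/FMAXIMUM (IEEE 754-2019 minimum/maximum) and ISD::FMINNUM/FMAXNUM disagree exactly on NaN inputs and on a mixed (+0.0, -0.0) pair. A standalone sketch (not part of the patch); minimumSemantics below is a hand-written model of the 2019 minimum, not a library call:

#include <cassert>
#include <cmath>

// Model of ISD::FMINIMUM; std::fmin models the minNum-style ISD::FMINNUM.
static double minimumSemantics(double A, double B) {
  if (std::isnan(A) || std::isnan(B))
    return std::nan("");                 // minimum propagates NaN
  if (A == 0.0 && B == 0.0)
    return std::signbit(A) ? A : B;      // -0.0 is ordered below +0.0
  return A < B ? A : B;
}

int main() {
  // NaN operand: the two nodes disagree, fminnum returns the non-NaN operand.
  assert(std::fmin(std::nan(""), 1.0) == 1.0);
  assert(std::isnan(minimumSemantics(std::nan(""), 1.0)));
  // Mixed zeros: minimum must return -0.0; fminnum may return either zero.
  assert(std::signbit(minimumSemantics(+0.0, -0.0)));
  return 0;
}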
@@ -8004,15 +8095,39 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
+/// Returns a true value if this FPClassTest can be performed with an ordered
+/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
+/// std::nullopt if it cannot be performed as a compare with 0.
+static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
+ const fltSemantics &Semantics,
+ const MachineFunction &MF) {
+ FPClassTest OrderedMask = Test & ~fcNan;
+ FPClassTest NanTest = Test & fcNan;
+ bool IsOrdered = NanTest == fcNone;
+ bool IsUnordered = NanTest == fcNan;
+
+ // Skip cases that are testing for only a qnan or snan.
+ if (!IsOrdered && !IsUnordered)
+ return std::nullopt;
+
+ if (OrderedMask == fcZero &&
+ MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
+ return IsOrdered;
+ if (OrderedMask == (fcZero | fcSubnormal) &&
+ MF.getDenormalMode(Semantics).inputsAreZero())
+ return IsOrdered;
+ return std::nullopt;
+}
+
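isFCmpEqualZero encodes a scalar equivalence: when denormal inputs are handled per IEEE, an is_fpclass test for fcZero is the same as an ordered compare against 0.0, and fcZero|fcNan is the unordered compare; when inputs are flushed, subnormals also compare equal to zero, hence the fcZero|fcSubnormal case. A standalone sketch (not part of the patch), assuming default IEEE compilation (no fast-math, no flush-to-zero):

#include <cassert>
#include <cmath>

int main() {
  const double Vals[] = {0.0, -0.0, 1.5, -2.0, 5e-324 /*subnormal*/,
                         std::nan(""), HUGE_VAL};
  for (double X : Vals) {
    bool ClassIsZero = std::fpclassify(X) == FP_ZERO;     // is_fpclass(X, fcZero)
    bool OrderedEqZero = (X == 0.0);                      // SETOEQ against 0.0
    bool UnorderedEqZero = (X == 0.0) || std::isnan(X);   // SETUEQ against 0.0
    assert(ClassIsZero == OrderedEqZero);                      // fcZero
    assert((ClassIsZero || std::isnan(X)) == UnorderedEqZero); // fcZero|fcNan
  }
  return 0;
}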
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
- unsigned Test, SDNodeFlags Flags,
+ FPClassTest Test, SDNodeFlags Flags,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT OperandVT = Op.getValueType();
assert(OperandVT.isFloatingPoint());
// Degenerated cases.
- if (Test == 0)
+ if (Test == fcNone)
return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
if ((Test & fcAllFlags) == fcAllFlags)
return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
@@ -8028,7 +8143,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// Some checks may be represented as inversion of simpler check, for example
// "inf|normal|subnormal|zero" => !"nan".
bool IsInverted = false;
- if (unsigned InvertedCheck = getInvertedFPClassTest(Test)) {
+ if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
IsInverted = true;
Test = InvertedCheck;
}
@@ -8043,13 +8158,40 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// exceptions are ignored.
if (Flags.hasNoFPExcept() &&
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
- if (Test == fcZero)
+ ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
+ ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
+
+ if (std::optional<bool> IsCmp0 =
+ isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
+ IsCmp0 && (isCondCodeLegalOrCustom(
+ *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
+ OperandVT.getScalarType().getSimpleVT()))) {
+
+ // If denormals could be implicitly treated as 0, this is not equivalent
+ // to a compare with 0 since it will also be true for denormals.
return DAG.getSetCC(DL, ResultVT, Op,
DAG.getConstantFP(0.0, DL, OperandVT),
- IsInverted ? ISD::SETUNE : ISD::SETOEQ);
- if (Test == fcNan)
+ *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
+ }
+
+ if (Test == fcNan &&
+ isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
+ OperandVT.getScalarType().getSimpleVT())) {
return DAG.getSetCC(DL, ResultVT, Op, Op,
IsInverted ? ISD::SETO : ISD::SETUO);
+ }
+
+ if (Test == fcInf &&
+ isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
+ OperandVT.getScalarType().getSimpleVT()) &&
+ isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
+ // isinf(x) --> fabs(x) == inf
+ SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
+ SDValue Inf =
+ DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
+ return DAG.getSetCC(DL, ResultVT, Abs, Inf,
+ IsInverted ? ISD::SETUNE : ISD::SETOEQ);
+ }
}
// In the general case use integer operations.
@@ -8071,7 +8213,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
APInt QNaNBitMask =
APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
- APInt InvertionMask = APInt::getAllOnesValue(ResultVT.getScalarSizeInBits());
+ APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
@@ -8129,6 +8271,18 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
}
appendResult(PartialRes);
+ if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
+ // fcZero | fcSubnormal => test all exponent bits are 0
+ // TODO: Handle sign bit specific cases
+ if (PartialCheck == (fcZero | fcSubnormal)) {
+ SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
+ SDValue ExpIsZero =
+ DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
+ appendResult(ExpIsZero);
+ Test &= ~PartialCheck & fcAllFlags;
+ }
+ }
+
// Check for individual classes.
if (unsigned PartialCheck = Test & fcZero) {
@@ -8141,6 +8295,19 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
appendResult(PartialRes);
}
+ if (unsigned PartialCheck = Test & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
+ SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
+ SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
+ SDValue VMinusOneV =
+ DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
+ PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
+ if (PartialCheck == fcNegSubnormal)
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ appendResult(PartialRes);
+ }
+
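The relocated fcSubnormal check uses an integer range trick: in the absolute-value bit pattern, subnormals are exactly the values 1..all-mantissa-bits, so subtracting one and doing an unsigned compare against the all-ones mantissa catches them, while zero wraps around and drops out. A standalone sketch (not part of the patch) for IEEE binary32, assuming the host does not flush subnormals:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static bool isSubnormalViaBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));   // bitcast f32 -> i32
  uint32_t Abs = Bits & 0x7FFFFFFFu;      // drop the sign bit
  // Subnormal iff 1 <= Abs <= 0x007FFFFF; zero wraps to UINT32_MAX and fails.
  return Abs - 1u < 0x007FFFFFu;
}

int main() {
  const float Vals[] = {0.0f, -0.0f, 1.0f, -1.0f, 1e-30f,
                        1e-45f /*subnormal*/, -1e-45f, HUGE_VALF, std::nanf("")};
  for (float F : Vals)
    assert(isSubnormalViaBits(F) == (std::fpclassify(F) == FP_SUBNORMAL));
  return 0;
}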
if (unsigned PartialCheck = Test & fcInf) {
if (PartialCheck == fcPosInf)
PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
@@ -8185,19 +8352,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
appendResult(PartialRes);
}
- if (unsigned PartialCheck = Test & fcSubnormal) {
- // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
- // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
- SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
- SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
- SDValue VMinusOneV =
- DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
- PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
- if (PartialCheck == fcNegSubnormal)
- PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
- appendResult(PartialRes);
- }
-
if (unsigned PartialCheck = Test & fcNormal) {
// isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
@@ -8609,6 +8763,38 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
+SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = DAG.getFreeze(N->getOperand(0));
+ SDValue RHS = DAG.getFreeze(N->getOperand(1));
+ bool IsSigned = N->getOpcode() == ISD::ABDS;
+
+ // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
+ // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
+ unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
+ unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
+ if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
+ SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
+ SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
+ return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
+ }
+
+ // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
+ if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
+ return DAG.getNode(ISD::OR, dl, VT,
+ DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
+ DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
+
+ // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
+ // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
+ SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
+ return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
+ DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
+}
+
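expandABD picks between three equivalent expansions of the absolute-difference nodes. A standalone sketch (not part of the patch) checking all three against the reference semantics exhaustively for unsigned 8-bit values:

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint8_t USubSat(uint8_t A, uint8_t B) { return A > B ? A - B : 0; }

int main() {
  for (int X = 0; X < 256; ++X) {
    for (int Y = 0; Y < 256; ++Y) {
      uint8_t Ref = X > Y ? X - Y : Y - X;                  // abdu(x, y)
      uint8_t ViaMinMax = std::max(X, Y) - std::min(X, Y);  // sub(umax, umin)
      uint8_t ViaSubSat = USubSat(X, Y) | USubSat(Y, X);    // or(usubsat, usubsat)
      uint8_t ViaSelect =
          X > Y ? (uint8_t)(X - Y) : (uint8_t)(Y - X);      // select(ugt, ...)
      assert(Ref == ViaMinMax && Ref == ViaSubSat && Ref == ViaSelect);
    }
  }
  return 0;
}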
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
@@ -8796,8 +8982,7 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
Tmp2 =
DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
- APInt Shift(Sz, 1);
- Shift <<= J;
+ APInt Shift = APInt::getOneBitSet(Sz, J);
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
}
@@ -9494,10 +9679,21 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
EVT VT = Op0.getValueType();
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
unsigned Opcode = Node->getOpcode();
SDLoc DL(Node);
+ // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
+ if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
+ getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ Op0 = DAG.getFreeze(Op0);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, Op0,
+ DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
+ }
+
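The umax(x, 1) fold above depends on the target's setcc producing an all-ones mask for true (ZeroOrNegativeOneBooleanContent): subtracting that mask from x bumps only the zero case up to 1. A standalone sketch (not part of the patch) modelling the mask with 0xFF on 8-bit values:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int V = 0; V < 256; ++V) {
    uint8_t X = (uint8_t)V;
    uint8_t CmpEqZero = (X == 0) ? 0xFF : 0x00;   // setcc result is all-bits
    uint8_t Folded = (uint8_t)(X - CmpEqZero);    // sub(x, cmpeq(x, 0))
    assert(Folded == std::max<uint8_t>(X, 1));    // umax(x, 1)
  }
  return 0;
}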
// umin(x,y) -> sub(x,usubsat(x,y))
+ // TODO: Missing freeze(Op0)?
if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::USUBSAT, VT)) {
return DAG.getNode(ISD::SUB, DL, VT, Op0,
@@ -9505,30 +9701,59 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
}
// umax(x,y) -> add(x,usubsat(y,x))
+ // TODO: Missing freeze(Op0)?
if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
isOperationLegal(ISD::USUBSAT, VT)) {
return DAG.getNode(ISD::ADD, DL, VT, Op0,
DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
}
- // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
- ISD::CondCode CC;
- switch (Opcode) {
- default: llvm_unreachable("How did we get here?");
- case ISD::SMAX: CC = ISD::SETGT; break;
- case ISD::SMIN: CC = ISD::SETLT; break;
- case ISD::UMAX: CC = ISD::SETUGT; break;
- case ISD::UMIN: CC = ISD::SETULT; break;
- }
-
// FIXME: Should really try to split the vector in case it's legal on a
// subvector.
if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
return DAG.UnrollVectorOp(Node);
- EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
- SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
- return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+ // Attempt to find an existing SETCC node that we can reuse.
+ // TODO: Do we need a generic doesSETCCNodeExist?
+ // TODO: Missing freeze(Op0)/freeze(Op1)?
+ auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
+ ISD::CondCode PrefCommuteCC,
+ ISD::CondCode AltCommuteCC) {
+ SDVTList BoolVTList = DAG.getVTList(BoolVT);
+ for (ISD::CondCode CC : {PrefCC, AltCC}) {
+ if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
+ {Op0, Op1, DAG.getCondCode(CC)})) {
+ SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
+ return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+ }
+ }
+ for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
+ if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
+ {Op0, Op1, DAG.getCondCode(CC)})) {
+ SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
+ return DAG.getSelect(DL, VT, Cond, Op1, Op0);
+ }
+ }
+ SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
+ return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+ };
+
+ // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
+ // -> Y = (A < B) ? B : A
+ // -> Y = (A >= B) ? A : B
+ // -> Y = (A <= B) ? B : A
+ switch (Opcode) {
+ case ISD::SMAX:
+ return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
+ case ISD::SMIN:
+ return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
+ case ISD::UMAX:
+ return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
+ case ISD::UMIN:
+ return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
+ }
+
+ llvm_unreachable("How did we get here?");
}
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
@@ -9607,6 +9832,37 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
}
+ if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
+ APInt MinVal = APInt::getSignedMinValue(BitWidth);
+ APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+
+ KnownBits KnownLHS = DAG.computeKnownBits(LHS);
+ KnownBits KnownRHS = DAG.computeKnownBits(RHS);
+
+ // If either of the operand signs are known, then they are guaranteed to
+ // only saturate in one direction. If non-negative they will saturate
+ // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
+ //
+ // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
+ // sign of 'y' has to be flipped.
+
+ bool LHSIsNonNegative = KnownLHS.isNonNegative();
+ bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
+ : KnownRHS.isNegative();
+ if (LHSIsNonNegative || RHSIsNonNegative) {
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
+ }
+
+ bool LHSIsNegative = KnownLHS.isNegative();
+ bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
+ : KnownRHS.isNonNegative();
+ if (LHSIsNegative || RHSIsNegative) {
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
+ }
+ }
+
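The reasoning in the comment above can be checked directly: if one addend of a signed saturating add is known non-negative, the exact sum can never drop below the signed minimum, so only the SIGNED_MAX clamp can fire (and symmetrically for a known-negative operand and SIGNED_MIN). A standalone sketch (not part of the patch) for int8_t:

#include <cassert>
#include <cstdint>

int main() {
  for (int X = INT8_MIN; X <= INT8_MAX; ++X) {
    for (int Y = 0; Y <= INT8_MAX; ++Y) {   // RHS known non-negative
      int Exact = X + Y;                    // exact sum, no 8-bit wrapping
      int FullClamp = Exact > INT8_MAX   ? INT8_MAX
                      : Exact < INT8_MIN ? INT8_MIN
                                         : Exact;           // generic saddsat
      int MaxOnly = Exact > INT8_MAX ? INT8_MAX : Exact;    // clamp to max only
      assert(Exact >= INT8_MIN);     // a non-negative addend cannot underflow
      assert(FullClamp == MaxOnly);  // so the SIGNED_MIN branch is never taken
    }
  }
  return 0;
}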
// Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
APInt MinVal = APInt::getSignedMinValue(BitWidth);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
@@ -9892,8 +10148,8 @@ void TargetLowering::expandUADDSUBO(
SDValue RHS = Node->getOperand(1);
bool IsAdd = Node->getOpcode() == ISD::UADDO;
- // If ADD/SUBCARRY is legal, use that instead.
- unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
+ // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
+ unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
@@ -9919,6 +10175,11 @@ void TargetLowering::expandUADDSUBO(
SetCC =
DAG.getSetCC(dl, SetCCType, Result,
DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
+ } else if (IsAdd && isAllOnesConstant(RHS)) {
+ // Special case: uaddo X, -1 overflows if X != 0.
+ SetCC =
+ DAG.getSetCC(dl, SetCCType, LHS,
+ DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
} else {
ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
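The special case added above follows from modular arithmetic: adding the all-ones value (2^n - 1) carries out of n bits exactly when the other operand is non-zero. A standalone sketch (not part of the patch) checking this for 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  for (int V = 0; V < 256; ++V) {
    uint8_t X = (uint8_t)V;
    unsigned WideSum = (unsigned)X + 0xFFu;   // exact sum, no 8-bit wrap
    bool Overflow = WideSum > 0xFFu;          // uaddo's carry-out
    assert(Overflow == (X != 0));             // setcc(X, 0, SETNE)
  }
  return 0;
}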
@@ -10271,8 +10532,10 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
// Otherwise, select 0 if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
- return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
- ISD::CondCode::SETUO);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+ SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
+ return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
}
SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
@@ -10286,13 +10549,16 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
SDValue Select = FpToInt;
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
// If Src ULT MinFloat, select MinInt. In particular, this also selects
// MinInt if Src is NaN.
- Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
- ISD::CondCode::SETULT);
+ SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
+ Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
// If Src OGT MaxFloat, select MaxInt.
- Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
- ISD::CondCode::SETOGT);
+ SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
+ Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
// In the unsigned case we are done, because we mapped NaN to MinInt, which
// is already zero.
@@ -10301,7 +10567,8 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
// Otherwise, select 0 if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
- return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
+ SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
+ return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
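The rewritten hunks above only swap getSelectCC for explicit setcc+select pairs; the saturating structure is unchanged: clamp below MinFloat (an unordered compare, so NaN is caught too), clamp above MaxFloat (ordered), then map NaN to zero in the signed case. A rough standalone scalar model (not part of the patch) for a signed f32 to i8 conversion; the in-range guard on the raw cast only avoids undefined behaviour in C++ and is not part of the DAG expansion:

#include <cassert>
#include <cmath>
#include <cstdint>

static int8_t FPToSISat8(float Src) {
  const float MinFloat = -128.0f, MaxFloat = 127.0f;
  const int MinInt = -128, MaxInt = 127;
  // Raw fptosi, guarded so the out-of-range cast is never evaluated here.
  int Result = (Src >= MinFloat && Src <= MaxFloat) ? (int)Src : 0;
  if (!(Src >= MinFloat))   // SETULT MinFloat: below range or NaN
    Result = MinInt;
  if (Src > MaxFloat)       // SETOGT MaxFloat: above range
    Result = MaxInt;
  if (std::isnan(Src))      // SETUO: NaN selects 0 in the signed case
    Result = 0;
  return (int8_t)Result;
}

int main() {
  assert(FPToSISat8(1.75f) == 1);
  assert(FPToSISat8(-1000.0f) == -128);
  assert(FPToSISat8(1000.0f) == 127);
  assert(FPToSISat8(std::nanf("")) == 0);
  return 0;
}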
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index 2411b1ad5203..4b1d3637a746 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -53,6 +53,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -97,6 +98,9 @@ STATISTIC(NumCandidatesDropped,
static cl::opt<cl::boolOrDefault>
EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
cl::desc("enable the shrink-wrapping pass"));
+static cl::opt<bool> EnablePostShrinkWrapOpt(
+ "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden,
+ cl::desc("enable splitting of the restore block if possible"));
namespace {
@@ -110,44 +114,44 @@ namespace {
class ShrinkWrap : public MachineFunctionPass {
/// Hold callee-saved information.
RegisterClassInfo RCI;
- MachineDominatorTree *MDT;
- MachinePostDominatorTree *MPDT;
+ MachineDominatorTree *MDT = nullptr;
+ MachinePostDominatorTree *MPDT = nullptr;
/// Current safe point found for the prologue.
/// The prologue will be inserted before the first instruction
/// in this basic block.
- MachineBasicBlock *Save;
+ MachineBasicBlock *Save = nullptr;
/// Current safe point found for the epilogue.
/// The epilogue will be inserted before the first terminator instruction
/// in this basic block.
- MachineBasicBlock *Restore;
+ MachineBasicBlock *Restore = nullptr;
/// Hold the information of the basic block frequency.
/// Use to check the profitability of the new points.
- MachineBlockFrequencyInfo *MBFI;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
/// Hold the loop information. Used to determine if Save and Restore
/// are in the same loop.
- MachineLoopInfo *MLI;
+ MachineLoopInfo *MLI = nullptr;
// Emit remarks.
MachineOptimizationRemarkEmitter *ORE = nullptr;
/// Frequency of the Entry block.
- uint64_t EntryFreq;
+ uint64_t EntryFreq = 0;
/// Current opcode for frame setup.
- unsigned FrameSetupOpcode;
+ unsigned FrameSetupOpcode = ~0u;
/// Current opcode for frame destroy.
- unsigned FrameDestroyOpcode;
+ unsigned FrameDestroyOpcode = ~0u;
/// Stack pointer register, used by llvm.{savestack,restorestack}
Register SP;
/// Entry block.
- const MachineBasicBlock *Entry;
+ const MachineBasicBlock *Entry = nullptr;
using SetOfRegs = SmallSetVector<unsigned, 16>;
@@ -155,12 +159,18 @@ class ShrinkWrap : public MachineFunctionPass {
mutable SetOfRegs CurrentCSRs;
/// Current MachineFunction.
- MachineFunction *MachineFunc;
+ MachineFunction *MachineFunc = nullptr;
+
+  /// Is `true` for block numbers where a stack access or a computation of a
+  /// stack-relative address may have occurred on some CFG path that includes
+  /// the block itself.
+ BitVector StackAddressUsedBlockInfo;
/// Check if \p MI uses or defines a callee-saved register or
/// a frame index. If this is the case, this means \p MI must happen
/// after Save and before Restore.
- bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const;
+ bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
+ bool StackAddressUsed) const;
const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const {
if (CurrentCSRs.empty()) {
@@ -184,6 +194,32 @@ class ShrinkWrap : public MachineFunctionPass {
/// this call.
void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
+ // Try to find safe point based on dominance and block frequency without
+ // any change in IR.
+ bool performShrinkWrapping(
+ const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
+ RegScavenger *RS);
+
+ /// This function tries to split the restore point if doing so can shrink the
+ /// save point further. \return True if restore point is split.
+ bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
+ RegScavenger *RS);
+
+  /// This function analyzes whether the restore point can be split to create
+  /// a new restore point. It collects
+  /// 1. Any preds of the current restore that are reachable from blocks that
+  ///    use or define a CSR/FI
+  ///    - indicated by DirtyPreds
+  /// 2. Any preds of the current restore that are not DirtyPreds - indicated
+  ///    by CleanPreds
+  /// Both sets must be non-empty to consider splitting the restore point.
+ bool checkIfRestoreSplittable(
+ const MachineBasicBlock *CurRestore,
+ const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
+ SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
+ const TargetInstrInfo *TII, RegScavenger *RS);
+
/// Initialize the pass for \p MF.
void init(MachineFunction &MF) {
RCI.runOnMachineFunction(MF);
@@ -257,15 +293,32 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
-bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
- RegScavenger *RS) const {
- // This prevents premature stack popping when occurs a indirect stack
- // access. It is overly aggressive for the moment.
- // TODO: - Obvious non-stack loads and store, such as global values,
- // are known to not access the stack.
- // - Further, data dependency and alias analysis can validate
- // that load and stores never derive from the stack pointer.
- if (MI.mayLoadOrStore())
+bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
+ bool StackAddressUsed) const {
+  /// Check if \p Op is known to access an address not on the function's stack.
+ /// At the moment, accesses where the underlying object is a global, function
+ /// argument, or jump table are considered non-stack accesses. Note that the
+ /// caller's stack may get accessed when passing an argument via the stack,
+ /// but not the stack of the current function.
+ ///
+ auto IsKnownNonStackPtr = [](MachineMemOperand *Op) {
+ if (Op->getValue()) {
+ const Value *UO = getUnderlyingObject(Op->getValue());
+ if (!UO)
+ return false;
+ if (auto *Arg = dyn_cast<Argument>(UO))
+ return !Arg->hasPassPointeeByValueCopyAttr();
+ return isa<GlobalValue>(UO);
+ }
+ if (const PseudoSourceValue *PSV = Op->getPseudoValue())
+ return PSV->isJumpTable();
+ return false;
+ };
+ // Load/store operations may access the stack indirectly when we previously
+ // computed an address to a stack location.
+ if (StackAddressUsed && MI.mayLoadOrStore() &&
+ (MI.isCall() || MI.hasUnmodeledSideEffects() || MI.memoperands_empty() ||
+ !all_of(MI.memoperands(), IsKnownNonStackPtr)))
return true;
if (MI.getOpcode() == FrameSetupOpcode ||
@@ -320,18 +373,314 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
/// Helper function to find the immediate (post) dominator.
template <typename ListOfBBs, typename DominanceAnalysis>
static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
- DominanceAnalysis &Dom) {
+ DominanceAnalysis &Dom, bool Strict = true) {
MachineBasicBlock *IDom = &Block;
for (MachineBasicBlock *BB : BBs) {
IDom = Dom.findNearestCommonDominator(IDom, BB);
if (!IDom)
break;
}
- if (IDom == &Block)
+ if (Strict && IDom == &Block)
return nullptr;
return IDom;
}
+static bool isAnalyzableBB(const TargetInstrInfo &TII,
+ MachineBasicBlock &Entry) {
+ // Check if the block is analyzable.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ return !TII.analyzeBranch(Entry, TBB, FBB, Cond);
+}
+
+/// Determines if any predecessor of MBB is on a path from a block that has a
+/// use or def of CSRs/FI to MBB.
+/// ReachableByDirty: All blocks reachable from a block that has a use or def
+/// of CSRs/FI.
+static bool
+hasDirtyPred(const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ const MachineBasicBlock &MBB) {
+ for (const MachineBasicBlock *PredBB : MBB.predecessors())
+ if (ReachableByDirty.count(PredBB))
+ return true;
+ return false;
+}
+
+/// Derives the list of all the basic blocks reachable from MBB.
+static void markAllReachable(DenseSet<const MachineBasicBlock *> &Visited,
+ const MachineBasicBlock &MBB) {
+ SmallVector<MachineBasicBlock *, 4> Worklist(MBB.succ_begin(),
+ MBB.succ_end());
+ Visited.insert(&MBB);
+ while (!Worklist.empty()) {
+ MachineBasicBlock *SuccMBB = Worklist.pop_back_val();
+ if (!Visited.insert(SuccMBB).second)
+ continue;
+ Worklist.append(SuccMBB->succ_begin(), SuccMBB->succ_end());
+ }
+}
+
+/// Collect blocks reachable by use or def of CSRs/FI.
+static void collectBlocksReachableByDirty(
+ const DenseSet<const MachineBasicBlock *> &DirtyBBs,
+ DenseSet<const MachineBasicBlock *> &ReachableByDirty) {
+ for (const MachineBasicBlock *MBB : DirtyBBs) {
+ if (ReachableByDirty.count(MBB))
+ continue;
+    // Mark this block and everything reachable from it.
+ markAllReachable(ReachableByDirty, *MBB);
+ }
+}
+
+/// \return true if there is a clean path from SavePoint to the original
+/// Restore.
+static bool
+isSaveReachableThroughClean(const MachineBasicBlock *SavePoint,
+ ArrayRef<MachineBasicBlock *> CleanPreds) {
+ DenseSet<const MachineBasicBlock *> Visited;
+ SmallVector<MachineBasicBlock *, 4> Worklist(CleanPreds.begin(),
+ CleanPreds.end());
+ while (!Worklist.empty()) {
+ MachineBasicBlock *CleanBB = Worklist.pop_back_val();
+ if (CleanBB == SavePoint)
+ return true;
+ if (!Visited.insert(CleanBB).second || !CleanBB->pred_size())
+ continue;
+ Worklist.append(CleanBB->pred_begin(), CleanBB->pred_end());
+ }
+ return false;
+}
+
+/// This function updates the branches post restore point split.
+///
+/// Restore point has been split.
+/// Old restore point: MBB
+/// New restore point: NMBB
+/// Any basic block (say BBToUpdate) which previously had a fallthrough to MBB
+/// should
+/// 1. Fall through to NMBB iff NMBB is inserted immediately above MBB in the
+/// block layout OR
+/// 2. Branch unconditionally to NMBB iff NMBB is inserted at any other place.
+static void updateTerminator(MachineBasicBlock *BBToUpdate,
+ MachineBasicBlock *NMBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc DL = BBToUpdate->findBranchDebugLoc();
+  // If NMBB isn't the new layout successor for BBToUpdate, insert an
+  // unconditional branch to it.
+ if (!BBToUpdate->isLayoutSuccessor(NMBB))
+ TII->insertUnconditionalBranch(*BBToUpdate, NMBB, DL);
+}
+
+/// This function splits the restore point and returns the new restore point/BB.
+///
+/// DirtyPreds: Predecessors of \p MBB that are ReachableByDirty
+///
+/// Decision has been made to split the restore point.
+/// old restore point: \p MBB
+/// new restore point: \p NMBB
+/// This function makes the necessary block layout changes so that
+/// 1. \p NMBB points to \p MBB unconditionally
+/// 2. All dirtyPreds that previously pointed to \p MBB point to \p NMBB
+static MachineBasicBlock *
+tryToSplitRestore(MachineBasicBlock *MBB,
+ ArrayRef<MachineBasicBlock *> DirtyPreds,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = MBB->getParent();
+
+  // Get the list of DirtyPreds that have a fallthrough to MBB before the
+  // block layout change. This is just to ensure that if NMBB is inserted
+  // after MBB, then we create an unconditional branch from the
+  // DirtyPred/CleanPred to NMBB.
+ SmallPtrSet<MachineBasicBlock *, 8> MBBFallthrough;
+ for (MachineBasicBlock *BB : DirtyPreds)
+ if (BB->getFallThrough(false) == MBB)
+ MBBFallthrough.insert(BB);
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ // Insert this block at the end of the function. Inserting in between may
+ // interfere with control flow optimizer decisions.
+ MF->insert(MF->end(), NMBB);
+
+ for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins())
+ NMBB->addLiveIn(LI.PhysReg);
+
+ TII->insertUnconditionalBranch(*NMBB, MBB, DebugLoc());
+
+ // After splitting, all predecessors of the restore point should be dirty
+ // blocks.
+ for (MachineBasicBlock *SuccBB : DirtyPreds)
+ SuccBB->ReplaceUsesOfBlockWith(MBB, NMBB);
+
+ NMBB->addSuccessor(MBB);
+
+ for (MachineBasicBlock *BBToUpdate : MBBFallthrough)
+ updateTerminator(BBToUpdate, NMBB, TII);
+
+ return NMBB;
+}
+
+/// This function undoes the restore point split done earlier.
+///
+/// DirtyPreds: All predecessors of \p NMBB that are ReachableByDirty.
+///
+/// The restore point was split and the change needs to be rolled back. Make
+/// the necessary changes to reset the restore point from \p NMBB to \p MBB.
+static void rollbackRestoreSplit(MachineFunction &MF, MachineBasicBlock *NMBB,
+ MachineBasicBlock *MBB,
+ ArrayRef<MachineBasicBlock *> DirtyPreds,
+ const TargetInstrInfo *TII) {
+  // For a BB, if NMBB is the fallthrough in the current layout, then in the
+  // new layout a. BB should fall through to MBB OR b. BB should
+  // unconditionally branch to MBB.
+ SmallPtrSet<MachineBasicBlock *, 8> NMBBFallthrough;
+ for (MachineBasicBlock *BB : DirtyPreds)
+ if (BB->getFallThrough(false) == NMBB)
+ NMBBFallthrough.insert(BB);
+
+ NMBB->removeSuccessor(MBB);
+ for (MachineBasicBlock *SuccBB : DirtyPreds)
+ SuccBB->ReplaceUsesOfBlockWith(NMBB, MBB);
+
+ NMBB->erase(NMBB->begin(), NMBB->end());
+ NMBB->eraseFromParent();
+
+ for (MachineBasicBlock *BBToUpdate : NMBBFallthrough)
+ updateTerminator(BBToUpdate, MBB, TII);
+}
+
+// A block is deemed fit for a restore point split iff there exist
+// 1. DirtyPreds - preds of CurRestore reachable from a use or def of CSR/FI
+// 2. CleanPreds - preds of CurRestore that aren't DirtyPreds
+bool ShrinkWrap::checkIfRestoreSplittable(
+ const MachineBasicBlock *CurRestore,
+ const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
+ SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
+ const TargetInstrInfo *TII, RegScavenger *RS) {
+ for (const MachineInstr &MI : *CurRestore)
+ if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true))
+ return false;
+
+ for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
+ if (!isAnalyzableBB(*TII, *PredBB))
+ return false;
+
+ if (ReachableByDirty.count(PredBB))
+ DirtyPreds.push_back(PredBB);
+ else
+ CleanPreds.push_back(PredBB);
+ }
+
+ return !(CleanPreds.empty() || DirtyPreds.empty());
+}
+
+bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
+ RegScavenger *RS) {
+ if (!EnablePostShrinkWrapOpt)
+ return false;
+
+ MachineBasicBlock *InitSave = nullptr;
+ MachineBasicBlock *InitRestore = nullptr;
+
+ if (HasCandidate) {
+ InitSave = Save;
+ InitRestore = Restore;
+ } else {
+ InitRestore = nullptr;
+ InitSave = &MF.front();
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry())
+ return false;
+ if (MBB.isReturnBlock()) {
+ // Do not support multiple restore points.
+ if (InitRestore)
+ return false;
+ InitRestore = &MBB;
+ }
+ }
+ }
+
+ if (!InitSave || !InitRestore || InitRestore == InitSave ||
+ !MDT->dominates(InitSave, InitRestore) ||
+ !MPDT->dominates(InitRestore, InitSave))
+ return false;
+
+  // Bail out of the optimization if any of the basic blocks is the target of
+  // an INLINEASM_BR instruction.
+ for (MachineBasicBlock &MBB : MF)
+ if (MBB.isInlineAsmBrIndirectTarget())
+ return false;
+
+ DenseSet<const MachineBasicBlock *> DirtyBBs;
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHPad()) {
+ DirtyBBs.insert(&MBB);
+ continue;
+ }
+ for (const MachineInstr &MI : MBB)
+ if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true)) {
+ DirtyBBs.insert(&MBB);
+ break;
+ }
+ }
+
+ // Find blocks reachable from the use or def of CSRs/FI.
+ DenseSet<const MachineBasicBlock *> ReachableByDirty;
+ collectBlocksReachableByDirty(DirtyBBs, ReachableByDirty);
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<MachineBasicBlock *, 2> DirtyPreds;
+ SmallVector<MachineBasicBlock *, 2> CleanPreds;
+ if (!checkIfRestoreSplittable(InitRestore, ReachableByDirty, DirtyPreds,
+ CleanPreds, TII, RS))
+ return false;
+
+  // Try to find a new save point that dominates all the dirty blocks.
+ MachineBasicBlock *NewSave =
+ FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);
+
+ while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
+ EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency() ||
+                     /*The entry block's frequency has been observed to exceed
+                       a loop block's frequency in some cases, so additionally
+                       reject blocks inside loops.*/
+ MLI->getLoopFor(NewSave)))
+ NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT,
+ false);
+
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ if (!NewSave || NewSave == InitSave ||
+ isSaveReachableThroughClean(NewSave, CleanPreds) ||
+ !TFI->canUseAsPrologue(*NewSave))
+ return false;
+
+ // Now we know that splitting a restore point can isolate the restore point
+ // from clean blocks and doing so can shrink the save point.
+ MachineBasicBlock *NewRestore =
+ tryToSplitRestore(InitRestore, DirtyPreds, TII);
+
+  // Make sure the new restore point is valid as an epilogue, depending on the
+  // target.
+ if (!TFI->canUseAsEpilogue(*NewRestore)) {
+ rollbackRestoreSplit(MF, NewRestore, InitRestore, DirtyPreds, TII);
+ return false;
+ }
+
+ Save = NewSave;
+ Restore = NewRestore;
+
+ MDT->runOnMachineFunction(MF);
+ MPDT->runOnMachineFunction(MF);
+
+ assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) &&
+ "Incorrect save or restore point due to dominance relations");
+ assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) &&
+ "Unexpected save or restore point in a loop");
+ assert((EntryFreq >= MBFI->getBlockFreq(Save).getFrequency() &&
+ EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
+ "Incorrect save or restore point based on block frequency");
+ return true;
+}
+
void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
RegScavenger *RS) {
// Get rid of the easy cases first.
@@ -356,7 +705,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
// terminator.
if (Restore == &MBB) {
for (const MachineInstr &Terminator : MBB.terminators()) {
- if (!useOrDefCSROrFI(Terminator, RS))
+ if (!useOrDefCSROrFI(Terminator, RS, /*StackAddressUsed=*/true))
continue;
// One of the terminator needs to happen before the restore point.
if (MBB.succ_empty()) {
@@ -463,47 +812,24 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
return false;
}
-bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
- return false;
-
- LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
-
- init(MF);
-
- ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
- if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
- // If MF is irreducible, a block may be in a loop without
- // MachineLoopInfo reporting it. I.e., we may use the
- // post-dominance property in loops, which lead to incorrect
- // results. Moreover, we may miss that the prologue and
- // epilogue are not in the same loop, leading to unbalanced
- // construction/deconstruction of the stack frame.
- return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
- "Irreducible CFGs are not supported yet.",
- MF.getFunction().getSubprogram(), &MF.front());
- }
-
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- std::unique_ptr<RegScavenger> RS(
- TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
-
- for (MachineBasicBlock &MBB : MF) {
- LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' '
- << MBB.getName() << '\n');
+bool ShrinkWrap::performShrinkWrapping(
+ const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
+ RegScavenger *RS) {
+ for (MachineBasicBlock *MBB : RPOT) {
+ LLVM_DEBUG(dbgs() << "Look into: " << printMBBReference(*MBB) << '\n');
- if (MBB.isEHFuncletEntry())
+ if (MBB->isEHFuncletEntry())
return giveUpWithRemarks(ORE, "UnsupportedEHFunclets",
"EH Funclets are not supported yet.",
- MBB.front().getDebugLoc(), &MBB);
+ MBB->front().getDebugLoc(), MBB);
- if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget()) {
+ if (MBB->isEHPad() || MBB->isInlineAsmBrIndirectTarget()) {
// Push the prologue and epilogue outside of the region that may throw (or
// jump out via inlineasm_br), by making sure that all the landing pads
// are at least at the boundary of the save and restore points. The
// problem is that a basic block can jump out from the middle in these
// cases, which we do not handle.
- updateSaveRestorePoints(MBB, RS.get());
+ updateSaveRestorePoints(*MBB, RS);
if (!ArePointsInteresting()) {
LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
return false;
@@ -511,22 +837,37 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- for (const MachineInstr &MI : MBB) {
- if (!useOrDefCSROrFI(MI, RS.get()))
- continue;
- // Save (resp. restore) point must dominate (resp. post dominate)
- // MI. Look for the proper basic block for those.
- updateSaveRestorePoints(MBB, RS.get());
- // If we are at a point where we cannot improve the placement of
- // save/restore instructions, just give up.
- if (!ArePointsInteresting()) {
- LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
- return false;
+ bool StackAddressUsed = false;
+ // Check if we found any stack accesses in the predecessors. We are not
+ // doing a full dataflow analysis here to keep things simple but just
+    // rely on a reverse post-order traversal (RPOT) to guarantee predecessors
+ // are already processed except for loops (and accept the conservative
+ // result for loops).
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (StackAddressUsedBlockInfo.test(Pred->getNumber())) {
+ StackAddressUsed = true;
+ break;
}
- // No need to look for other instructions, this basic block
- // will already be part of the handled region.
- break;
}
+
+ for (const MachineInstr &MI : *MBB) {
+ if (useOrDefCSROrFI(MI, RS, StackAddressUsed)) {
+ // Save (resp. restore) point must dominate (resp. post dominate)
+ // MI. Look for the proper basic block for those.
+ updateSaveRestorePoints(*MBB, RS);
+ // If we are at a point where we cannot improve the placement of
+ // save/restore instructions, just give up.
+ if (!ArePointsInteresting()) {
+ LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
+ return false;
+ }
+ // No need to look for other instructions, this basic block
+ // will already be part of the handled region.
+ StackAddressUsed = true;
+ break;
+ }
+ }
+ StackAddressUsedBlockInfo[MBB->getNumber()] = StackAddressUsed;
}
if (!ArePointsInteresting()) {
// If the points are not interesting at this point, then they must be null
@@ -540,13 +881,13 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
<< '\n');
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetFrameLowering *TFI =
+ MachineFunc->getSubtarget().getFrameLowering();
do {
LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
- << Save->getNumber() << ' ' << Save->getName() << ' '
+ << printMBBReference(*Save) << ' '
<< MBFI->getBlockFreq(Save).getFrequency()
- << "\nRestore: " << Restore->getNumber() << ' '
- << Restore->getName() << ' '
+ << "\nRestore: " << printMBBReference(*Restore) << ' '
<< MBFI->getBlockFreq(Restore).getFrequency() << '\n');
bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
@@ -570,24 +911,61 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
break;
NewBB = Restore;
}
- updateSaveRestorePoints(*NewBB, RS.get());
+ updateSaveRestorePoints(*NewBB, RS);
} while (Save && Restore);
if (!ArePointsInteresting()) {
++NumCandidatesDropped;
return false;
}
+ return true;
+}
+
+bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+ init(MF);
+
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
+ // If MF is irreducible, a block may be in a loop without
+ // MachineLoopInfo reporting it. I.e., we may use the
+    // post-dominance property in loops, which leads to incorrect
+ // results. Moreover, we may miss that the prologue and
+ // epilogue are not in the same loop, leading to unbalanced
+ // construction/deconstruction of the stack frame.
+ return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
+ "Irreducible CFGs are not supported yet.",
+ MF.getFunction().getSubprogram(), &MF.front());
+ }
+
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ std::unique_ptr<RegScavenger> RS(
+ TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
+
+ bool Changed = false;
+
+ StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true);
+ bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
+ StackAddressUsedBlockInfo.clear();
+ Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
+ if (!HasCandidate && !Changed)
+ return false;
+ if (!ArePointsInteresting())
+ return Changed;
LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
- << Save->getNumber() << ' ' << Save->getName()
- << "\nRestore: " << Restore->getNumber() << ' '
- << Restore->getName() << '\n');
+ << printMBBReference(*Save) << ' '
+ << "\nRestore: " << printMBBReference(*Restore) << '\n');
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setSavePoint(Save);
MFI.setRestorePoint(Restore);
++NumCandidates;
- return false;
+ return Changed;
}
bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
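As a standalone illustration (not part of the diff) of the predecessor scan introduced in performShrinkWrapping above: the pass does not run a full dataflow fixpoint, it makes one pass in reverse post-order and treats not-yet-visited predecessors (loop back-edges) as already tainted. The sketch below captures just that shape; Block and propagateStackUse are made-up stand-ins for MachineBasicBlock and the RPOT walk, not LLVM APIs.

#include <cstdio>
#include <vector>

// Hypothetical stand-in for a machine basic block.
struct Block {
  int Number;                 // block ID, used to index the bit vector
  std::vector<int> Preds;     // predecessor block IDs
  bool TouchesStackAddress;   // the block itself takes a stack address
};

// One sweep in reverse post-order: a block is "tainted" if any predecessor is
// tainted or if it touches a stack address itself.  Unvisited predecessors
// keep their conservative default of true, mirroring the all-ones
// initialization of StackAddressUsedBlockInfo in the pass.
static std::vector<bool> propagateStackUse(const std::vector<Block> &RPO) {
  std::vector<bool> Used(RPO.size(), true);
  for (const Block &B : RPO) {
    bool Tainted = B.TouchesStackAddress;
    for (int P : B.Preds)
      Tainted = Tainted || Used[P];
    Used[B.Number] = Tainted;
  }
  return Used;
}

int main() {
  // 0 -> 1 -> 2, where only block 1 touches a stack address.
  std::vector<Block> RPO = {{0, {}, false}, {1, {0}, true}, {2, {1}, false}};
  std::vector<bool> Used = propagateStackUse(RPO);
  std::printf("block 2 tainted: %d\n", (int)Used[2]); // prints 1
  return 0;
}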
diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 3fed707a9eb1..d09953e76a80 100644
--- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -38,21 +38,21 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
class SjLjEHPrepare : public FunctionPass {
- IntegerType *DataTy;
- Type *doubleUnderDataTy;
- Type *doubleUnderJBufTy;
- Type *FunctionContextTy;
+ IntegerType *DataTy = nullptr;
+ Type *doubleUnderDataTy = nullptr;
+ Type *doubleUnderJBufTy = nullptr;
+ Type *FunctionContextTy = nullptr;
FunctionCallee RegisterFn;
FunctionCallee UnregisterFn;
- Function *BuiltinSetupDispatchFn;
- Function *FrameAddrFn;
- Function *StackAddrFn;
- Function *StackRestoreFn;
- Function *LSDAAddrFn;
- Function *CallSiteFn;
- Function *FuncCtxFn;
- AllocaInst *FuncCtx;
- const TargetMachine *TM;
+ Function *BuiltinSetupDispatchFn = nullptr;
+ Function *FrameAddrFn = nullptr;
+ Function *StackAddrFn = nullptr;
+ Function *StackRestoreFn = nullptr;
+ Function *LSDAAddrFn = nullptr;
+ Function *CallSiteFn = nullptr;
+ Function *FuncCtxFn = nullptr;
+ AllocaInst *FuncCtx = nullptr;
+ const TargetMachine *TM = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp
index ee3a0164564e..47ee36971d0e 100644
--- a/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -215,7 +215,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
--MBBI;
else
pastStart = true;
- } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) {
+ } else if (MI && !mi2iMap.contains(MI)) {
if (MBBI != Begin)
--MBBI;
else
@@ -232,7 +232,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
- if (!MI.isDebugOrPseudoInstr() && mi2iMap.find(&MI) == mi2iMap.end())
+ if (!MI.isDebugOrPseudoInstr() && !mi2iMap.contains(&MI))
insertMachineInstrInMaps(MI);
}
}
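The two SlotIndexes hunks above are a pure spelling change enabled by DenseMap::contains(); a tiny standalone example with std::map (whose contains() has the same meaning since C++20, nothing LLVM-specific) shows the two idioms agree:

#include <cstdio>
#include <map>

int main() {
  std::map<int, int> M = {{1, 10}};
  // Old spelling and new spelling give the same answer for absent keys.
  bool OldAbsent = (M.find(2) == M.end());
  bool NewAbsent = !M.contains(2);
  std::printf("%d %d\n", (int)OldAbsent, (int)NewAbsent); // prints 1 1
  return 0;
}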
diff --git a/llvm/lib/CodeGen/SpillPlacement.h b/llvm/lib/CodeGen/SpillPlacement.h
index d2273a163025..bd37d85c6c0d 100644
--- a/llvm/lib/CodeGen/SpillPlacement.h
+++ b/llvm/lib/CodeGen/SpillPlacement.h
@@ -42,15 +42,15 @@ class MachineLoopInfo;
class SpillPlacement : public MachineFunctionPass {
struct Node;
- const MachineFunction *MF;
- const EdgeBundles *bundles;
- const MachineLoopInfo *loops;
- const MachineBlockFrequencyInfo *MBFI;
+ const MachineFunction *MF = nullptr;
+ const EdgeBundles *bundles = nullptr;
+ const MachineLoopInfo *loops = nullptr;
+ const MachineBlockFrequencyInfo *MBFI = nullptr;
Node *nodes = nullptr;
// Nodes that are active in the current computation. Owned by the prepare()
// caller.
- BitVector *ActiveNodes;
+ BitVector *ActiveNodes = nullptr;
// Nodes with active links. Populated by scanActiveBundles.
SmallVector<unsigned, 8> Linked;
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 92e820c9d3d8..83964eced597 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -514,10 +514,10 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) {
VFP = ValueForcePair(nullptr, true);
}
-SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
- unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) {
- const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+SlotIndex SplitEditor::buildSingleSubRegCopy(
+ Register FromReg, Register ToReg, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, unsigned SubIdx,
+ LiveInterval &DestLI, bool Late, SlotIndex Def, const MCInstrDesc &Desc) {
bool FirstCopy = !Def.isValid();
MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc)
.addReg(ToReg, RegState::Define | getUndefRegState(FirstCopy)
@@ -536,7 +536,8 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
LaneBitmask LaneMask, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
- const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ const MCInstrDesc &Desc =
+ TII.get(TII.getLiveRangeSplitOpcode(FromReg, *MBB.getParent()));
SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
// The full vreg is copied.
@@ -564,7 +565,7 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
SlotIndex Def;
for (unsigned BestIdx : SubIndexes) {
Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
- DestLI, Late, Def);
+ DestLI, Late, Def, Desc);
}
BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
@@ -1365,7 +1366,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// The point we want to extend is 0d to 16e not 16r in this case, but if
// we use 16r here we will extend nothing because that already contained
// in [16e, 32d).
- unsigned OpIdx = MI->getOperandNo(&MO);
+ unsigned OpIdx = MO.getOperandNo();
unsigned DefOpIdx = MI->findTiedOperandIdx(OpIdx);
const MachineOperand &DefOp = MI->getOperand(DefOpIdx);
IsEarlyClobber = DefOp.isEarlyClobber();
@@ -1584,7 +1585,9 @@ bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI,
if (BI.LiveIn && BI.LiveOut)
return true;
// No point in isolating a copy. It has no register class constraints.
- if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike())
+ MachineInstr *MI = LIS.getInstructionFromIndex(BI.FirstInstr);
+ bool copyLike = TII.isCopyInstr(*MI) || MI->isSubregToReg();
+ if (copyLike)
return false;
// Finally, don't isolate an end point that was created by earlier splits.
return isOriginalEndpoint(BI.FirstInstr);
diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h
index 5a3428a5e91f..1174e392e4e4 100644
--- a/llvm/lib/CodeGen/SplitKit.h
+++ b/llvm/lib/CodeGen/SplitKit.h
@@ -151,13 +151,13 @@ private:
/// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where
/// the live range has a gap.
- unsigned NumGapBlocks;
+ unsigned NumGapBlocks = 0u;
/// ThroughBlocks - Block numbers where CurLI is live through without uses.
BitVector ThroughBlocks;
/// NumThroughBlocks - Number of live-through blocks.
- unsigned NumThroughBlocks;
+ unsigned NumThroughBlocks = 0u;
  // Summarize statistics by counting instructions using CurLI.
void analyzeUses();
@@ -428,8 +428,11 @@ private:
bool Late, unsigned RegIdx);
SlotIndex buildSingleSubRegCopy(Register FromReg, Register ToReg,
- MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore,
- unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def);
+ MachineBasicBlock &MB,
+ MachineBasicBlock::iterator InsertBefore,
+ unsigned SubIdx, LiveInterval &DestLI,
+ bool Late, SlotIndex Def,
+ const MCInstrDesc &Desc);
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 11c6bdc69956..66b9086e1d88 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -407,8 +407,8 @@ namespace {
/// StackColoring - A machine pass for merging disjoint stack allocations,
/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
class StackColoring : public MachineFunctionPass {
- MachineFrameInfo *MFI;
- MachineFunction *MF;
+ MachineFrameInfo *MFI = nullptr;
+ MachineFunction *MF = nullptr;
/// A class representing liveness information for a single basic block.
/// Each bit in the BitVector represents the liveness property
@@ -448,7 +448,7 @@ class StackColoring : public MachineFunctionPass {
VNInfo::Allocator VNInfoAllocator;
/// SlotIndex analysis object.
- SlotIndexes *Indexes;
+ SlotIndexes *Indexes = nullptr;
/// The list of lifetime markers found. These markers are to be removed
/// once the coloring is done.
@@ -935,12 +935,13 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// Remap debug information that refers to stack slots.
for (auto &VI : MF->getVariableDbgInfo()) {
- if (!VI.Var)
+ if (!VI.Var || !VI.inStackSlot())
continue;
- if (SlotRemap.count(VI.Slot)) {
+ int Slot = VI.getStackSlot();
+ if (SlotRemap.count(Slot)) {
LLVM_DEBUG(dbgs() << "Remapping debug info for ["
<< cast<DILocalVariable>(VI.Var)->getName() << "].\n");
- VI.Slot = SlotRemap[VI.Slot];
+ VI.updateStackSlot(SlotRemap[Slot]);
FixedDbg++;
}
}
diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
index 3a48dd5b0a03..5d3903ed84ce 100644
--- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
+++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -210,8 +210,9 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
SlotDbgMap SlotDebugMap;
// add variables to the map
- for (MachineFunction::VariableDbgInfo &DI : MF.getVariableDbgInfo())
- SlotDebugMap[DI.Slot].insert(DI.Var);
+ for (MachineFunction::VariableDbgInfo &DI :
+ MF.getInStackSlotVariableDbgInfo())
+ SlotDebugMap[DI.getStackSlot()].insert(DI.Var);
// Then add all the spills that have debug data
for (MachineBasicBlock &MBB : MF) {
diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index b83c56903133..778ac1f5701c 100644
--- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -48,7 +48,7 @@ namespace {
/// information provided by this pass is optional and not required by the
/// aforementioned intrinsic to function.
class StackMapLiveness : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
+ const TargetRegisterInfo *TRI = nullptr;
LivePhysRegs LiveRegs;
public:
diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp
index bb7a51e49edb..f9115e434878 100644
--- a/llvm/lib/CodeGen/StackMaps.cpp
+++ b/llvm/lib/CodeGen/StackMaps.cpp
@@ -149,7 +149,7 @@ unsigned StatepointOpers::getGCPointerMap(
bool StatepointOpers::isFoldableReg(Register Reg) const {
unsigned FoldableAreaStart = getVarIdx();
for (const MachineOperand &MO : MI->uses()) {
- if (MI->getOperandNo(&MO) >= FoldableAreaStart)
+ if (MO.getOperandNo() >= FoldableAreaStart)
break;
if (MO.isReg() && MO.getReg() == Reg)
return false;
@@ -193,9 +193,12 @@ unsigned StackMaps::getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx) {
/// Go up the super-register chain until we hit a valid dwarf register number.
static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
- int RegNum = TRI->getDwarfRegNum(Reg, false);
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNum < 0; ++SR)
- RegNum = TRI->getDwarfRegNum(*SR, false);
+ int RegNum;
+ for (MCPhysReg SR : TRI->superregs_inclusive(Reg)) {
+ RegNum = TRI->getDwarfRegNum(SR, false);
+ if (RegNum >= 0)
+ break;
+ }
assert(RegNum >= 0 && "Invalid Dwarf register number.");
return (unsigned)RegNum;
@@ -389,7 +392,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
break;
}
I->Size = std::max(I->Size, II->Size);
- if (TRI->isSuperRegister(I->Reg, II->Reg))
+ if (I->Reg && TRI->isSuperRegister(I->Reg, II->Reg))
I->Reg = II->Reg;
II->Reg = 0; // mark for deletion.
}
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 46685f7b8208..387b653f8815 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -15,9 +15,9 @@
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
@@ -30,6 +30,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
@@ -96,7 +97,7 @@ bool StackProtector::runOnFunction(Function &Fn) {
SSPBufferSize = Fn.getFnAttributeAsParsedInteger(
"stack-protector-buffer-size", DefaultSSPBufferSize);
- if (!RequiresStackProtector())
+ if (!requiresStackProtector(F, &Layout))
return false;
// TODO(etienneb): Functions with funclets are not correctly supported now.
@@ -121,9 +122,9 @@ bool StackProtector::runOnFunction(Function &Fn) {
/// \param [out] IsLarge is set to true if a protectable array is found and
/// it is "large" ( >= ssp-buffer-size). In the case of a structure with
/// multiple arrays, this gets set if any of them is large.
-bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
- bool Strong,
- bool InStruct) const {
+static bool ContainsProtectableArray(Type *Ty, Module *M, unsigned SSPBufferSize,
+ bool &IsLarge, bool Strong,
+ bool InStruct) {
if (!Ty)
return false;
if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
@@ -132,7 +133,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
// add stack protectors unless the array is a character array.
// However, in strong mode any array, regardless of type and size,
// triggers a protector.
- if (!Strong && (InStruct || !Trip.isOSDarwin()))
+ if (!Strong && (InStruct || !Triple(M->getTargetTriple()).isOSDarwin()))
return false;
}
@@ -154,7 +155,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
bool NeedsProtector = false;
for (Type *ET : ST->elements())
- if (ContainsProtectableArray(ET, IsLarge, Strong, true)) {
+ if (ContainsProtectableArray(ET, M, SSPBufferSize, IsLarge, Strong, true)) {
// If the element is a protectable array and is large (>= SSPBufferSize)
// then we are done. If the protectable array is not large, then
// keep looking in case a subsequent element is a large array.
@@ -166,8 +167,10 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return NeedsProtector;
}
-bool StackProtector::HasAddressTaken(const Instruction *AI,
- TypeSize AllocSize) {
+/// Check whether a stack allocation has its address taken.
+static bool HasAddressTaken(const Instruction *AI, TypeSize AllocSize,
+ Module *M,
+ SmallPtrSet<const PHINode *, 16> &VisitedPHIs) {
const DataLayout &DL = M->getDataLayout();
for (const User *U : AI->users()) {
const auto *I = cast<Instruction>(U);
@@ -221,14 +224,14 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// assume the scalable value is of minimum size.
TypeSize NewAllocSize =
TypeSize::Fixed(AllocSize.getKnownMinValue()) - OffsetSize;
- if (HasAddressTaken(I, NewAllocSize))
+ if (HasAddressTaken(I, NewAllocSize, M, VisitedPHIs))
return true;
break;
}
case Instruction::BitCast:
case Instruction::Select:
case Instruction::AddrSpaceCast:
- if (HasAddressTaken(I, AllocSize))
+ if (HasAddressTaken(I, AllocSize, M, VisitedPHIs))
return true;
break;
case Instruction::PHI: {
@@ -236,7 +239,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// they are only visited once.
const auto *PN = cast<PHINode>(I);
if (VisitedPHIs.insert(PN).second)
- if (HasAddressTaken(PN, AllocSize))
+ if (HasAddressTaken(PN, AllocSize, M, VisitedPHIs))
return true;
break;
}
@@ -282,10 +285,19 @@ static const CallInst *findStackProtectorIntrinsic(Function &F) {
/// functions with aggregates that contain any buffer regardless of type and
/// size, and functions that contain stack-based variables that have had their
/// address taken.
-bool StackProtector::RequiresStackProtector() {
+bool StackProtector::requiresStackProtector(Function *F, SSPLayoutMap *Layout) {
+ Module *M = F->getParent();
bool Strong = false;
bool NeedsProtector = false;
+ // The set of PHI nodes visited when determining if a variable's reference has
+ // been taken. This set is maintained to ensure we don't visit the same PHI
+ // node multiple times.
+ SmallPtrSet<const PHINode *, 16> VisitedPHIs;
+
+ unsigned SSPBufferSize = F->getFnAttributeAsParsedInteger(
+ "stack-protector-buffer-size", DefaultSSPBufferSize);
+
if (F->hasFnAttribute(Attribute::SafeStack))
return false;
@@ -295,6 +307,8 @@ bool StackProtector::RequiresStackProtector() {
OptimizationRemarkEmitter ORE(F);
if (F->hasFnAttribute(Attribute::StackProtectReq)) {
+ if (!Layout)
+ return true;
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F)
<< "Stack protection applied to function "
@@ -324,21 +338,27 @@ bool StackProtector::RequiresStackProtector() {
if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
// A call to alloca with size >= SSPBufferSize requires
// stack protectors.
- Layout.insert(std::make_pair(AI,
- MachineFrameInfo::SSPLK_LargeArray));
+ if (!Layout)
+ return true;
+ Layout->insert(
+ std::make_pair(AI, MachineFrameInfo::SSPLK_LargeArray));
ORE.emit(RemarkBuilder);
NeedsProtector = true;
} else if (Strong) {
// Require protectors for all alloca calls in strong mode.
- Layout.insert(std::make_pair(AI,
- MachineFrameInfo::SSPLK_SmallArray));
+ if (!Layout)
+ return true;
+ Layout->insert(
+ std::make_pair(AI, MachineFrameInfo::SSPLK_SmallArray));
ORE.emit(RemarkBuilder);
NeedsProtector = true;
}
} else {
// A call to alloca with a variable size requires protectors.
- Layout.insert(std::make_pair(AI,
- MachineFrameInfo::SSPLK_LargeArray));
+ if (!Layout)
+ return true;
+ Layout->insert(
+ std::make_pair(AI, MachineFrameInfo::SSPLK_LargeArray));
ORE.emit(RemarkBuilder);
NeedsProtector = true;
}
@@ -346,10 +366,13 @@ bool StackProtector::RequiresStackProtector() {
}
bool IsLarge = false;
- if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
- Layout.insert(std::make_pair(AI, IsLarge
- ? MachineFrameInfo::SSPLK_LargeArray
- : MachineFrameInfo::SSPLK_SmallArray));
+ if (ContainsProtectableArray(AI->getAllocatedType(), M, SSPBufferSize,
+ IsLarge, Strong, false)) {
+ if (!Layout)
+ return true;
+ Layout->insert(std::make_pair(
+ AI, IsLarge ? MachineFrameInfo::SSPLK_LargeArray
+ : MachineFrameInfo::SSPLK_SmallArray));
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
<< "Stack protection applied to function "
@@ -361,10 +384,14 @@ bool StackProtector::RequiresStackProtector() {
continue;
}
- if (Strong && HasAddressTaken(AI, M->getDataLayout().getTypeAllocSize(
- AI->getAllocatedType()))) {
+ if (Strong &&
+ HasAddressTaken(
+ AI, M->getDataLayout().getTypeAllocSize(AI->getAllocatedType()),
+ M, VisitedPHIs)) {
++NumAddrTaken;
- Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
+ if (!Layout)
+ return true;
+ Layout->insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken",
&I)
@@ -455,18 +482,15 @@ bool StackProtector::InsertStackProtectors() {
if (&BB == FailBB)
continue;
Instruction *CheckLoc = dyn_cast<ReturnInst>(BB.getTerminator());
- if (!CheckLoc && !DisableCheckNoReturn) {
- for (auto &Inst : BB) {
- auto *CB = dyn_cast<CallBase>(&Inst);
- if (!CB)
- continue;
- if (!CB->doesNotReturn())
- continue;
- // Do stack check before non-return calls (e.g: __cxa_throw)
- CheckLoc = CB;
- break;
- }
- }
+ if (!CheckLoc && !DisableCheckNoReturn)
+ for (auto &Inst : BB)
+ if (auto *CB = dyn_cast<CallBase>(&Inst))
+ // Do stack check before noreturn calls that aren't nounwind (e.g:
+ // __cxa_throw).
+ if (CB->doesNotReturn() && !CB->doesNotThrow()) {
+ CheckLoc = CB;
+ break;
+ }
if (!CheckLoc)
continue;
@@ -594,18 +618,19 @@ BasicBlock *StackProtector::CreateFailBB() {
if (F->getSubprogram())
B.SetCurrentDebugLocation(
DILocation::get(Context, 0, 0, F->getSubprogram()));
+ FunctionCallee StackChkFail;
+ SmallVector<Value *, 1> Args;
if (Trip.isOSOpenBSD()) {
- FunctionCallee StackChkFail = M->getOrInsertFunction(
- "__stack_smash_handler", Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context));
-
- B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
+ StackChkFail = M->getOrInsertFunction("__stack_smash_handler",
+ Type::getVoidTy(Context),
+ Type::getInt8PtrTy(Context));
+ Args.push_back(B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
- FunctionCallee StackChkFail =
+ StackChkFail =
M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));
-
- B.CreateCall(StackChkFail, {});
}
+ cast<Function>(StackChkFail.getCallee())->addFnAttr(Attribute::NoReturn);
+ B.CreateCall(StackChkFail, Args);
B.CreateUnreachable();
return FailBB;
}
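The requiresStackProtector() change above turns a member function into a static query that takes an optional layout map and, when no map is supplied, bails out with true as soon as protection is known to be needed. A rough standalone sketch of that calling convention, with entirely made-up object names and categories, looks like this:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

enum class Kind { LargeArray, AddressTaken };

// With Layout == nullptr this is a cheap yes/no query that returns at the
// first hit; with a map it keeps scanning and records why each object needs
// protection, mirroring the !Layout early returns in the diff.
static bool needsProtection(const std::vector<std::string> &Objects,
                            std::map<std::string, Kind> *Layout) {
  bool Needs = false;
  for (const std::string &O : Objects) {
    Kind K;
    if (O.size() >= 8)
      K = Kind::LargeArray;       // pretend "large buffer"
    else if (!O.empty() && O[0] == '&')
      K = Kind::AddressTaken;     // pretend "address taken"
    else
      continue;
    if (!Layout)
      return true;                // caller only wants the boolean answer
    (*Layout)[O] = K;             // caller wants the full classification
    Needs = true;
  }
  return Needs;
}

int main() {
  std::vector<std::string> Objs = {"buf12345", "&taken", "x"};
  std::printf("query only: %d\n", (int)needsProtection(Objs, nullptr));
  std::map<std::string, Kind> Layout;
  needsProtection(Objs, &Layout);
  std::printf("classified objects: %zu\n", Layout.size()); // prints 2
  return 0;
}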
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index b8c750688914..6d933ab12041 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -58,10 +59,10 @@ STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
namespace {
class StackSlotColoring : public MachineFunctionPass {
- LiveStacks* LS;
- MachineFrameInfo *MFI;
- const TargetInstrInfo *TII;
- const MachineBlockFrequencyInfo *MBFI;
+ LiveStacks *LS = nullptr;
+ MachineFrameInfo *MFI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const MachineBlockFrequencyInfo *MBFI = nullptr;
// SSIntervals - Spill slot intervals.
std::vector<LiveInterval*> SSIntervals;
@@ -90,8 +91,50 @@ namespace {
// UsedColors - "Colors" that have been assigned. This is per stack ID
SmallVector<BitVector, 2> UsedColors;
+ // Join all intervals sharing one color into a single LiveIntervalUnion to
+  // speed up the range overlap test.
+ class ColorAssignmentInfo {
+    // Single live range (used to avoid creating a LiveIntervalUnion).
+ LiveInterval *SingleLI = nullptr;
+ // LiveIntervalUnion to perform overlap test.
+ LiveIntervalUnion *LIU = nullptr;
+    // LiveIntervalUnion's constructor takes an allocator argument, so it is
+    // placement-new'ed lazily into this raw buffer when it is first needed.
+ uint8_t LIUPad[sizeof(LiveIntervalUnion)];
+
+ public:
+ ~ColorAssignmentInfo() {
+ if (LIU)
+ LIU->~LiveIntervalUnion(); // Dirty magic again.
+ }
+
+ // Return true if LiveInterval overlaps with any
+ // intervals that have already been assigned to this color.
+ bool overlaps(LiveInterval *LI) const {
+ if (LIU)
+ return LiveIntervalUnion::Query(*LI, *LIU).checkInterference();
+ return SingleLI ? SingleLI->overlaps(*LI) : false;
+ }
+
+ // Add new LiveInterval to this color.
+ void add(LiveInterval *LI, LiveIntervalUnion::Allocator &Alloc) {
+ assert(!overlaps(LI));
+ if (LIU) {
+ LIU->unify(*LI, *LI);
+ } else if (SingleLI) {
+ LIU = new (LIUPad) LiveIntervalUnion(Alloc);
+ LIU->unify(*SingleLI, *SingleLI);
+ LIU->unify(*LI, *LI);
+ SingleLI = nullptr;
+ } else
+ SingleLI = LI;
+ }
+ };
+
+ LiveIntervalUnion::Allocator LIUAlloc;
+
// Assignments - Color to intervals mapping.
- SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+ SmallVector<ColorAssignmentInfo, 16> Assignments;
public:
static char ID; // Pass identification
@@ -116,7 +159,6 @@ namespace {
private:
void InitializeSlots();
void ScanForSpillSlotRefs(MachineFunction &MF);
- bool OverlapWithAssignments(LiveInterval *li, int Color) const;
int ColorSlot(LiveInterval *li);
bool ColorSlots(MachineFunction &MF);
void RewriteInstruction(MachineInstr &MI, SmallVectorImpl<int> &SlotMapping,
@@ -247,19 +289,6 @@ void StackSlotColoring::InitializeSlots() {
NextColors[I] = AllColors[I].find_first();
}
-/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
-/// LiveIntervals that have already been assigned to the specified color.
-bool
-StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
- const SmallVectorImpl<LiveInterval *> &OtherLIs = Assignments[Color];
- for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
- LiveInterval *OtherLI = OtherLIs[i];
- if (OtherLI->overlaps(*li))
- return true;
- }
- return false;
-}
-
/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
int StackSlotColoring::ColorSlot(LiveInterval *li) {
int Color = -1;
@@ -272,7 +301,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
// Check if it's possible to reuse any of the used colors.
Color = UsedColors[StackID].find_first();
while (Color != -1) {
- if (!OverlapWithAssignments(li, Color)) {
+ if (!Assignments[Color].overlaps(li)) {
Share = true;
++NumEliminated;
break;
@@ -298,7 +327,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
assert(MFI->getStackID(Color) == MFI->getStackID(FI));
// Record the assignment.
- Assignments[Color].push_back(li);
+ Assignments[Color].add(li, LIUAlloc);
LLVM_DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
// Change size and alignment of the allocated slot. If there are multiple
@@ -515,8 +544,6 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
OrigSizes.clear();
AllColors.clear();
UsedColors.clear();
- for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
- Assignments[i].clear();
Assignments.clear();
return Changed;
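The ColorAssignmentInfo class added to StackSlotColoring above replaces a linear overlap scan over every interval assigned to a color with a single-interval fast path plus a LiveIntervalUnion. The standalone sketch below keeps only that structure; it uses a sorted std::map of half-open ranges in place of LLVM's coalescing union, so the names and the data structure are illustrative, not the real implementation.

#include <cassert>
#include <cstdio>
#include <iterator>
#include <map>

struct Range { int Start, End; }; // half-open [Start, End)

class ColorAssignment {
  bool HasSingle = false;    // fast path: one range, no map built yet
  Range Single{};
  std::map<int, int> Ranges; // slow path: Start -> End, non-overlapping

  static bool overlaps(const Range &A, const Range &B) {
    return A.Start < B.End && B.Start < A.End;
  }

public:
  bool overlapsAny(const Range &R) const {
    if (!Ranges.empty()) {
      // In a sorted, non-overlapping set only the first range starting at or
      // after R.Start and its predecessor can overlap R.
      auto It = Ranges.lower_bound(R.Start);
      if (It != Ranges.end() && It->first < R.End)
        return true;
      return It != Ranges.begin() && std::prev(It)->second > R.Start;
    }
    return HasSingle && overlaps(Single, R);
  }

  void add(const Range &R) {
    assert(!overlapsAny(R) && "ranges assigned to one color must not overlap");
    if (!Ranges.empty()) {
      Ranges[R.Start] = R.End;
    } else if (HasSingle) {
      // Second range: switch from the single-range fast path to the map.
      Ranges[Single.Start] = Single.End;
      Ranges[R.Start] = R.End;
      HasSingle = false;
    } else {
      Single = R;
      HasSingle = true;
    }
  }
};

int main() {
  ColorAssignment C;
  C.add({0, 10});
  C.add({20, 30});
  std::printf("%d %d\n", (int)C.overlapsAny({5, 8}),
              (int)C.overlapsAny({10, 20})); // prints 1 0
  return 0;
}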
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 865add28f781..5ed67bd0a121 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -427,7 +427,13 @@ void TailDuplicator::duplicateInstruction(
} else {
// For mapped registers that do not have sub-registers, simply
// restrict their class to match the original one.
- ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC);
+
+ // We don't want debug instructions affecting the resulting code so
+ // if we're cloning a debug instruction then just use MappedRC
+ // rather than constraining the register class further.
+ ConstrRC = NewMI.isDebugInstr()
+ ? MappedRC
+ : MRI->constrainRegClass(VI->second.Reg, OrigRC);
}
if (ConstrRC) {
@@ -436,16 +442,13 @@ void TailDuplicator::duplicateInstruction(
MO.setReg(VI->second.Reg);
// We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
// sub-register, we need to compose the sub-register indices.
- MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(),
- VI->second.SubReg));
+ MO.setSubReg(
+ TRI->composeSubRegIndices(VI->second.SubReg, MO.getSubReg()));
} else {
// The direct replacement is not possible, due to failing register
// class constraints. An explicit COPY is necessary. Create one
- // that can be reused
- auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
- if (NewRC == nullptr)
- NewRC = OrigRC;
- Register NewReg = MRI->createVirtualRegister(NewRC);
+ // that can be reused.
+ Register NewReg = MRI->createVirtualRegister(OrigRC);
BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(),
TII->get(TargetOpcode::COPY), NewReg)
.addReg(VI->second.Reg, 0, VI->second.SubReg);
@@ -1016,13 +1019,11 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
DenseMap<Register, RegSubRegPair> LocalVRMap;
SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
- MachineBasicBlock::iterator I = TailBB->begin();
// Process PHI instructions first.
- while (I != TailBB->end() && I->isPHI()) {
+ for (MachineInstr &MI : make_early_inc_range(TailBB->phis())) {
// Replace the uses of the def of the PHI with the register coming
// from PredBB.
- MachineInstr *MI = &*I++;
- processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
+ processPHI(&MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
}
appendCopies(PredBB, CopyInfos, Copies);
}
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 9430e86fe44d..48a2094f5d45 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -131,16 +130,6 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
-unsigned TargetFrameLowering::getStackAlignmentSkew(
- const MachineFunction &MF) const {
- // When HHVM function is called, the stack is skewed as the return address
- // is removed from the stack before we enter the function.
- if (LLVM_UNLIKELY(MF.getFunction().getCallingConv() == CallingConv::HHVM))
- return MF.getTarget().getAllocaPointerSize();
-
- return 0;
-}
-
bool TargetFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
const MachineFunction &MF) const {
if (!hasFP(MF))
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 0f6cf11ca9d1..09dcddc17b06 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -439,8 +440,9 @@ MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
// If the COPY instruction in MI can be folded to a stack operation, return
// the register class to use.
static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
+ const TargetInstrInfo &TII,
unsigned FoldIdx) {
- assert(MI.isCopy() && "MI must be a COPY instruction");
+ assert(TII.isCopyInstr(MI) && "MI must be a COPY instruction");
if (MI.getNumOperands() != 2)
return nullptr;
  assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
@@ -629,10 +631,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
}
// Straight COPY may fold as load/store.
- if (!MI.isCopy() || Ops.size() != 1)
+ if (!isCopyInstr(MI) || Ops.size() != 1)
return nullptr;
- const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ const TargetRegisterClass *RC = canFoldCopy(MI, *this, Ops[0]);
if (!RC)
return nullptr;
@@ -695,6 +697,61 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
return NewMI;
}
+/// transferImplicitOperands - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit
+/// operands from MI to the replacement instruction.
+static void transferImplicitOperands(MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ Register DstReg = MI->getOperand(0).getReg();
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ CopyMI->addOperand(MO);
+
+ // Be conservative about preserving kills when subregister defs are
+  // involved. If there was an implicit kill of a super-register overlapping
+  // the copy result, we would kill the subregisters that previous copies
+  // defined.
+
+ if (MO.isKill() && TRI->regsOverlap(DstReg, MO.getReg()))
+ CopyMI->getOperand(CopyMI->getNumOperands() - 1).setIsKill(false);
+ }
+}
+
+void TargetInstrInfo::lowerCopy(MachineInstr *MI,
+ const TargetRegisterInfo *TRI) const {
+ if (MI->allDefsAreDead()) {
+ MI->setDesc(get(TargetOpcode::KILL));
+ return;
+ }
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg());
+ if (IdentityCopy || SrcMO.isUndef()) {
+ // No need to insert an identity copy instruction, but replace with a KILL
+ // if liveness is changed.
+ if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(get(TargetOpcode::KILL));
+ return;
+ }
+ // Vanilla identity copy.
+ MI->eraseFromParent();
+ return;
+ }
+
+ copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), DstMO.getReg(),
+ SrcMO.getReg(), SrcMO.isKill());
+
+ if (MI->getNumOperands() > 2)
+ transferImplicitOperands(MI, TRI);
+ MI->eraseFromParent();
+ return;
+}
+
bool TargetInstrInfo::hasReassociableOperands(
const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
const MachineOperand &Op1 = Inst.getOperand(1);
@@ -1016,6 +1073,17 @@ void TargetInstrInfo::reassociateOps(
InsInstrs.push_back(MIB2);
DelInstrs.push_back(&Prev);
DelInstrs.push_back(&Root);
+
+ // We transformed:
+ // B = A op X (Prev)
+ // C = B op Y (Root)
+ // Into:
+ // B = X op Y (MIB1)
+ // C = A op B (MIB2)
+ // C has the same value as before, B doesn't; as such, keep the debug number
+ // of C but not of B.
+ if (unsigned OldRootNum = Root.peekDebugInstrNum())
+ MIB2.getInstr()->setDebugInstrNum(OldRootNum);
}
void TargetInstrInfo::genAlternativeCodeSequence(
@@ -1037,18 +1105,20 @@ void TargetInstrInfo::genAlternativeCodeSequence(
Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
break;
default:
- break;
+ llvm_unreachable("Unknown pattern for machine combiner");
}
// Don't reassociate if Prev and Root are in different blocks.
if (Prev->getParent() != Root.getParent())
return;
- assert(Prev && "Unknown pattern for machine combiner");
-
reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
}
+MachineTraceStrategy TargetInstrInfo::getMachineCombinerTraceStrategy() const {
+ return MachineTraceStrategy::TS_MinInstrCount;
+}
+
bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getMF();
@@ -1329,11 +1399,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
if (Reg == DestReg)
return ParamLoadedValue(*DestSrc->Source, Expr);
- // Cases where super- or sub-registers needs to be described should
- // be handled by the target's hook implementation.
- assert(!TRI->isSuperOrSubRegisterEq(Reg, DestReg) &&
- "TargetInstrInfo::describeLoadedValue can't describe super- or "
- "sub-regs for copy instructions");
+ // If the target's hook couldn't describe this copy, give up.
return std::nullopt;
} else if (auto RegImm = isAddImmediate(MI, Reg)) {
Register SrcReg = RegImm->Reg;
@@ -1555,15 +1621,107 @@ void TargetInstrInfo::mergeOutliningCandidateAttributes(
F.addFnAttr(Attribute::NoUnwind);
}
+outliner::InstrType TargetInstrInfo::getOutliningType(
+ MachineBasicBlock::iterator &MIT, unsigned Flags) const {
+ MachineInstr &MI = *MIT;
+
+ // NOTE: MI.isMetaInstruction() will match CFI_INSTRUCTION, but some targets
+ // have support for outlining those. Special-case that here.
+ if (MI.isCFIInstruction())
+ // Just go right to the target implementation.
+ return getOutliningTypeImpl(MIT, Flags);
+
+ // Be conservative about inline assembly.
+ if (MI.isInlineAsm())
+ return outliner::InstrType::Illegal;
+
+ // Labels generally can't safely be outlined.
+ if (MI.isLabel())
+ return outliner::InstrType::Illegal;
+
+ // Don't let debug instructions impact analysis.
+ if (MI.isDebugInstr())
+ return outliner::InstrType::Invisible;
+
+ // Some other special cases.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::LIFETIME_START:
+ case TargetOpcode::LIFETIME_END:
+ return outliner::InstrType::Invisible;
+ default:
+ break;
+ }
+
+ // Is this a terminator for a basic block?
+ if (MI.isTerminator()) {
+ // If this is a branch to another block, we can't outline it.
+ if (!MI.getParent()->succ_empty())
+ return outliner::InstrType::Illegal;
+
+ // Don't outline if the branch is not unconditional.
+ if (isPredicated(MI))
+ return outliner::InstrType::Illegal;
+ }
+
+ // Make sure none of the operands of this instruction do anything that
+ // might break if they're moved outside their current function.
+ // This includes MachineBasicBlock references, BlockAddressses,
+ // Constant pool indices and jump table indices.
+ //
+ // A quick note on MO_TargetIndex:
+ // This doesn't seem to be used in any of the architectures that the
+ // MachineOutliner supports, but it was still filtered out in all of them.
+ // There was one exception (RISC-V), but MO_TargetIndex also isn't used there.
+ // As such, this check is removed both here and in the target-specific
+ // implementations. Instead, we assert to make sure this doesn't
+ // catch anyone off-guard somewhere down the line.
+ for (const MachineOperand &MOP : MI.operands()) {
+ // If you hit this assertion, please remove it and adjust
+ // `getOutliningTypeImpl` for your target appropriately if necessary.
+ // Adding the assertion back to other supported architectures
+ // would be nice too :)
+ assert(!MOP.isTargetIndex() && "This isn't used quite yet!");
+
+ // CFI instructions should already have been filtered out at this point.
+ assert(!MOP.isCFIIndex() && "CFI instructions handled elsewhere!");
+
+ // PrologEpilogInserter should've already run at this point.
+ assert(!MOP.isFI() && "FrameIndex instructions should be gone by now!");
+
+ if (MOP.isMBB() || MOP.isBlockAddress() || MOP.isCPI() || MOP.isJTI())
+ return outliner::InstrType::Illegal;
+ }
+
+ // If we don't know, delegate to the target-specific hook.
+ return getOutliningTypeImpl(MIT, Flags);
+}
+
bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
unsigned &Flags) const {
// Some instrumentations create special TargetOpcode at the start which
// expands to special code sequences which must be present.
auto First = MBB.getFirstNonDebugInstr();
- if (First != MBB.end() &&
- (First->getOpcode() == TargetOpcode::FENTRY_CALL ||
- First->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_ENTER))
+ if (First == MBB.end())
+ return true;
+
+ if (First->getOpcode() == TargetOpcode::FENTRY_CALL ||
+ First->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_ENTER)
+ return false;
+
+ // Some instrumentations create special pseudo-instructions at or just before
+ // the end that must be present.
+ auto Last = MBB.getLastNonDebugInstr();
+ if (Last->getOpcode() == TargetOpcode::PATCHABLE_RET ||
+ Last->getOpcode() == TargetOpcode::PATCHABLE_TAIL_CALL)
return false;
+ if (Last != First && Last->isReturn()) {
+ --Last;
+ if (Last->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_EXIT ||
+ Last->getOpcode() == TargetOpcode::PATCHABLE_TAIL_CALL)
+ return false;
+ }
return true;
}
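The comment added to reassociateOps() above states why only C may keep its debug instruction number after rewriting (Prev: B = A op X, Root: C = B op Y) into (MIB1: B = X op Y, MIB2: C = A op B): the final value C is unchanged while the intermediate B generally is not. A tiny arithmetic check of that claim, with made-up values and + standing in for the associative op:

#include <cassert>

int main() {
  int A = 7, X = 3, Y = 5;

  // Before the rewrite (Prev, Root):
  int B_old = A + X;     // Prev:  B = A op X
  int C_old = B_old + Y; // Root:  C = B op Y

  // After the rewrite (MIB1, MIB2):
  int B_new = X + Y;     // MIB1:  B = X op Y
  int C_new = A + B_new; // MIB2:  C = A op B

  assert(C_old == C_new); // C is value-identical, so it keeps Root's number
  assert(B_old != B_new); // B is not, so Prev's number must be dropped
  return 0;
}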
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index da8b87babc2d..badb7fe53333 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -29,6 +28,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -49,10 +49,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
@@ -209,6 +209,18 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
if (TT.isOSOpenBSD()) {
setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
}
+
+ if (TT.isOSWindows() && !TT.isOSCygMing()) {
+ setLibcallName(RTLIB::LDEXP_F32, nullptr);
+ setLibcallName(RTLIB::LDEXP_F80, nullptr);
+ setLibcallName(RTLIB::LDEXP_F128, nullptr);
+ setLibcallName(RTLIB::LDEXP_PPCF128, nullptr);
+
+ setLibcallName(RTLIB::FREXP_F32, nullptr);
+ setLibcallName(RTLIB::FREXP_F80, nullptr);
+ setLibcallName(RTLIB::FREXP_F128, nullptr);
+ setLibcallName(RTLIB::FREXP_PPCF128, nullptr);
+ }
}
/// GetFPLibCall - Helper to return the right libcall for the given floating
@@ -498,6 +510,16 @@ RTLIB::Libcall RTLIB::getPOWI(EVT RetVT) {
POWI_PPCF128);
}
+RTLIB::Libcall RTLIB::getLDEXP(EVT RetVT) {
+ return getFPLibCall(RetVT, LDEXP_F32, LDEXP_F64, LDEXP_F80, LDEXP_F128,
+ LDEXP_PPCF128);
+}
+
+RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) {
+ return getFPLibCall(RetVT, FREXP_F32, FREXP_F64, FREXP_F80, FREXP_F128,
+ FREXP_PPCF128);
+}
+
RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
MVT VT) {
unsigned ModeN, ModelN;
@@ -724,7 +746,9 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
// with the Target-specific changes necessary.
MaxAtomicSizeInBitsSupported = 1024;
- MaxDivRemBitWidthSupported = llvm::IntegerType::MAX_INT_BITS;
+ // Assume that even with libcalls, no target supports wider than 128 bit
+ // division.
+ MaxDivRemBitWidthSupported = 128;
MaxLargeFPConvertBitWidthSupported = llvm::IntegerType::MAX_INT_BITS;
@@ -819,8 +843,8 @@ void TargetLoweringBase::initActions() {
ISD::SMULO, ISD::UMULO},
VT, Expand);
- // ADDCARRY operations default to expand
- setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY, ISD::SETCCCARRY,
+ // Carry-using overflow operations default to expand.
+ setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY, ISD::SETCCCARRY,
ISD::SADDO_CARRY, ISD::SSUBO_CARRY},
VT, Expand);
@@ -843,7 +867,9 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
// These library functions default to expand.
- setOperationAction({ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI}, VT, Expand);
+ setOperationAction(
+ {ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP},
+ VT, Expand);
// These operations default to expand for vector types.
if (VT.isVector())
@@ -867,16 +893,22 @@ void TargetLoweringBase::initActions() {
ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX,
- ISD::VECREDUCE_FMIN, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
+ ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAXIMUM, ISD::VECREDUCE_FMINIMUM,
+ ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
VT, Expand);
// Named vector shuffles default to expand.
setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);
- // VP_SREM/UREM default to expand.
- // TODO: Expand all VP intrinsics.
- setOperationAction(ISD::VP_SREM, VT, Expand);
- setOperationAction(ISD::VP_UREM, VT, Expand);
+ // VP operations default to expand.
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \
+ setOperationAction(ISD::SDOPC, VT, Expand);
+#include "llvm/IR/VPIntrinsics.def"
+
+ // FP environment operations default to expand.
+ setOperationAction(ISD::GET_FPENV, VT, Expand);
+ setOperationAction(ISD::SET_FPENV, VT, Expand);
+ setOperationAction(ISD::RESET_FPENV, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
@@ -907,6 +939,9 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
+
+ setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Expand);
+ setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Expand);
}
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
@@ -1137,8 +1172,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
unsigned LaneSizeInBits = NewVT.getScalarSizeInBits();
// Convert sizes such as i33 to i64.
- if (!isPowerOf2_32(LaneSizeInBits))
- LaneSizeInBits = NextPowerOf2(LaneSizeInBits);
+ LaneSizeInBits = llvm::bit_ceil(LaneSizeInBits);
MVT DestVT = TLI->getRegisterType(NewVT);
RegisterVT = DestVT;
@@ -1627,7 +1661,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
if (EVT(DestVT).bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16.
TypeSize NewVTSize = NewVT.getSizeInBits();
// Convert sizes such as i33 to i64.
- if (!isPowerOf2_32(NewVTSize.getKnownMinValue()))
+ if (!llvm::has_single_bit<uint32_t>(NewVTSize.getKnownMinValue()))
NewVTSize = NewVTSize.coefficientNextPowerOf2();
return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
}
@@ -1691,7 +1725,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
// conventions. The frontend should mark functions whose return values
// require promoting with signext or zeroext attributes.
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ MVT MinVT = TLI.getRegisterType(MVT::i32);
if (VT.bitsLT(MinVT))
VT = MinVT;
}
@@ -1976,9 +2010,10 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
"__stack_chk_guard");
// FreeBSD has "__stack_chk_guard" defined externally on libc.so
- if (TM.getRelocationModel() == Reloc::Static &&
+ if (M.getDirectAccessExternalData() &&
!TM.getTargetTriple().isWindowsGNUEnvironment() &&
- !TM.getTargetTriple().isOSFreeBSD())
+ !TM.getTargetTriple().isOSFreeBSD() &&
+ !TM.getTargetTriple().isOSDarwin())
GV->setDSOLocal(true);
}
}
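The lane-size rounding hunks above swap the manual isPowerOf2_32/NextPowerOf2 pattern for llvm::bit_ceil and llvm::has_single_bit; the C++20 standard-library equivalents behave the same way, e.g. rounding an i33 lane up to 64 bits, as this small LLVM-independent example shows:

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  std::uint32_t LaneSizeInBits = 33; // e.g. an i33 element type
  // bit_ceil rounds up to the next power of two; powers of two are unchanged.
  std::uint32_t Rounded = std::bit_ceil(LaneSizeInBits);
  bool WasPow2 = std::has_single_bit(LaneSizeInBits);
  std::printf("%u -> %u (was a power of two: %d)\n", LaneSizeInBits, Rounded,
              (int)WasPow2); // prints: 33 -> 64 (was a power of two: 0)
  return 0;
}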
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index e760564779c2..3994552884c4 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -65,12 +64,17 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <string>
using namespace llvm;
using namespace dwarf;
+static cl::opt<bool> JumpTableInFunctionSection(
+ "jumptable-in-function-section", cl::Hidden, cl::init(false),
+    cl::desc("Place jump tables in the function section"));
+
static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags,
StringRef &Section) {
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
@@ -182,26 +186,14 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
// The small model guarantees static code/data size < 4GB, but not where it
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
- if (isPositionIndependent()) {
- // ILP32 uses sdata4 instead of sdata8
- if (TgtM.getTargetTriple().getEnvironment() == Triple::GNUILP32) {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- } else {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
- }
- } else {
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- }
+ //
+ // Use DW_EH_PE_indirect even for -fno-pic to avoid copy relocations.
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel |
+ (TgtM.getTargetTriple().getEnvironment() == Triple::GNUILP32
+ ? dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_sdata8);
+ PersonalityEncoding = LSDAEncoding | dwarf::DW_EH_PE_indirect;
+ TTypeEncoding = LSDAEncoding | dwarf::DW_EH_PE_indirect;
break;
case Triple::lanai:
LSDAEncoding = dwarf::DW_EH_PE_absptr;
@@ -591,14 +583,7 @@ static const MCSymbolELF *getLinkedToSymbol(const GlobalObject *GO,
if (!MD)
return nullptr;
- const MDOperand &Op = MD->getOperand(0);
- if (!Op.get())
- return nullptr;
-
- auto *VM = dyn_cast<ValueAsMetadata>(Op);
- if (!VM)
- report_fatal_error("MD_associated operand is not ValueAsMetadata");
-
+ auto *VM = cast<ValueAsMetadata>(MD->getOperand(0).get());
auto *OtherGV = dyn_cast<GlobalValue>(VM->getValue());
return OtherGV ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGV)) : nullptr;
}
@@ -629,21 +614,21 @@ static unsigned getEntrySizeForKind(SectionKind Kind) {
/// Return the section prefix name used by options FunctionsSections and
/// DataSections.
-static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
+static StringRef getSectionPrefixForGlobal(SectionKind Kind, bool IsLarge) {
if (Kind.isText())
return ".text";
if (Kind.isReadOnly())
- return ".rodata";
+ return IsLarge ? ".lrodata" : ".rodata";
if (Kind.isBSS())
- return ".bss";
+ return IsLarge ? ".lbss" : ".bss";
if (Kind.isThreadData())
return ".tdata";
if (Kind.isThreadBSS())
return ".tbss";
if (Kind.isData())
- return ".data";
+ return IsLarge ? ".ldata" : ".data";
if (Kind.isReadOnlyWithRel())
- return ".data.rel.ro";
+ return IsLarge ? ".ldata.rel.ro" : ".data.rel.ro";
llvm_unreachable("Unknown section kind");
}
@@ -665,7 +650,10 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
Name = ".rodata.cst";
Name += utostr(EntrySize);
} else {
- Name = getSectionPrefixForGlobal(Kind);
+ bool IsLarge = false;
+ if (isa<GlobalVariable>(GO))
+ IsLarge = TM.isLargeData();
+ Name = getSectionPrefixForGlobal(Kind, IsLarge);
}
bool HasPrefix = false;
@@ -867,6 +855,12 @@ static MCSectionELF *selectELFSectionForGlobal(
Group = C->getName();
IsComdat = C->getSelectionKind() == Comdat::Any;
}
+ if (isa<GlobalVariable>(GO)) {
+ if (TM.isLargeData()) {
+ assert(TM.getTargetTriple().getArch() == Triple::x86_64);
+ Flags |= ELF::SHF_X86_64_LARGE;
+ }
+ }
// Get the section entry size based on the kind.
unsigned EntrySize = getEntrySizeForKind(Kind);
@@ -1217,11 +1211,12 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
MCSection *TargetLoweringObjectFileMachO::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- // TODO(yln): Remove -lower-global-dtors-via-cxa-atexit fallback flag
- // (LowerGlobalDtorsViaCxaAtExit) and always issue a fatal error here.
- if (TM->Options.LowerGlobalDtorsViaCxaAtExit)
- report_fatal_error("@llvm.global_dtors should have been lowered already");
return StaticDtorSection;
+ // In userspace, we lower global destructors via atexit(), but kernel/kext
+ // environments do not provide this function so we still need to support the
+ // legacy way here.
+ // See the -disable-atexit-based-global-dtor-lowering CodeGen flag for more
+ // context.
}
void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
@@ -1282,6 +1277,20 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
StringRef SectionName = GO->getSection();
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(GO);
+ if (GV && GV->hasImplicitSection()) {
+ auto Attrs = GV->getAttributes();
+ if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) {
+ SectionName = Attrs.getAttribute("bss-section").getValueAsString();
+ } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) {
+ SectionName = Attrs.getAttribute("rodata-section").getValueAsString();
+ } else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) {
+ SectionName = Attrs.getAttribute("relro-section").getValueAsString();
+ } else if (Attrs.hasAttribute("data-section") && Kind.isData()) {
+ SectionName = Attrs.getAttribute("data-section").getValueAsString();
+ }
+ }
+
const Function *F = dyn_cast<Function>(GO);
if (F && F->hasFnAttribute("implicit-section-name")) {
SectionName = F->getFnAttribute("implicit-section-name").getValueAsString();
@@ -1411,6 +1420,11 @@ MCSection *TargetLoweringObjectFileMachO::getSectionForConstant(
return ReadOnlySection; // .const
}
+MCSection *TargetLoweringObjectFileMachO::getSectionForCommandLines() const {
+ return getContext().getMachOSection("__TEXT", "__command_line", 0,
+ SectionKind::getReadOnly());
+}
+
const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
MachineModuleInfo *MMI, MCStreamer &Streamer) const {
@@ -1796,6 +1810,19 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
}
+bool TargetLoweringObjectFileCOFF::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ if (TM->getTargetTriple().getArch() == Triple::x86_64) {
+ if (!JumpTableInFunctionSection) {
+ // We can always create relative relocations, so use another section
+ // that can be marked non-executable.
+ return false;
+ }
+ }
+ return TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
+ UsesLabelDifference, F);
+}
+
void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
Module &M) const {
emitLinkerDirectives(Streamer, M);
@@ -2152,7 +2179,7 @@ static MCSectionWasm *selectWasmSectionForGlobal(
}
bool UniqueSectionNames = TM.getUniqueSectionNames();
- SmallString<128> Name = getSectionPrefixForGlobal(Kind);
+ SmallString<128> Name = getSectionPrefixForGlobal(Kind, /*IsLarge=*/false);
if (const auto *F = dyn_cast<Function>(GO)) {
const auto &OptionalPrefix = F->getSectionPrefix();
@@ -2335,8 +2362,11 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
XCOFF::StorageMappingClass MappingClass;
if (Kind.isText())
MappingClass = XCOFF::XMC_PR;
- else if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS())
+ else if (Kind.isData() || Kind.isBSS())
MappingClass = XCOFF::XMC_RW;
+ else if (Kind.isReadOnlyWithRel())
+ MappingClass =
+ TM.Options.XCOFFReadOnlyPointers ? XCOFF::XMC_RO : XCOFF::XMC_RW;
else if (Kind.isReadOnly())
MappingClass = XCOFF::XMC_RO;
else
@@ -2421,9 +2451,18 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
return TextSection;
}
- // TODO: We may put Kind.isReadOnlyWithRel() under option control, because
- // user may want to have read-only data with relocations placed into a
- // read-only section by the compiler.
+ if (TM.Options.XCOFFReadOnlyPointers && Kind.isReadOnlyWithRel()) {
+ if (!TM.getDataSections())
+ report_fatal_error(
+ "ReadOnlyPointers is supported only if data sections is turned on");
+
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(
+ Name, SectionKind::getReadOnly(),
+ XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD));
+ }
+
// For BSS kind, zero initialized data must be emitted to the .data section
// because external linkage control sections that get mapped to the .bss
// section will be linked as tentative definitions, which is only appropriate
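Editor's note: the XCOFF change above routes read-only data with relocations to a read-only csect when TM.Options.XCOFFReadOnlyPointers is set. An illustrative restatement of the mapping-class choice (not a real LLVM helper):

```cpp
static XCOFF::StorageMappingClass
pickMappingClass(SectionKind Kind, bool ReadOnlyPointers) {
  if (Kind.isText())
    return XCOFF::XMC_PR;
  if (Kind.isData() || Kind.isBSS())
    return XCOFF::XMC_RW;
  if (Kind.isReadOnlyWithRel())                 // new: honor the option
    return ReadOnlyPointers ? XCOFF::XMC_RO : XCOFF::XMC_RW;
  if (Kind.isReadOnly())
    return XCOFF::XMC_RO;
  llvm_unreachable("remaining kinds are handled as in the original code");
}
```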
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 3127328c363e..98ea2f21b3c8 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -42,6 +42,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
@@ -99,6 +101,9 @@ static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
cl::desc("Disable Copy Propagation pass"));
static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inlining",
cl::Hidden, cl::desc("Disable Partial Libcall Inlining"));
+static cl::opt<bool> DisableAtExitBasedGlobalDtorLowering(
+ "disable-atexit-based-global-dtor-lowering", cl::Hidden,
+ cl::desc("For MachO, disable atexit()-based global destructor lowering"));
static cl::opt<bool> EnableImplicitNullChecks(
"enable-implicit-null-checks",
cl::desc("Fold null checks into faulting memory operations"),
@@ -168,12 +173,6 @@ static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2",
"Disable the abort but emit a diagnostic on failure")));
-// An option that disables inserting FS-AFDO discriminators before emit.
-// This is mainly for debugging and tuning purpose.
-static cl::opt<bool>
- FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,
- cl::desc("Do not insert FS-AFDO discriminators before "
- "emit."));
// Disable MIRProfileLoader before RegAlloc. This is for debugging and
// tuning purposes.
static cl::opt<bool> DisableRAFSProfileLoader(
@@ -878,7 +877,7 @@ void TargetPassConfig::addIRPasses() {
// For MachO, lower @llvm.global_dtors into @llvm.global_ctors with
// __cxa_atexit() calls to avoid emitting the deprecated __mod_term_func.
if (TM->getTargetTriple().isOSBinFormatMachO() &&
- TM->Options.LowerGlobalDtorsViaCxaAtExit)
+ !DisableAtExitBasedGlobalDtorLowering)
addPass(createLowerGlobalDtorsLegacyPass());
// Make sure that no unreachable blocks are instruction selected.
@@ -977,6 +976,8 @@ void TargetPassConfig::addISelPrepare() {
if (requiresCodeGenSCCOrder())
addPass(new DummyCGSCCPass);
+ addPass(createCallBrPass());
+
// Add both the safe stack and the stack protection passes: each of them will
// only protect functions that have corresponding attributes.
addPass(createSafeStackPass());
@@ -1082,8 +1083,8 @@ bool TargetPassConfig::addISelPasses() {
if (TM->useEmulatedTLS())
addPass(createLowerEmuTLSPass());
- addPass(createPreISelIntrinsicLoweringPass());
PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+ addPass(createPreISelIntrinsicLoweringPass());
addPass(createExpandLargeDivRemPass());
addPass(createExpandLargeFpConvertPass());
addIRPasses();
@@ -1149,9 +1150,9 @@ void TargetPassConfig::addMachinePasses() {
sampleprof::FSDiscriminatorPass::Pass1));
const std::string ProfileFile = getFSProfileFile(TM);
if (!ProfileFile.empty() && !DisableRAFSProfileLoader)
- addPass(
- createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
- sampleprof::FSDiscriminatorPass::Pass1));
+ addPass(createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass1,
+ nullptr));
}
// Run register allocation and passes that are tightly coupled with it,
@@ -1219,14 +1220,6 @@ void TargetPassConfig::addMachinePasses() {
addPass(&XRayInstrumentationID);
addPass(&PatchableFunctionID);
- if (EnableFSDiscriminator && !FSNoFinalDiscrim)
- // Add FS discriminators here so that all the instruction duplicates
- // in different BBs get their own discriminators. With this, we can "sum"
- // the SampleFDO counters instead of using MAX. This will improve the
- // SampleFDO profile quality.
- addPass(createMIRAddFSDiscriminatorsPass(
- sampleprof::FSDiscriminatorPass::PassLast));
-
addPreEmitPass();
if (TM->Options.EnableIPRA)
@@ -1252,6 +1245,10 @@ void TargetPassConfig::addMachinePasses() {
addPass(createMachineOutlinerPass(RunOnAllFunctions));
}
+ if (EnableFSDiscriminator)
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::PassLast));
+
// Machine function splitter uses the basic block sections feature. Both
// cannot be enabled at the same time. Basic block sections takes precedence.
// FIXME: In principle, BasicBlockSection::Labels and splitting can be used
@@ -1264,9 +1261,25 @@ void TargetPassConfig::addMachinePasses() {
addPass(llvm::createBasicBlockSectionsPass());
} else if (TM->Options.EnableMachineFunctionSplitter ||
EnableMachineFunctionSplitter) {
+ const std::string ProfileFile = getFSProfileFile(TM);
+ if (!ProfileFile.empty()) {
+ if (EnableFSDiscriminator) {
+ addPass(createMIRProfileLoaderPass(
+ ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::PassLast, nullptr));
+ } else {
+ // Sample profile is given, but FSDiscriminator is not
+ // enabled; this may result in a performance regression.
+ WithColor::warning()
+ << "Using AutoFDO without FSDiscriminator for MFS may regress "
+ "performance.";
+ }
+ }
addPass(createMachineFunctionSplitterPass());
}
+ addPostBBSections();
+
if (!DisableCFIFixup && TM->Options.EnableCFIFixup)
addPass(createCFIFixup());
@@ -1525,9 +1538,9 @@ void TargetPassConfig::addBlockPlacement() {
sampleprof::FSDiscriminatorPass::Pass2));
const std::string ProfileFile = getFSProfileFile(TM);
if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader)
- addPass(
- createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
- sampleprof::FSDiscriminatorPass::Pass2));
+ addPass(createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass2,
+ nullptr));
}
if (addPass(&MachineBlockPlacementID)) {
// Run a separate pass to collect block placement statistics.
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index a41d5999d961..77d2dfcf2323 100644
--- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -33,7 +34,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
@@ -79,8 +79,8 @@ bool TargetRegisterInfo::shouldRegionSplitForVirtReg(
void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet,
MCRegister Reg) const {
- for (MCSuperRegIterator AI(Reg, this, true); AI.isValid(); ++AI)
- RegisterSet.set(*AI);
+ for (MCPhysReg SR : superregs_inclusive(Reg))
+ RegisterSet.set(SR);
}
bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
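Editor's note: the TargetRegisterInfo hunks migrate from the old MC*RegIterator loops to the range-based accessors. A minimal sketch of that API, assuming a valid `TRI` and a physical register `Reg`:

```cpp
BitVector Reserved(TRI->getNumRegs());
for (MCPhysReg SR : TRI->superregs_inclusive(Reg))
  Reserved.set(SR);                 // Reg itself plus every super-register
for (MCPhysReg SR : TRI->superregs(Reg))
  (void)SR;                         // super-registers only, Reg excluded
for (MCRegUnit Unit : TRI->regunits(Reg))
  (void)Unit;                       // the register's underlying units
```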
@@ -90,9 +90,9 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
for (unsigned Reg : RegisterSet.set_bits()) {
if (Checked[Reg])
continue;
- for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) {
- if (!RegisterSet[*SR] && !is_contained(Exceptions, Reg)) {
- dbgs() << "Error: Super register " << printReg(*SR, this)
+ for (MCPhysReg SR : superregs(Reg)) {
+ if (!RegisterSet[SR] && !is_contained(Exceptions, Reg)) {
+ dbgs() << "Error: Super register " << printReg(SR, this)
<< " of reserved register " << printReg(Reg, this)
<< " is not reserved.\n";
return false;
@@ -100,7 +100,7 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
// We transitively check superregs. So we can remember this for later
// to avoid compiletime explosion in deep register hierarchies.
- Checked.set(*SR);
+ Checked.set(SR);
}
}
return true;
@@ -281,7 +281,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A,
const TargetRegisterInfo *TRI) {
for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
if (unsigned Common = *A++ & *B++)
- return TRI->getRegClass(I + countTrailingZeros(Common));
+ return TRI->getRegClass(I + llvm::countr_zero(Common));
return nullptr;
}
@@ -424,8 +424,8 @@ bool TargetRegisterInfo::getRegAllocationHints(
SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
- const std::pair<Register, SmallVector<Register, 4>> &Hints_MRI =
- MRI.getRegAllocationHints(VirtReg);
+ const std::pair<unsigned, SmallVector<Register, 4>> &Hints_MRI =
+ MRI.getRegAllocationHints(VirtReg);
SmallSet<Register, 32> HintedRegs;
// First hint may be a target hint.
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 8cb3667aea28..c3ea76bf8cea 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -87,18 +87,18 @@ static cl::opt<unsigned> MaxDataFlowEdge(
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
- MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const InstrItineraryData *InstrItins;
- MachineRegisterInfo *MRI;
- LiveVariables *LV;
- LiveIntervals *LIS;
- AliasAnalysis *AA;
- CodeGenOpt::Level OptLevel;
+ MachineFunction *MF = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const InstrItineraryData *InstrItins = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ LiveVariables *LV = nullptr;
+ LiveIntervals *LIS = nullptr;
+ AliasAnalysis *AA = nullptr;
+ CodeGenOpt::Level OptLevel = CodeGenOpt::None;
// The current basic block being processed.
- MachineBasicBlock *MBB;
+ MachineBasicBlock *MBB = nullptr;
// Keep track the distance of a MI from the start of the current basic block.
DenseMap<MachineInstr*, unsigned> DistanceMap;
@@ -198,8 +198,6 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE,
"Two-Address instruction pass", false, false)
-static bool isPlainlyKilled(MachineInstr *MI, Register Reg, LiveIntervals *LIS);
-
/// Return the MachineInstr* if it is the single def of the Reg in current BB.
static MachineInstr *getSingleDef(Register Reg, MachineBasicBlock *BB,
const MachineRegisterInfo *MRI) {
@@ -287,7 +285,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
/// Test if the given register value, which is used by the
/// given instruction, is killed by the given instruction.
-static bool isPlainlyKilled(MachineInstr *MI, Register Reg,
+static bool isPlainlyKilled(const MachineInstr *MI, Register Reg,
LiveIntervals *LIS) {
if (LIS && Reg.isVirtual() && !LIS->isNotInMIMap(*MI)) {
// FIXME: Sometimes tryInstructionTransform() will add instructions and
@@ -311,6 +309,12 @@ static bool isPlainlyKilled(MachineInstr *MI, Register Reg,
return MI->killsRegister(Reg);
}
+/// Test if the register used by the given operand is killed by the operand's
+/// instruction.
+static bool isPlainlyKilled(const MachineOperand &MO, LiveIntervals *LIS) {
+ return MO.isKill() || isPlainlyKilled(MO.getParent(), MO.getReg(), LIS);
+}
+
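Editor's note: the operand-based overload lets the call sites later in this diff drop their explicit LIS null checks. A sketch of the simplification (MO is assumed to be a register use operand):

```cpp
// Before: MO.isKill() || (LIS && isPlainlyKilled(MI, MOReg, LIS))
// After:
bool Killed = isPlainlyKilled(MO, LIS); // MO.getParent() supplies the instruction
```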
/// Test if the given register value, which is used by the given
/// instruction, is killed by the given instruction. This looks through
/// coalescable copies to see if the original value is potentially not killed.
@@ -404,7 +408,7 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
}
if (UseMI.isCommutable()) {
unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex;
- unsigned Src2 = UseMI.getOperandNo(UseOp);
+ unsigned Src2 = UseOp->getOperandNo();
if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) {
MachineOperand &MO = UseMI.getOperand(Src1);
if (MO.isReg() && MO.isUse() &&
@@ -693,10 +697,8 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
assert(NewMI->getNumExplicitDefs() == 1);
// Find the old and new def location.
- auto OldIt = mi->defs().begin();
- auto NewIt = NewMI->defs().begin();
- unsigned OldIdx = mi->getOperandNo(OldIt);
- unsigned NewIdx = NewMI->getOperandNo(NewIt);
+ unsigned OldIdx = mi->defs().begin()->getOperandNo();
+ unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
// Record that one def has been replaced by the other.
unsigned NewInstrNum = NewMI->getDebugInstrNum();
@@ -863,8 +865,7 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
Defs.push_back(MOReg);
else {
Uses.push_back(MOReg);
- if (MOReg != Reg && (MO.isKill() ||
- (LIS && isPlainlyKilled(MI, MOReg, LIS))))
+ if (MOReg != Reg && isPlainlyKilled(MO, LIS))
Kills.push_back(MOReg);
}
}
@@ -915,8 +916,7 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
} else {
if (regOverlapsSet(Defs, MOReg, TRI))
return false;
- bool isKill =
- MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS));
+ bool isKill = isPlainlyKilled(MO, LIS);
if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg, TRI)) ||
regOverlapsSet(Kills, MOReg, TRI)))
// Don't want to extend other live ranges and update kills.
@@ -1044,7 +1044,7 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
continue;
if (isDefTooClose(MOReg, DI->second, MI))
return false;
- bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS));
+ bool isKill = isPlainlyKilled(MO, LIS);
if (MOReg == Reg && !isKill)
return false;
Uses.push_back(MOReg);
@@ -1086,8 +1086,7 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
if (regOverlapsSet(Kills, MOReg, TRI))
// Don't want to extend other live ranges and update kills.
return false;
- if (&OtherMI != MI && MOReg == Reg &&
- !(MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS))))
+ if (&OtherMI != MI && MOReg == Reg && !isPlainlyKilled(MO, LIS))
// We can't schedule across a use of the register in question.
return false;
} else {
@@ -1533,8 +1532,8 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
S.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
}
} else {
- for (MCRegUnitIterator Unit(RegA, TRI); Unit.isValid(); ++Unit) {
- if (LiveRange *LR = LIS->getCachedRegUnit(*Unit)) {
+ for (MCRegUnit Unit : TRI->regunits(RegA)) {
+ if (LiveRange *LR = LIS->getCachedRegUnit(Unit)) {
VNInfo *VNI =
LR->getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
LR->addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
@@ -1566,8 +1565,8 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (AllUsesCopied) {
LaneBitmask RemainingUses = LaneBitmask::getNone();
// Replace other (un-tied) uses of regB with LastCopiedReg.
- for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ for (MachineOperand &MO : MI->all_uses()) {
+ if (MO.getReg() == RegB) {
if (MO.getSubReg() == SubRegB && !IsEarlyClobber) {
if (MO.isKill()) {
MO.setIsKill(false);
@@ -1619,8 +1618,8 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
// regB is still used in this instruction, but a kill flag was
// removed from a different tied use of regB, so now we need to add
// a kill flag to one of the remaining uses of regB.
- for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ for (MachineOperand &MO : MI->all_uses()) {
+ if (MO.getReg() == RegB) {
MO.setIsKill(true);
break;
}
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index e6c0b3242d67..426292345a14 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -235,8 +235,6 @@ bool TypePromotionImpl::isSource(Value *V) {
return true;
else if (isa<LoadInst>(V))
return true;
- else if (isa<BitCastInst>(V))
- return true;
else if (auto *Call = dyn_cast<CallInst>(V))
return Call->hasRetAttr(Attribute::AttrKind::ZExt);
else if (auto *Trunc = dyn_cast<TruncInst>(V))
@@ -724,8 +722,9 @@ bool TypePromotionImpl::isSupportedValue(Value *V) {
case Instruction::Ret:
case Instruction::Load:
case Instruction::Trunc:
- case Instruction::BitCast:
return isSupportedType(I);
+ case Instruction::BitCast:
+ return I->getOperand(0)->getType() == I->getType();
case Instruction::ZExt:
return isSupportedType(I->getOperand(0));
case Instruction::ICmp:
@@ -960,8 +959,8 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
if (isa<ZExtInst>(&I) && isa<PHINode>(I.getOperand(0)) &&
isa<IntegerType>(I.getType()) && BBIsInLoop(&BB)) {
- LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << I.getOperand(0)
- << "\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: "
+ << *I.getOperand(0) << "\n");
EVT ZExtVT = TLI->getValueType(DL, I.getType());
Instruction *Phi = static_cast<Instruction *>(I.getOperand(0));
auto PromoteWidth = ZExtVT.getFixedSizeInBits();
diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 5e8514f525e9..f17450d264ba 100644
--- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -120,16 +120,14 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
while (BB.succ_begin() != BB.succ_end()) {
MachineBasicBlock* succ = *BB.succ_begin();
- MachineBasicBlock::iterator start = succ->begin();
- while (start != succ->end() && start->isPHI()) {
- for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
- if (start->getOperand(i).isMBB() &&
- start->getOperand(i).getMBB() == &BB) {
- start->removeOperand(i);
- start->removeOperand(i-1);
+ for (MachineInstr &Phi : succ->phis()) {
+ for (unsigned i = Phi.getNumOperands() - 1; i >= 2; i -= 2) {
+ if (Phi.getOperand(i).isMBB() &&
+ Phi.getOperand(i).getMBB() == &BB) {
+ Phi.removeOperand(i);
+ Phi.removeOperand(i - 1);
}
-
- start++;
+ }
}
BB.removeSuccessor(BB.succ_begin());
@@ -152,18 +150,18 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
// Prune unneeded PHI entries.
SmallPtrSet<MachineBasicBlock*, 8> preds(BB.pred_begin(),
BB.pred_end());
- MachineBasicBlock::iterator phi = BB.begin();
- while (phi != BB.end() && phi->isPHI()) {
- for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
- if (!preds.count(phi->getOperand(i).getMBB())) {
- phi->removeOperand(i);
- phi->removeOperand(i-1);
+ for (MachineInstr &Phi : make_early_inc_range(BB.phis())) {
+ for (unsigned i = Phi.getNumOperands() - 1; i >= 2; i -= 2) {
+ if (!preds.count(Phi.getOperand(i).getMBB())) {
+ Phi.removeOperand(i);
+ Phi.removeOperand(i - 1);
ModifiedPHI = true;
}
+ }
- if (phi->getNumOperands() == 3) {
- const MachineOperand &Input = phi->getOperand(1);
- const MachineOperand &Output = phi->getOperand(0);
+ if (Phi.getNumOperands() == 3) {
+ const MachineOperand &Input = Phi.getOperand(1);
+ const MachineOperand &Output = Phi.getOperand(0);
Register InputReg = Input.getReg();
Register OutputReg = Output.getReg();
assert(Output.getSubReg() == 0 && "Cannot have output subregister");
@@ -182,16 +180,13 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
// insert a COPY instead of simply replacing the output
// with the input.
const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
- BuildMI(BB, BB.getFirstNonPHI(), phi->getDebugLoc(),
+ BuildMI(BB, BB.getFirstNonPHI(), Phi.getDebugLoc(),
TII->get(TargetOpcode::COPY), OutputReg)
.addReg(InputReg, getRegState(Input), InputSub);
}
- phi++->eraseFromParent();
+ Phi.eraseFromParent();
}
- continue;
}
-
- ++phi;
}
}
diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
index 88460971338c..fc1cbfefb0db 100644
--- a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
+++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -209,7 +209,7 @@ void VLIWMachineScheduler::schedule() {
Topo.InitDAGTopologicalSorting();
// Postprocess the DAG to add platform-specific artificial dependencies.
- postprocessDAG();
+ postProcessDAG();
SmallVector<SUnit *, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 608434800bc3..d514e1642e29 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/WithColor.h"
@@ -173,9 +174,20 @@ std::string EVT::getEVTString() const {
case MVT::Untyped: return "Untyped";
case MVT::funcref: return "funcref";
case MVT::externref: return "externref";
+ case MVT::aarch64svcount:
+ return "aarch64svcount";
+ case MVT::spirvbuiltin:
+ return "spirvbuiltin";
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void EVT::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
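Editor's note: a minimal sketch of the new EVT/MVT debug helpers (debug or LLVM_ENABLE_DUMP builds only; assumes an LLVMContext `Ctx` is in scope):

```cpp
EVT VT = EVT::getIntegerVT(Ctx, 64);
VT.dump();                    // prints "i64" plus a newline to dbgs()
MVT(MVT::f32).dump();         // prints "f32"
MVT(MVT::f32).print(errs());  // the new print() writes to any raw_ostream
```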
/// getTypeForEVT - This method returns an LLVM type corresponding to the
/// specified EVT. For integer types, this returns an unsigned type. Note
/// that this will abort for types that cannot be represented.
@@ -202,14 +214,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::f128: return Type::getFP128Ty(Context);
case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
case MVT::x86mmx: return Type::getX86_MMXTy(Context);
+ case MVT::aarch64svcount:
+ return TargetExtType::get(Context, "aarch64.svcount");
case MVT::x86amx: return Type::getX86_AMXTy(Context);
case MVT::i64x8: return IntegerType::get(Context, 512);
- case MVT::externref:
- // pointer to opaque struct in addrspace(10)
- return PointerType::get(StructType::create(Context), 10);
- case MVT::funcref:
- // pointer to i8 addrspace(20)
- return PointerType::get(Type::getInt8Ty(Context), 20);
+ case MVT::externref: return Type::getWasm_ExternrefTy(Context);
+ case MVT::funcref: return Type::getWasm_FuncrefTy(Context);
case MVT::v1i1:
return FixedVectorType::get(Type::getInt1Ty(Context), 1);
case MVT::v2i1:
@@ -561,6 +571,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
/// pointers as MVT::iPTR. If HandleUnknown is true, unknown types are returned
/// as Other, otherwise they are invalid.
MVT MVT::getVT(Type *Ty, bool HandleUnknown){
+ assert(Ty != nullptr && "Invalid type");
switch (Ty->getTypeID()) {
default:
if (HandleUnknown) return MVT(MVT::Other);
@@ -575,6 +586,16 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
case Type::DoubleTyID: return MVT(MVT::f64);
case Type::X86_FP80TyID: return MVT(MVT::f80);
case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
+ case Type::TargetExtTyID: {
+ TargetExtType *TargetExtTy = cast<TargetExtType>(Ty);
+ if (TargetExtTy->getName() == "aarch64.svcount")
+ return MVT(MVT::aarch64svcount);
+ else if (TargetExtTy->getName().starts_with("spirv."))
+ return MVT(MVT::spirvbuiltin);
+ if (HandleUnknown)
+ return MVT(MVT::Other);
+ llvm_unreachable("Unknown target ext type!");
+ }
case Type::X86_AMXTyID: return MVT(MVT::x86amx);
case Type::FP128TyID: return MVT(MVT::f128);
case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
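Editor's note: a hedged sketch of the new target-extension-type mapping (assumes an LLVMContext `Ctx`; the parameterless "spirv.Image" instance is only for illustration):

```cpp
Type *SVCount = TargetExtType::get(Ctx, "aarch64.svcount");
assert(MVT::getVT(SVCount) == MVT::aarch64svcount);
Type *SpvTy = TargetExtType::get(Ctx, "spirv.Image");
assert(MVT::getVT(SpvTy) == MVT::spirvbuiltin); // any "spirv." prefix maps here
```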
@@ -607,3 +628,15 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
}
}
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MVT::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
+void MVT::print(raw_ostream &OS) const {
+ OS << EVT(*this).getEVTString();
+}
+
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index f80b06d7e9b7..a816bd5b52de 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -116,10 +116,10 @@ bool VirtRegMap::hasPreferredPhys(Register VirtReg) const {
}
bool VirtRegMap::hasKnownPreference(Register VirtReg) const {
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
- if (Register::isPhysicalRegister(Hint.second))
+ std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(VirtReg);
+ if (Hint.second.isPhysical())
return true;
- if (Register::isVirtualRegister(Hint.second))
+ if (Hint.second.isVirtual())
return hasPhys(Hint.second);
return false;
}
@@ -181,14 +181,14 @@ LLVM_DUMP_METHOD void VirtRegMap::dump() const {
namespace {
class VirtRegRewriter : public MachineFunctionPass {
- MachineFunction *MF;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- MachineRegisterInfo *MRI;
- SlotIndexes *Indexes;
- LiveIntervals *LIS;
- VirtRegMap *VRM;
- LiveDebugVariables *DebugVars;
+ MachineFunction *MF = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ SlotIndexes *Indexes = nullptr;
+ LiveIntervals *LIS = nullptr;
+ VirtRegMap *VRM = nullptr;
+ LiveDebugVariables *DebugVars = nullptr;
DenseSet<Register> RewriteRegs;
bool ClearVirtRegs;
@@ -514,8 +514,8 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
SlotIndex MIIndex = LIS->getInstructionIndex(MI);
SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
- for (MCRegUnitIterator Unit(SuperPhysReg, TRI); Unit.isValid(); ++Unit) {
- const LiveRange &UnitRange = LIS->getRegUnit(*Unit);
+ for (MCRegUnit Unit : TRI->regunits(SuperPhysReg)) {
+ const LiveRange &UnitRange = LIS->getRegUnit(Unit);
// If the regunit is live both before and after MI,
// we assume it is live through.
// Generally speaking, this is not true, because something like
@@ -633,9 +633,8 @@ void VirtRegRewriter::rewrite() {
// Don't bother maintaining accurate LiveIntervals for registers which were
// already allocated.
for (Register PhysReg : RewriteRegs) {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid();
- ++Units) {
- LIS->removeRegUnit(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LIS->removeRegUnit(Unit);
}
}
}
diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 361f185243b1..cc04807e8455 100644
--- a/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -80,6 +80,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/InitializePasses.h"
@@ -209,6 +210,12 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
if (CatchPads.empty() && CleanupPads.empty())
return false;
+ if (!F.hasPersonalityFn() ||
+ !isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
+ report_fatal_error("Function '" + F.getName() +
+ "' does not have a correct Wasm personality function "
+ "'__gxx_wasm_personality_v0'");
+ }
assert(F.hasPersonalityFn() && "Personality function not found");
// __wasm_lpad_context global variable.
diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp
index dfca2be0a114..11597b119893 100644
--- a/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -18,12 +18,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
@@ -31,6 +30,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -216,6 +216,127 @@ static void calculateStateNumbersForInvokes(const Function *Fn,
}
}
+// See comments below for calculateSEHStateForAsynchEH().
+// State - incoming State of normal paths
+struct WorkItem {
+ const BasicBlock *Block;
+ int State;
+ WorkItem(const BasicBlock *BB, int St) {
+ Block = BB;
+ State = St;
+ }
+};
+void llvm::calculateCXXStateForAsynchEH(const BasicBlock *BB, int State,
+ WinEHFuncInfo &EHInfo) {
+ SmallVector<struct WorkItem *, 8> WorkList;
+ struct WorkItem *WI = new WorkItem(BB, State);
+ WorkList.push_back(WI);
+
+ while (!WorkList.empty()) {
+ WI = WorkList.pop_back_val();
+ const BasicBlock *BB = WI->Block;
+ int State = WI->State;
+ delete WI;
+ if (EHInfo.BlockToStateMap.count(BB) && EHInfo.BlockToStateMap[BB] <= State)
+ continue; // skip blocks already visited by lower State
+
+ const llvm::Instruction *I = BB->getFirstNonPHI();
+ const llvm::Instruction *TI = BB->getTerminator();
+ if (I->isEHPad())
+ State = EHInfo.EHPadStateMap[I];
+ EHInfo.BlockToStateMap[BB] = State; // Record state, also flag visiting
+
+ if ((isa<CleanupReturnInst>(TI) || isa<CatchReturnInst>(TI)) && State > 0) {
+ // Retrieve the new State
+ State = EHInfo.CxxUnwindMap[State].ToState; // Retrieve next State
+ } else if (isa<InvokeInst>(TI)) {
+ auto *Call = cast<CallBase>(TI);
+ const Function *Fn = Call->getCalledFunction();
+ if (Fn && Fn->isIntrinsic() &&
+ (Fn->getIntrinsicID() == Intrinsic::seh_scope_begin ||
+ Fn->getIntrinsicID() == Intrinsic::seh_try_begin))
+ // Retrieve the new State from seh_scope_begin
+ State = EHInfo.InvokeStateMap[cast<InvokeInst>(TI)];
+ else if (Fn && Fn->isIntrinsic() &&
+ (Fn->getIntrinsicID() == Intrinsic::seh_scope_end ||
+ Fn->getIntrinsicID() == Intrinsic::seh_try_end)) {
+ // In case of conditional ctor, let's retrieve State from Invoke
+ State = EHInfo.InvokeStateMap[cast<InvokeInst>(TI)];
+ // end of current state, retrieve new state from UnwindMap
+ State = EHInfo.CxxUnwindMap[State].ToState;
+ }
+ }
+ // Continue pushing successors into the worklist
+ for (auto *SuccBB : successors(BB)) {
+ WI = new WorkItem(SuccBB, State);
+ WorkList.push_back(WI);
+ }
+ }
+}
+
+// The central theory of this routine is based on the following:
+// A _try scope is always a SEME (Single Entry Multiple Exits) region
+// as jumping into a _try is not allowed
+// The single entry must start with a seh_try_begin() invoke with a
+// correct State number that is the initial state of the SEME.
+// Through control-flow, state number is propagated into all blocks.
+// Side exits marked by seh_try_end() will unwind to parent state via
+// existing SEHUnwindMap[].
+// Side exits can ONLY jump into parent scopes (lower state number).
+// Thus, when a block inherits different states from its predecessors,
+// the lowest State wins.
+// If some exits flow to unreachable, propagation on those paths terminate,
+// not affecting remaining blocks.
+void llvm::calculateSEHStateForAsynchEH(const BasicBlock *BB, int State,
+ WinEHFuncInfo &EHInfo) {
+ SmallVector<struct WorkItem *, 8> WorkList;
+ struct WorkItem *WI = new WorkItem(BB, State);
+ WorkList.push_back(WI);
+
+ while (!WorkList.empty()) {
+ WI = WorkList.pop_back_val();
+ const BasicBlock *BB = WI->Block;
+ int State = WI->State;
+ delete WI;
+ if (EHInfo.BlockToStateMap.count(BB) && EHInfo.BlockToStateMap[BB] <= State)
+ continue; // skip blocks already visited by lower State
+
+ const llvm::Instruction *I = BB->getFirstNonPHI();
+ const llvm::Instruction *TI = BB->getTerminator();
+ if (I->isEHPad())
+ State = EHInfo.EHPadStateMap[I];
+ EHInfo.BlockToStateMap[BB] = State; // Record state
+
+ if (isa<CatchPadInst>(I) && isa<CatchReturnInst>(TI)) {
+ const Constant *FilterOrNull = cast<Constant>(
+ cast<CatchPadInst>(I)->getArgOperand(0)->stripPointerCasts());
+ const Function *Filter = dyn_cast<Function>(FilterOrNull);
+ if (!Filter || !Filter->getName().startswith("__IsLocalUnwind"))
+ State = EHInfo.SEHUnwindMap[State].ToState; // Retrieve next State
+ } else if ((isa<CleanupReturnInst>(TI) || isa<CatchReturnInst>(TI)) &&
+ State > 0) {
+ // Retrieve the new State.
+ State = EHInfo.SEHUnwindMap[State].ToState; // Retrieve next State
+ } else if (isa<InvokeInst>(TI)) {
+ auto *Call = cast<CallBase>(TI);
+ const Function *Fn = Call->getCalledFunction();
+ if (Fn && Fn->isIntrinsic() &&
+ Fn->getIntrinsicID() == Intrinsic::seh_try_begin)
+ // Retrieve the new State from seh_try_begin
+ State = EHInfo.InvokeStateMap[cast<InvokeInst>(TI)];
+ else if (Fn && Fn->isIntrinsic() &&
+ Fn->getIntrinsicID() == Intrinsic::seh_try_end)
+ // end of current state, retrieve new state from UnwindMap
+ State = EHInfo.SEHUnwindMap[State].ToState;
+ }
+ // Continue pushing successors into the worklist
+ for (auto *SuccBB : successors(BB)) {
+ WI = new WorkItem(SuccBB, State);
+ WorkList.push_back(WI);
+ }
+ }
+}
+
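Editor's note: both state-propagation routines above share the same worklist scheme. A value-typed sketch of that pattern (illustrative only; the patch itself heap-allocates WorkItem objects):

```cpp
struct Item { const BasicBlock *BB; int State; };
SmallVector<Item, 8> Work;
Work.push_back({&F.getEntryBlock(), -1});            // assumes a Function `F`
while (!Work.empty()) {
  auto [BB, State] = Work.pop_back_val();
  auto It = EHInfo.BlockToStateMap.find(BB);
  if (It != EHInfo.BlockToStateMap.end() && It->second <= State)
    continue;                                        // lower state already recorded
  EHInfo.BlockToStateMap[BB] = State;                // record and mark visited
  // ...adjust State at EH pads and seh_try_begin/seh_try_end markers...
  for (const BasicBlock *Succ : successors(BB))
    Work.push_back({Succ, State});
}
```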
// Given BB which ends in an unwind edge, return the EHPad that this BB belongs
// to. If the unwind edge came from an invoke, return null.
static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
@@ -276,6 +397,7 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
for (const auto *CatchPad : Handlers) {
FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow;
+ FuncInfo.EHPadStateMap[CatchPad] = CatchLow;
for (const User *U : CatchPad->users()) {
const auto *UserI = cast<Instruction>(U);
if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) {
@@ -384,6 +506,7 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
// Everything in the __try block uses TryState as its parent state.
FuncInfo.EHPadStateMap[CatchSwitch] = TryState;
+ FuncInfo.EHPadStateMap[CatchPad] = TryState;
LLVM_DEBUG(dbgs() << "Assigning state #" << TryState << " to BB "
<< CatchPadBB->getName() << '\n');
for (const BasicBlock *PredBlock : predecessors(BB))
@@ -464,6 +587,12 @@ void llvm::calculateSEHStateNumbers(const Function *Fn,
}
calculateStateNumbersForInvokes(Fn, FuncInfo);
+
+ bool IsEHa = Fn->getParent()->getModuleFlag("eh-asynch");
+ if (IsEHa) {
+ const BasicBlock *EntryBB = &(Fn->getEntryBlock());
+ calculateSEHStateForAsynchEH(EntryBB, -1, FuncInfo);
+ }
}
void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
@@ -482,6 +611,12 @@ void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
}
calculateStateNumbersForInvokes(Fn, FuncInfo);
+
+ bool IsEHa = Fn->getParent()->getModuleFlag("eh-asynch");
+ if (IsEHa) {
+ const BasicBlock *EntryBB = &(Fn->getEntryBlock());
+ calculateCXXStateForAsynchEH(EntryBB, -1, FuncInfo);
+ }
}
static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int HandlerParentState,
@@ -602,7 +737,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
// so visit pads in descendant-most to ancestor-most order.
for (ClrEHUnwindMapEntry &Entry : llvm::reverse(FuncInfo.ClrEHUnwindMap)) {
const Instruction *Pad =
- Entry.Handler.get<const BasicBlock *>()->getFirstNonPHI();
+ cast<const BasicBlock *>(Entry.Handler)->getFirstNonPHI();
// For most pads, the TryParentState is the state associated with the
// unwind dest of exceptional exits from it.
const BasicBlock *UnwindDest;
@@ -638,8 +773,8 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
int UserUnwindState =
FuncInfo.ClrEHUnwindMap[UserState].TryParentState;
if (UserUnwindState != -1)
- UserUnwindDest = FuncInfo.ClrEHUnwindMap[UserUnwindState]
- .Handler.get<const BasicBlock *>();
+ UserUnwindDest = cast<const BasicBlock *>(
+ FuncInfo.ClrEHUnwindMap[UserUnwindState].Handler);
}
// Not having an unwind dest for this user might indicate that it
@@ -1253,4 +1388,9 @@ void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd);
}
+void WinEHFuncInfo::addIPToStateRange(int State, MCSymbol* InvokeBegin,
+ MCSymbol* InvokeEnd) {
+ LabelToStateMap[InvokeBegin] = std::make_pair(State, InvokeEnd);
+}
+
WinEHFuncInfo::WinEHFuncInfo() = default;
diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 13f45ae048bb..d40725838c94 100644
--- a/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -29,6 +28,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -226,6 +226,7 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
case Triple::ArchType::thumb:
case Triple::ArchType::aarch64:
case Triple::ArchType::hexagon:
+ case Triple::ArchType::loongarch64:
case Triple::ArchType::mips:
case Triple::ArchType::mipsel:
case Triple::ArchType::mips64: