path: root/lib/CodeGen
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 115
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.h | 45
-rw-r--r--  lib/CodeGen/AllocationOrder.cpp | 7
-rw-r--r--  lib/CodeGen/AllocationOrder.h | 8
-rw-r--r--  lib/CodeGen/Analysis.cpp | 27
-rw-r--r--  lib/CodeGen/AntiDepBreaker.h | 26
-rw-r--r--  lib/CodeGen/AsmPrinter/ARMException.cpp | 19
-rw-r--r--  lib/CodeGen/AsmPrinter/AddressPool.cpp | 7
-rw-r--r--  lib/CodeGen/AsmPrinter/AddressPool.h | 19
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 300
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 22
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 9
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 539
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.h | 32
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp | 29
-rw-r--r--  lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h | 20
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 66
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugHandlerBase.h | 24
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugLocEntry.h | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 52
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 130
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 21
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 132
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 46
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 409
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 114
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 25
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.h | 39
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.cpp | 16
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.h | 23
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 7
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfStringPool.h | 17
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 17
-rw-r--r--  lib/CodeGen/AsmPrinter/EHStreamer.cpp | 36
-rw-r--r--  lib/CodeGen/AsmPrinter/EHStreamer.h | 21
-rw-r--r--  lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/WinException.cpp | 61
-rw-r--r--  lib/CodeGen/AtomicExpandPass.cpp | 77
-rw-r--r--  lib/CodeGen/BasicTargetTransformInfo.cpp | 15
-rw-r--r--  lib/CodeGen/BranchCoalescing.cpp | 758
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 205
-rw-r--r--  lib/CodeGen/BranchFolding.h | 49
-rw-r--r--  lib/CodeGen/BranchRelaxation.cpp | 62
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 7
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 166
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp | 6
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 6
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 1961
-rw-r--r--  lib/CodeGen/CountingFunctionInserter.cpp | 62
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 54
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 33
-rw-r--r--  lib/CodeGen/DFAPacketizer.cpp | 36
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 5
-rw-r--r--  lib/CodeGen/DetectDeadLanes.cpp | 19
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 32
-rw-r--r--  lib/CodeGen/EarlyIfConversion.cpp | 34
-rw-r--r--  lib/CodeGen/EdgeBundles.cpp | 12
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp | 12
-rw-r--r--  lib/CodeGen/ExpandISelPseudos.cpp | 4
-rw-r--r--  lib/CodeGen/ExpandMemCmp.cpp | 825
-rw-r--r--  lib/CodeGen/ExpandPostRAPseudos.cpp | 10
-rw-r--r--  lib/CodeGen/ExpandReductions.cpp | 2
-rw-r--r--  lib/CodeGen/FEntryInserter.cpp | 12
-rw-r--r--  lib/CodeGen/GCRootLowering.cpp | 13
-rw-r--r--  lib/CodeGen/GlobalISel/CMakeLists.txt | 42
-rw-r--r--  lib/CodeGen/GlobalISel/CallLowering.cpp | 11
-rw-r--r--  lib/CodeGen/GlobalISel/GlobalISel.cpp | 8
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp | 87
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelect.cpp | 27
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelector.cpp | 34
-rw-r--r--  lib/CodeGen/GlobalISel/Legalizer.cpp | 249
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 230
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 399
-rw-r--r--  lib/CodeGen/GlobalISel/Localizer.cpp | 5
-rw-r--r--  lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 92
-rw-r--r--  lib/CodeGen/GlobalISel/RegBankSelect.cpp | 19
-rw-r--r--  lib/CodeGen/GlobalISel/RegisterBank.cpp | 2
-rw-r--r--  lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 55
-rw-r--r--  lib/CodeGen/GlobalISel/Utils.cpp | 25
-rw-r--r--  lib/CodeGen/GlobalMerge.cpp | 57
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 213
-rw-r--r--  lib/CodeGen/ImplicitNullChecks.cpp | 66
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 146
-rw-r--r--  lib/CodeGen/InterferenceCache.cpp | 20
-rw-r--r--  lib/CodeGen/InterferenceCache.h | 75
-rw-r--r--  lib/CodeGen/InterleavedAccessPass.cpp | 36
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 37
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 57
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 1
-rw-r--r--  lib/CodeGen/LexicalScopes.cpp | 9
-rw-r--r--  lib/CodeGen/LiveDebugValues.cpp | 106
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 615
-rw-r--r--  lib/CodeGen/LiveDebugVariables.h | 13
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 52
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.cpp | 4
-rw-r--r--  lib/CodeGen/LiveIntervals.cpp (renamed from lib/CodeGen/LiveIntervalAnalysis.cpp) | 48
-rw-r--r--  lib/CodeGen/LivePhysRegs.cpp | 123
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 38
-rw-r--r--  lib/CodeGen/LiveRangeCalc.h | 46
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 6
-rw-r--r--  lib/CodeGen/LiveRangeShrink.cpp | 39
-rw-r--r--  lib/CodeGen/LiveRegMatrix.cpp | 18
-rw-r--r--  lib/CodeGen/LiveRegUnits.cpp | 30
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 6
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 13
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 42
-rw-r--r--  lib/CodeGen/LowerEmuTLS.cpp | 2
-rw-r--r--  lib/CodeGen/MIRCanonicalizerPass.cpp | 625
-rw-r--r--  lib/CodeGen/MIRParser/LLVMBuild.txt | 2
-rw-r--r--  lib/CodeGen/MIRParser/MILexer.cpp | 32
-rw-r--r--  lib/CodeGen/MIRParser/MILexer.h | 24
-rw-r--r--  lib/CodeGen/MIRParser/MIParser.cpp | 197
-rw-r--r--  lib/CodeGen/MIRParser/MIRParser.cpp | 27
-rw-r--r--  lib/CodeGen/MIRPrinter.cpp | 491
-rw-r--r--  lib/CodeGen/MIRPrintingPass.cpp | 1
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 47
-rw-r--r--  lib/CodeGen/MachineBlockFrequencyInfo.cpp | 65
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 132
-rw-r--r--  lib/CodeGen/MachineBranchProbabilityInfo.cpp | 2
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 67
-rw-r--r--  lib/CodeGen/MachineCombiner.cpp | 134
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp | 61
-rw-r--r--  lib/CodeGen/MachineDominators.cpp | 5
-rw-r--r--  lib/CodeGen/MachineFrameInfo.cpp | 47
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 145
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 930
-rw-r--r--  lib/CodeGen/MachineInstrBundle.cpp | 6
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 90
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 7
-rw-r--r--  lib/CodeGen/MachineModuleInfoImpls.cpp | 16
-rw-r--r--  lib/CodeGen/MachineOperand.cpp | 936
-rw-r--r--  lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 22
-rw-r--r--  lib/CodeGen/MachineOutliner.cpp | 851
-rw-r--r--  lib/CodeGen/MachinePipeliner.cpp | 190
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 27
-rw-r--r--  lib/CodeGen/MachineSSAUpdater.cpp | 40
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 248
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 89
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp | 170
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 436
-rw-r--r--  lib/CodeGen/MacroFusion.cpp | 81
-rw-r--r--  lib/CodeGen/OptimizePHIs.cpp | 29
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 58
-rw-r--r--  lib/CodeGen/ParallelCG.cpp | 1
-rw-r--r--  lib/CodeGen/PatchableFunction.cpp | 10
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 95
-rw-r--r--  lib/CodeGen/PostRAHazardRecognizer.cpp | 4
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 15
-rw-r--r--  lib/CodeGen/PreISelIntrinsicLowering.cpp | 24
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 6
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 284
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp | 45
-rw-r--r--  lib/CodeGen/README.txt | 2
-rw-r--r--  lib/CodeGen/RegAllocBase.cpp | 19
-rw-r--r--  lib/CodeGen/RegAllocBase.h | 32
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 13
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 746
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 467
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 16
-rw-r--r--  lib/CodeGen/RegUsageInfoCollector.cpp | 18
-rw-r--r--  lib/CodeGen/RegUsageInfoPropagate.cpp | 35
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 8
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 291
-rw-r--r--  lib/CodeGen/RegisterCoalescer.h | 37
-rw-r--r--  lib/CodeGen/RegisterPressure.cpp | 10
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 45
-rw-r--r--  lib/CodeGen/RegisterUsageInfo.cpp | 8
-rw-r--r--  lib/CodeGen/RenameIndependentSubregs.cpp | 34
-rw-r--r--  lib/CodeGen/ResetMachineFunctionPass.cpp | 2
-rw-r--r--  lib/CodeGen/SafeStack.cpp | 66
-rw-r--r--  lib/CodeGen/SafeStackColoring.cpp | 17
-rw-r--r--  lib/CodeGen/SafeStackColoring.h | 35
-rw-r--r--  lib/CodeGen/SafeStackLayout.cpp | 12
-rw-r--r--  lib/CodeGen/SafeStackLayout.h | 18
-rw-r--r--  lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 49
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 8
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 42
-rw-r--r--  lib/CodeGen/ScheduleDAGPrinter.cpp | 4
-rw-r--r--  lib/CodeGen/ScoreboardHazardRecognizer.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2497
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 30
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 12
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 25
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 543
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 99
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 62
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 27
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 62
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 252
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 85
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 215
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 57
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 11
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 650
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 13
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 664
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 219
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 76
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 157
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 39
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.h | 16
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 330
-rw-r--r--  lib/CodeGen/ShadowStackGCLowering.cpp | 43
-rw-r--r--  lib/CodeGen/ShrinkWrap.cpp | 90
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 3
-rw-r--r--  lib/CodeGen/SpillPlacement.cpp | 36
-rw-r--r--  lib/CodeGen/SpillPlacement.h | 20
-rw-r--r--  lib/CodeGen/Spiller.h | 20
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 108
-rw-r--r--  lib/CodeGen/SplitKit.h | 47
-rw-r--r--  lib/CodeGen/StackColoring.cpp | 84
-rw-r--r--  lib/CodeGen/StackMapLivenessAnalysis.cpp | 2
-rw-r--r--  lib/CodeGen/StackMaps.cpp | 16
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 58
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 72
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 9
-rw-r--r--  lib/CodeGen/TailDuplicator.cpp | 52
-rw-r--r--  lib/CodeGen/TargetFrameLoweringImpl.cpp | 12
-rw-r--r--  lib/CodeGen/TargetInstrInfo.cpp | 187
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 520
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 106
-rw-r--r--  lib/CodeGen/TargetOptionsImpl.cpp | 6
-rw-r--r--  lib/CodeGen/TargetPassConfig.cpp | 145
-rw-r--r--  lib/CodeGen/TargetRegisterInfo.cpp | 127
-rw-r--r--  lib/CodeGen/TargetSchedule.cpp | 52
-rw-r--r--  lib/CodeGen/TargetSubtargetInfo.cpp | 17
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 116
-rw-r--r--  lib/CodeGen/UnreachableBlockElim.cpp | 7
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 62
-rw-r--r--  lib/CodeGen/WinEHPrepare.cpp | 2
-rw-r--r--  lib/CodeGen/XRayInstrumentation.cpp | 69
239 files changed, 16327 insertions, 10945 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 5abf50e5bd10..ffcb9a09ad73 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -1,4 +1,4 @@
-//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
+//===- AggressiveAntiDepBreaker.cpp - Anti-dep breaker --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,16 +15,33 @@
//===----------------------------------------------------------------------===//
#include "AggressiveAntiDepBreaker.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "post-RA-sched"
@@ -34,18 +51,17 @@ static cl::opt<int>
DebugDiv("agg-antidep-debugdiv",
cl::desc("Debug control for aggressive anti-dep breaker"),
cl::init(0), cl::Hidden);
+
static cl::opt<int>
DebugMod("agg-antidep-debugmod",
cl::desc("Debug control for aggressive anti-dep breaker"),
cl::init(0), cl::Hidden);
AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
- MachineBasicBlock *BB) :
- NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
- GroupNodeIndices(TargetRegs, 0),
- KillIndices(TargetRegs, 0),
- DefIndices(TargetRegs, 0)
-{
+ MachineBasicBlock *BB)
+ : NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+ GroupNodeIndices(TargetRegs, 0), KillIndices(TargetRegs, 0),
+ DefIndices(TargetRegs, 0) {
const unsigned BBSize = BB->size();
for (unsigned i = 0; i < NumTargetRegs; ++i) {
// Initialize all registers to be in their own group. Initially we
@@ -76,8 +92,7 @@ void AggressiveAntiDepState::GetGroupRegs(
}
}
-unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
-{
+unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) {
assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
@@ -92,8 +107,7 @@ unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
return Parent;
}
-unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
-{
+unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) {
// Create a new GroupNode for Reg. Reg's existing GroupNode must
// stay as is because there could be other GroupNodes referring to
// it.
@@ -103,8 +117,7 @@ unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
return idx;
}
-bool AggressiveAntiDepState::IsLive(unsigned Reg)
-{
+bool AggressiveAntiDepState::IsLive(unsigned Reg) {
// KillIndex must be defined and DefIndex not defined for a register
// to be live.
return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
@@ -115,8 +128,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
TargetSubtargetInfo::RegClassVector &CriticalPathRCs)
: AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
TII(MF.getSubtarget().getInstrInfo()),
- TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
- State(nullptr) {
+ TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI) {
/* Collect a bitset of all registers that are only broken if they
are on the critical path. */
for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
@@ -129,7 +141,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
DEBUG(for (unsigned r : CriticalPathSet.set_bits())
- dbgs() << " " << TRI->getName(r));
+ dbgs() << " " << printReg(r, TRI));
DEBUG(dbgs() << '\n');
}
@@ -204,7 +216,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
// schedule region).
if (State->IsLive(Reg)) {
DEBUG(if (State->GetGroup(Reg) != 0)
- dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ dbgs() << " " << printReg(Reg, TRI) << "=g" <<
State->GetGroup(Reg) << "->g0(region live-out)");
State->UnionGroups(Reg, 0);
} else if ((DefIndices[Reg] < InsertPosIndex)
@@ -250,7 +262,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(
/// AntiDepEdges - Return in Edges the anti- and output- dependencies
/// in SU that we want to consider for breaking.
-static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
+static void AntiDepEdges(const SUnit *SU, std::vector<const SDep *> &Edges) {
SmallSet<unsigned, 4> RegSet;
for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
@@ -311,7 +323,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
RegRefs.erase(Reg);
State->LeaveGroup(Reg);
DEBUG(if (header) {
- dbgs() << header << TRI->getName(Reg); header = nullptr; });
+ dbgs() << header << printReg(Reg, TRI); header = nullptr; });
DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
// Repeat for subregisters. Note that we only do this if the superregister
// was not live because otherwise, regardless whether we have an explicit
@@ -325,8 +337,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
RegRefs.erase(SubregReg);
State->LeaveGroup(SubregReg);
DEBUG(if (header) {
- dbgs() << header << TRI->getName(Reg); header = nullptr; });
- DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
+ dbgs() << header << printReg(Reg, TRI); header = nullptr; });
+ DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g" <<
State->GetGroup(SubregReg) << tag);
}
}
@@ -362,7 +374,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
+ DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg));
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
@@ -381,8 +393,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
unsigned AliasReg = *AI;
if (State->IsLive(AliasReg)) {
State->UnionGroups(Reg, AliasReg);
- DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
- TRI->getName(AliasReg) << ")");
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via "
+ << printReg(AliasReg, TRI) << ")");
}
}
@@ -436,11 +448,11 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
// FIXME: The issue with predicated instruction is more complex. We are being
// conservatively here because the kill markers cannot be trusted after
// if-conversion:
- // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
// ...
- // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
- // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
- // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
+ // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
+ // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
//
// The first R6 kill is not really a kill since it's killed by a predicated
// instruction which may not be executed. The second R6 def may or may not
@@ -457,8 +469,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
- State->GetGroup(Reg));
+ DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg));
// It wasn't previously live but now it is, this is a kill. Forget
// the previous live-range information and start a new live-range
@@ -493,10 +504,10 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
if (Reg == 0) continue;
if (FirstReg != 0) {
- DEBUG(dbgs() << "=" << TRI->getName(Reg));
+ DEBUG(dbgs() << "=" << printReg(Reg, TRI));
State->UnionGroups(FirstReg, Reg);
} else {
- DEBUG(dbgs() << " " << TRI->getName(Reg));
+ DEBUG(dbgs() << " " << printReg(Reg, TRI));
FirstReg = Reg;
}
}
@@ -544,8 +555,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// break the anti-dependence.
std::vector<unsigned> Regs;
State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
- assert(Regs.size() > 0 && "Empty register group!");
- if (Regs.size() == 0)
+ assert(!Regs.empty() && "Empty register group!");
+ if (Regs.empty())
return false;
// Find the "superest" register in the group. At the same time,
@@ -562,7 +573,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// If Reg has any references, then collect possible rename regs
if (RegRefs.count(Reg) > 0) {
- DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":");
+ DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":");
BitVector &BV = RenameRegisterMap[Reg];
assert(BV.empty());
@@ -571,7 +582,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
DEBUG({
dbgs() << " ::";
for (unsigned r : BV.set_bits())
- dbgs() << " " << TRI->getName(r);
+ dbgs() << " " << printReg(r, TRI);
dbgs() << "\n";
});
}
@@ -596,8 +607,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
if (renamecnt++ % DebugDiv != DebugMod)
return false;
- dbgs() << "*** Performing rename " << TRI->getName(SuperReg) <<
- " for debug ***\n";
+ dbgs() << "*** Performing rename " << printReg(SuperReg, TRI)
+ << " for debug ***\n";
}
#endif
@@ -634,7 +645,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
- DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':');
+ DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':');
RenameMap.clear();
// For each referenced group register (which must be a SuperReg or
@@ -651,7 +662,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
}
- DEBUG(dbgs() << " " << TRI->getName(NewReg));
+ DEBUG(dbgs() << " " << printReg(NewReg, TRI));
// Check if Reg can be renamed to NewReg.
if (!RenameRegisterMap[Reg].test(NewReg)) {
@@ -672,7 +683,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
unsigned AliasReg = *AI;
if (State->IsLive(AliasReg) ||
(KillIndices[Reg] > DefIndices[AliasReg])) {
- DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
+ DEBUG(dbgs() << "(alias " << printReg(AliasReg, TRI) << " live)");
found = true;
break;
}
@@ -732,14 +743,12 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
/// BreakAntiDependencies - Identifiy anti-dependencies within the
/// ScheduleDAG and break them by renaming registers.
-///
unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
- const std::vector<SUnit>& SUnits,
+ const std::vector<SUnit> &SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex,
DbgValueVector &DbgValues) {
-
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
@@ -783,7 +792,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
DEBUG(dbgs() << "Available regs:");
for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
if (!State->IsLive(Reg))
- DEBUG(dbgs() << " " << TRI->getName(Reg));
+ DEBUG(dbgs() << " " << printReg(Reg, TRI));
}
DEBUG(dbgs() << '\n');
#endif
@@ -839,7 +848,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
(Edge->getKind() != SDep::Output)) continue;
unsigned AntiDepReg = Edge->getReg();
- DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
+ DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI));
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
if (!MRI.isAllocatable(AntiDepReg)) {
@@ -942,7 +951,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
std::map<unsigned, unsigned> RenameMap;
if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
- << TRI->getName(AntiDepReg) << ":");
+ << printReg(AntiDepReg, TRI) << ":");
// Handle each group register...
for (std::map<unsigned, unsigned>::iterator
@@ -950,9 +959,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
unsigned CurrReg = S->first;
unsigned NewReg = S->second;
- DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
- TRI->getName(NewReg) << "(" <<
- RegRefs.count(CurrReg) << " refs)");
+ DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->"
+ << printReg(NewReg, TRI) << "("
+ << RegRefs.count(CurrReg) << " refs)");
// Update the references to the old register CurrReg to
// refer to the new register NewReg.
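
The GroupNodes/GroupNodeIndices machinery touched in the hunks above is a small union-find: every register starts in its own group, UnionGroups merges two groups under one root, and LeaveGroup moves a register into a fresh singleton while the old node stays in place for any registers still pointing at it. A minimal standalone sketch of that pattern follows; the class name and the path-halving detail are illustrative only, not the exact LLVM data layout (which additionally pins group 0 as the "live through region" group).

// Union-find over register indices, mirroring the grouping pattern used by
// AggressiveAntiDepState. Illustrative sketch; names are hypothetical.
#include <vector>

struct RegGroups {
  std::vector<unsigned> GroupNodes;       // parent links; a root points at itself
  std::vector<unsigned> GroupNodeIndices; // register -> its current node

  explicit RegGroups(unsigned NumRegs)
      : GroupNodes(NumRegs), GroupNodeIndices(NumRegs) {
    for (unsigned R = 0; R < NumRegs; ++R) {
      GroupNodes[R] = R;       // each node is initially its own root
      GroupNodeIndices[R] = R; // each register starts in its own group
    }
  }

  // Follow parent links to the root of Reg's group, halving paths as we go.
  unsigned GetGroup(unsigned Reg) {
    unsigned Node = GroupNodeIndices[Reg];
    while (GroupNodes[Node] != Node) {
      GroupNodes[Node] = GroupNodes[GroupNodes[Node]];
      Node = GroupNodes[Node];
    }
    return Node;
  }

  // Merge the two groups and return the surviving root.
  unsigned UnionGroups(unsigned Reg1, unsigned Reg2) {
    unsigned G1 = GetGroup(Reg1), G2 = GetGroup(Reg2);
    if (G1 != G2)
      GroupNodes[G2] = G1;
    return G1;
  }

  // Give Reg a brand-new singleton group. The old node must stay as is
  // because other registers may still refer to it.
  unsigned LeaveGroup(unsigned Reg) {
    unsigned Idx = GroupNodes.size();
    GroupNodes.push_back(Idx);
    GroupNodeIndices[Reg] = Idx;
    return Idx;
  }
};
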
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index f97e6666b219..5dce3c2499e5 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -1,4 +1,4 @@
-//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//==- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -19,29 +19,35 @@
#include "AntiDepBreaker.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/Compiler.h"
#include <map>
+#include <set>
+#include <vector>
namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineOperand;
+class MachineRegisterInfo;
class RegisterClassInfo;
+class TargetInstrInfo;
+class TargetRegisterClass;
+class TargetRegisterInfo;
/// Contains all the state necessary for anti-dep breaking.
class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
public:
/// Information about a register reference within a liverange
- typedef struct {
+ struct RegisterReference {
/// The registers operand
MachineOperand *Operand;
+
/// The register class
const TargetRegisterClass *RC;
- } RegisterReference;
+ };
private:
/// Number of non-virtual target registers (i.e. TRI->getNumRegs()).
@@ -110,7 +116,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepBreaker
: public AntiDepBreaker {
- MachineFunction& MF;
+ MachineFunction &MF;
MachineRegisterInfo &MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -121,10 +127,10 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
BitVector CriticalPathSet;
/// The state used to identify and rename anti-dependence registers.
- AggressiveAntiDepState *State;
+ AggressiveAntiDepState *State = nullptr;
public:
- AggressiveAntiDepBreaker(MachineFunction& MFi,
+ AggressiveAntiDepBreaker(MachineFunction &MFi,
const RegisterClassInfo &RCI,
TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
~AggressiveAntiDepBreaker() override;
@@ -134,8 +140,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
/// Identifiy anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
- ///
- unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex,
@@ -143,7 +148,6 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
/// Update liveness information to account for the current
/// instruction, which will not be scheduled.
- ///
void Observe(MachineInstr &MI, unsigned Count,
unsigned InsertPosIndex) override;
@@ -152,7 +156,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
private:
/// Keep track of a position in the allocation order for each regclass.
- typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
+ using RenameOrderType = std::map<const TargetRegisterClass *, unsigned>;
/// Return true if MO represents a register
/// that is both implicitly used and defined in MI
@@ -174,6 +178,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
RenameOrderType& RenameOrder,
std::map<unsigned, unsigned> &RenameMap);
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index d840a2f69ab3..8e8c1d8e08d1 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -31,18 +31,19 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
const VirtRegMap &VRM,
const RegisterClassInfo &RegClassInfo,
const LiveRegMatrix *Matrix)
- : Pos(0) {
+ : Pos(0), HardHints(false) {
const MachineFunction &MF = VRM.getMachineFunction();
const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
- TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix);
+ if (TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix))
+ HardHints = true;
rewind();
DEBUG({
if (!Hints.empty()) {
dbgs() << "hints:";
for (unsigned I = 0, E = Hints.size(); I != E; ++I)
- dbgs() << ' ' << PrintReg(Hints[I], TRI);
+ dbgs() << ' ' << printReg(Hints[I], TRI);
dbgs() << '\n';
}
});
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 8223a52e333b..467bcc2edc6f 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -32,7 +32,11 @@ class LLVM_LIBRARY_VISIBILITY AllocationOrder {
ArrayRef<MCPhysReg> Order;
int Pos;
+ // If HardHints is true, *only* Hints will be returned.
+ bool HardHints;
+
public:
+
/// Create a new AllocationOrder for VirtReg.
/// @param VirtReg Virtual register to allocate for.
/// @param VRM Virtual register map for function.
@@ -51,6 +55,8 @@ public:
unsigned next(unsigned Limit = 0) {
if (Pos < 0)
return Hints.end()[Pos++];
+ if (HardHints)
+ return 0;
if (!Limit)
Limit = Order.size();
while (Pos < int(Limit)) {
@@ -68,6 +74,8 @@ public:
unsigned nextWithDups(unsigned Limit) {
if (Pos < 0)
return Hints.end()[Pos++];
+ if (HardHints)
+ return 0;
if (Pos < int(Limit))
return Order[Pos++];
return 0;
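
The new HardHints flag changes the iteration contract of this header: while Pos is negative, next() walks the hint list off Hints.end(), and once the hints are exhausted a hard-hinted order returns 0 instead of falling through to the regular allocation order. A hedged sketch of a consuming loop; tryAssign is a hypothetical callback, only the AllocationOrder calls come from the header above.

// Illustrative consumer of AllocationOrder with hard hints. When the target
// reports its hints as mandatory, this loop sees each hint once and then 0,
// so no non-hinted physical register is ever tried.
AllocationOrder Order(VirtReg, VRM, RegClassInfo, Matrix);
while (unsigned PhysReg = Order.next()) {
  if (tryAssign(VirtReg, PhysReg)) // hypothetical allocator callback
    return PhysReg;
}
// Falling out of the loop under hard hints means: spill or split, rather
// than take an arbitrary register from the remaining order.
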
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index c2aecc651b79..0731ae575437 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -14,7 +14,9 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -24,9 +26,6 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
@@ -565,6 +564,24 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
return false;
const Value *RetVal = Ret->getOperand(0), *CallVal = I;
+ // Intrinsic like llvm.memcpy has no return value, but the expanded
+ // libcall may or may not have return value. On most platforms, it
+ // will be expanded as memcpy in libc, which returns the first
+ // argument. On other platforms like arm-none-eabi, memcpy may be
+ // expanded as library call without return value, like __aeabi_memcpy.
+ const CallInst *Call = cast<CallInst>(I);
+ if (Function *F = Call->getCalledFunction()) {
+ Intrinsic::ID IID = F->getIntrinsicID();
+ if (((IID == Intrinsic::memcpy &&
+ TLI.getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy")) ||
+ (IID == Intrinsic::memmove &&
+ TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
+ (IID == Intrinsic::memset &&
+ TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
+ RetVal == Call->getArgOperand(0))
+ return true;
+ }
+
SmallVector<unsigned, 4> RetPath, CallPath;
SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes;
@@ -651,7 +668,7 @@ llvm::getFuncletMembership(const MachineFunction &MF) {
int EntryBBNumber = MF.front().getNumber();
bool IsSEH = isAsynchronousEHPersonality(
- classifyEHPersonality(MF.getFunction()->getPersonalityFn()));
+ classifyEHPersonality(MF.getFunction().getPersonalityFn()));
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
SmallVector<const MachineBasicBlock *, 16> FuncletBlocks;
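
The new block in returnTypeIsEligibleForTailCall recognizes the usual C idiom behind it: libc's memcpy/memmove/memset return their first argument, so IR that returns the destination pointer of the corresponding intrinsic can still be tail-called, provided the lowered libcall really is the libc function (checked via getLibcallName) rather than a non-returning variant such as __aeabi_memcpy. A source-level example of the shape being matched, assuming the default libcall names:

// After front-end lowering, the memcpy below becomes llvm.memcpy (which
// returns void) followed by a return of the first intrinsic argument, i.e.
// RetVal == Call->getArgOperand(0) in the check above.
#include <cstddef>
#include <cstring>

void *copy_out(void *dst, const void *src, size_t n) {
  return memcpy(dst, src, n); // libc memcpy returns dst; tail-callable
}
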
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index d14d93100adb..181da83dc88b 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -1,4 +1,4 @@
-//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=//
+//===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,12 +15,14 @@
#ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
#define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <utility>
#include <vector>
namespace llvm {
@@ -29,17 +31,17 @@ namespace llvm {
/// registers to break register anti-dependencies (WAR hazards).
class LLVM_LIBRARY_VISIBILITY AntiDepBreaker {
public:
- typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
- DbgValueVector;
+ using DbgValueVector =
+ std::vector<std::pair<MachineInstr *, MachineInstr *>>;
virtual ~AntiDepBreaker();
/// Initialize anti-dep breaking for a new basic block.
- virtual void StartBlock(MachineBasicBlock *BB) =0;
+ virtual void StartBlock(MachineBasicBlock *BB) = 0;
/// Identifiy anti-dependencies within a basic-block region and break them by
/// renaming registers. Return the number of anti-dependencies broken.
- virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ virtual unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex,
@@ -51,7 +53,7 @@ public:
unsigned InsertPosIndex) = 0;
/// Finish anti-dep breaking for a basic block.
- virtual void FinishBlock() =0;
+ virtual void FinishBlock() = 0;
/// Update DBG_VALUE if dependency breaker is updating
/// other machine instruction to use NewReg.
@@ -81,6 +83,6 @@ public:
}
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
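
After the cleanup, the header declares four pure-virtual hooks plus the DbgValueVector alias. A skeletal no-op implementation, purely hypothetical and not a real LLVM pass, shows which hooks the post-RA scheduler drives per block; the signatures are copied from the diff above.

// Hypothetical no-op AntiDepBreaker; compiles against the header shown
// above and breaks no dependencies.
#include "AntiDepBreaker.h"

class NoopAntiDepBreaker : public llvm::AntiDepBreaker {
public:
  void StartBlock(llvm::MachineBasicBlock *BB) override {}

  unsigned BreakAntiDependencies(const std::vector<llvm::SUnit> &SUnits,
                                 llvm::MachineBasicBlock::iterator Begin,
                                 llvm::MachineBasicBlock::iterator End,
                                 unsigned InsertPosIndex,
                                 DbgValueVector &DbgValues) override {
    return 0; // number of anti-dependencies broken
  }

  void Observe(llvm::MachineInstr &MI, unsigned Count,
               unsigned InsertPosIndex) override {}

  void FinishBlock() override {}
};
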
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 8b1376ab363d..15cfbd5c40ff 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -12,26 +12,19 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
@@ -67,16 +60,16 @@ void ARMException::beginFunction(const MachineFunction *MF) {
///
void ARMException::endFunction(const MachineFunction *MF) {
ARMTargetStreamer &ATS = getTargetStreamer();
- const Function *F = MF->getFunction();
+ const Function &F = MF->getFunction();
const Function *Per = nullptr;
- if (F->hasPersonalityFn())
- Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ if (F.hasPersonalityFn())
+ Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
bool forceEmitPersonality =
- F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
- F->needsUnwindTableEntry();
+ F.hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
+ F.needsUnwindTableEntry();
bool shouldEmitPersonality = forceEmitPersonality ||
!MF->getLandingPads().empty();
- if (!Asm->MF->getFunction()->needsUnwindTableEntry() &&
+ if (!Asm->MF->getFunction().needsUnwindTableEntry() &&
!shouldEmitPersonality)
ATS.emitCantUnwind();
else if (shouldEmitPersonality) {
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp
index ec552e0640e9..59ed0324bdb0 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---*- C++ -*--===//
+//===- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,9 +8,12 @@
//===----------------------------------------------------------------------===//
#include "AddressPool.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <utility>
using namespace llvm;
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index ba3e3b7c315d..990a158d87cd 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -----*- C++ -*--===//
+//===- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,11 +11,13 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/MC/MCSymbol.h"
namespace llvm {
-class MCSection;
+
class AsmPrinter;
+class MCSection;
+class MCSymbol;
+
// Collection of addresses for this unit and assorted labels.
// A Symbol->unsigned mapping of addresses used by indirect
// references.
@@ -23,6 +25,7 @@ class AddressPool {
struct AddressPoolEntry {
unsigned Number;
bool TLS;
+
AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {}
};
DenseMap<const MCSymbol *, AddressPoolEntry> Pool;
@@ -31,10 +34,10 @@ class AddressPool {
/// the last "resetUsedFlag" call. Used to implement type unit fallback - a
/// type that references addresses cannot be placed in a type unit when using
/// fission.
- bool HasBeenUsed;
+ bool HasBeenUsed = false;
public:
- AddressPool() : HasBeenUsed(false) {}
+ AddressPool() = default;
/// \brief Returns the index into the address pool with the given
/// label/symbol.
@@ -48,5 +51,7 @@ public:
void resetUsedFlag() { HasBeenUsed = false; }
};
-}
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
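
AddressPool is essentially an interning map: the first time a symbol is requested it gets the next dense index, later requests return the same index, and HasBeenUsed records whether any index was handed out since the last resetUsedFlag (the type-unit fallback check mentioned in the comment above). A generic sketch of that pattern, not the exact LLVM API:

// Generic interning pool: stable dense indices per distinct key, plus a
// used flag. Illustrative only; AddressPool itself is keyed on MCSymbol*.
#include <unordered_map>

template <typename Key> class IndexPool {
  std::unordered_map<Key, unsigned> Pool;
  bool HasBeenUsed = false;

public:
  unsigned getIndex(const Key &K) {
    HasBeenUsed = true;
    // try_emplace inserts only when K is new, so existing keys keep their
    // original index; Pool.size() is evaluated before the insertion.
    return Pool.try_emplace(K, static_cast<unsigned>(Pool.size()))
        .first->second;
  }

  bool hasBeenUsed() const { return HasBeenUsed; }
  void resetUsedFlag() { HasBeenUsed = false; }
};
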
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index ff427c9a0d75..31037095aa2b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//===- AsmPrinter.cpp - Common AsmPrinter code ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,10 +29,11 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ObjectUtils.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
@@ -46,10 +47,19 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -62,14 +72,18 @@
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodePadder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
@@ -78,29 +92,28 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cinttypes>
#include <cstdint>
+#include <iterator>
#include <limits>
#include <memory>
#include <string>
@@ -129,7 +142,8 @@ static cl::opt<bool>
char AsmPrinter::ID = 0;
-typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type;
+using gcp_map_type = DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>>;
+
static gcp_map_type &getGCMap(void *&P) {
if (!P)
P = new gcp_map_type();
@@ -184,7 +198,6 @@ bool AsmPrinter::isPositionIndependent() const {
}
/// getFunctionNumber - Return a unique ID for the current function.
-///
unsigned AsmPrinter::getFunctionNumber() const {
return MF->getFunctionNumber();
}
@@ -221,8 +234,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineModuleInfo>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
- if (isVerbose())
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineLoopInfo>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -242,28 +254,8 @@ bool AsmPrinter::doInitialization(Module &M) {
// alternative is duplicated code in each of the target asm printers that
// use the directive, where it would need the same conditionalization
// anyway.
- const Triple &TT = TM.getTargetTriple();
- // If there is a version specified, Major will be non-zero.
- if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) {
- unsigned Major, Minor, Update;
- MCVersionMinType VersionType;
- if (TT.isWatchOS()) {
- VersionType = MCVM_WatchOSVersionMin;
- TT.getWatchOSVersion(Major, Minor, Update);
- } else if (TT.isTvOS()) {
- VersionType = MCVM_TvOSVersionMin;
- TT.getiOSVersion(Major, Minor, Update);
- } else if (TT.isMacOSX()) {
- VersionType = MCVM_OSXVersionMin;
- if (!TT.getMacOSXVersion(Major, Minor, Update))
- Major = 0;
- } else {
- VersionType = MCVM_IOSVersionMin;
- TT.getiOSVersion(Major, Minor, Update);
- }
- if (Major != 0)
- OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update);
- }
+ const Triple &Target = TM.getTargetTriple();
+ OutStreamer->EmitVersionForTarget(Target);
// Allow the target to emit any magic that it wants at the start of the file.
EmitStartOfAsmFile(M);
@@ -272,7 +264,8 @@ bool AsmPrinter::doInitialization(Module &M) {
// don't, this at least helps the user find where a global came from.
if (MAI->hasSingleParameterDotFile()) {
// .file "foo.c"
- OutStreamer->EmitFileDirective(M.getSourceFileName());
+ OutStreamer->EmitFileDirective(
+ llvm::sys::path::filename(M.getSourceFileName()));
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -628,35 +621,35 @@ void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
/// EmitFunctionHeader - This method emits the header for the current
/// function.
void AsmPrinter::EmitFunctionHeader() {
- const Function *F = MF->getFunction();
+ const Function &F = MF->getFunction();
if (isVerbose())
OutStreamer->GetCommentOS()
<< "-- Begin function "
- << GlobalValue::dropLLVMManglingEscape(F->getName()) << '\n';
+ << GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n';
// Print out constants referenced by the function
EmitConstantPool();
// Print the 'header' of function.
- OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM));
- EmitVisibility(CurrentFnSym, F->getVisibility());
+ OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM));
+ EmitVisibility(CurrentFnSym, F.getVisibility());
- EmitLinkage(F, CurrentFnSym);
+ EmitLinkage(&F, CurrentFnSym);
if (MAI->hasFunctionAlignment())
- EmitAlignment(MF->getAlignment(), F);
+ EmitAlignment(MF->getAlignment(), &F);
if (MAI->hasDotTypeDotSizeDirective())
OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
if (isVerbose()) {
- F->printAsOperand(OutStreamer->GetCommentOS(),
- /*PrintType=*/false, F->getParent());
+ F.printAsOperand(OutStreamer->GetCommentOS(),
+ /*PrintType=*/false, F.getParent());
OutStreamer->GetCommentOS() << '\n';
}
// Emit the prefix data.
- if (F->hasPrefixData()) {
+ if (F.hasPrefixData()) {
if (MAI->hasSubsectionsViaSymbols()) {
// Preserving prefix data on platforms which use subsections-via-symbols
// is a bit tricky. Here we introduce a symbol for the prefix data
@@ -665,12 +658,12 @@ void AsmPrinter::EmitFunctionHeader() {
MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol();
OutStreamer->EmitLabel(PrefixSym);
- EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+ EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
// Emit an .alt_entry directive for the actual function symbol.
OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
} else {
- EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+ EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
}
}
@@ -682,7 +675,7 @@ void AsmPrinter::EmitFunctionHeader() {
// references to the dangling symbols. Emit them at the start of the function
// so that we don't get references to undefined symbols.
std::vector<MCSymbol*> DeadBlockSyms;
- MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
+ MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
OutStreamer->AddComment("Address taken block that was later removed");
OutStreamer->EmitLabel(DeadBlockSyms[i]);
@@ -707,8 +700,8 @@ void AsmPrinter::EmitFunctionHeader() {
}
// Emit the prologue data.
- if (F->hasPrologueData())
- EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrologueData());
+ if (F.hasPrologueData())
+ EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -729,9 +722,11 @@ void AsmPrinter::EmitFunctionEntryLabel() {
}
/// emitComments - Pretty-print comments for instructions.
-static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
+/// It returns true iff the sched comment was emitted.
+/// Otherwise it returns false.
+static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
AsmPrinter *AP) {
- const MachineFunction *MF = MI.getParent()->getParent();
+ const MachineFunction *MF = MI.getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
// Check for spills and reloads
@@ -773,12 +768,16 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
CommentOS << " Reload Reuse";
}
- if (Commented && AP->EnablePrintSchedInfo)
- // If any comment was added above and we need sched info comment then
- // add this new comment just after the above comment w/o "\n" between them.
- CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
- else if (Commented)
+ if (Commented) {
+ if (AP->EnablePrintSchedInfo) {
+ // If any comment was added above and we need sched info comment then add
+ // this new comment just after the above comment w/o "\n" between them.
+ CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
+ return true;
+ }
CommentOS << "\n";
+ }
+ return false;
}
/// emitImplicitDef - This method emits the specified machine instruction
@@ -789,7 +788,7 @@ void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
SmallString<128> Str;
raw_svector_ostream OS(Str);
OS << "implicit-def: "
- << PrintReg(RegNo, MF->getSubtarget().getRegisterInfo());
+ << printReg(RegNo, MF->getSubtarget().getRegisterInfo());
OutStreamer->AddComment(OS.str());
OutStreamer->AddBlankLine();
@@ -802,10 +801,8 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &Op = MI->getOperand(i);
assert(Op.isReg() && "KILL instruction must have only register operands");
- OS << ' '
- << PrintReg(Op.getReg(),
- AP.MF->getSubtarget().getRegisterInfo())
- << (Op.isDef() ? "<def>" : "<kill>");
+ OS << ' ' << (Op.isDef() ? "def " : "killed ")
+ << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
}
AP.OutStreamer->AddComment(OS.str());
AP.OutStreamer->AddBlankLine();
@@ -890,7 +887,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
if (MemLoc)
OS << '[';
- OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
+ OS << printReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
}
if (MemLoc)
@@ -901,9 +898,9 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
return true;
}
-AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
+AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const {
if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
- MF->getFunction()->needsUnwindTableEntry())
+ MF->getFunction().needsUnwindTableEntry())
return CFI_M_EH;
if (MMI->hasDebugInfo())
@@ -913,7 +910,7 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
}
bool AsmPrinter::needsSEHMoves() {
- return MAI->usesWindowsCFI() && MF->getFunction()->needsUnwindTableEntry();
+ return MAI->usesWindowsCFI() && MF->getFunction().needsUnwindTableEntry();
}
void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
@@ -951,6 +948,31 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
MCConstantExpr::create(FrameOffset, OutContext));
}
+void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
+ if (!MF.getTarget().Options.EmitStackSizeSection)
+ return;
+
+ MCSection *StackSizeSection = getObjFileLowering().getStackSizesSection();
+ if (!StackSizeSection)
+ return;
+
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ // Don't emit functions with dynamic stack allocations.
+ if (FrameInfo.hasVarSizedObjects())
+ return;
+
+ OutStreamer->PushSection();
+ OutStreamer->SwitchSection(StackSizeSection);
+
+ const MCSymbol *FunctionSymbol = getSymbol(&MF.getFunction());
+ uint64_t StackSize = FrameInfo.getStackSize();
+ OutStreamer->EmitValue(MCSymbolRefExpr::create(FunctionSymbol, OutContext),
+ /* size = */ 8);
+ OutStreamer->EmitULEB128IntValue(StackSize);
+
+ OutStreamer->PopSection();
+}
+
static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF,
MachineModuleInfo *MMI) {
if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo())
@@ -958,10 +980,10 @@ static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF,
// We might emit an EH table that uses function begin and end labels even if
// we don't have any landingpads.
- if (!MF.getFunction()->hasPersonalityFn())
+ if (!MF.getFunction().hasPersonalityFn())
return false;
return !isNoOpWithoutInvoke(
- classifyEHPersonality(MF.getFunction()->getPersonalityFn()));
+ classifyEHPersonality(MF.getFunction().getPersonalityFn()));
}
/// EmitFunctionBody - This method emits the body and trailer for a
@@ -981,7 +1003,6 @@ void AsmPrinter::EmitFunctionBody() {
// Print a label for the basic block.
EmitBasicBlockStart(MBB);
for (auto &MI : MBB) {
-
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
!MI.isDebugValue()) {
@@ -998,18 +1019,18 @@ void AsmPrinter::EmitFunctionBody() {
}
}
- if (isVerbose())
- emitComments(MI, OutStreamer->GetCommentOS(), this);
+ if (isVerbose() && emitComments(MI, OutStreamer->GetCommentOS(), this)) {
+ MachineInstr *MIP = const_cast<MachineInstr *>(&MI);
+ MIP->setAsmPrinterFlag(MachineInstr::NoSchedComment);
+ }
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
emitCFIInstruction(MI);
break;
-
case TargetOpcode::LOCAL_ESCAPE:
emitFrameAlloc(MI);
break;
-
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol());
@@ -1049,7 +1070,7 @@ void AsmPrinter::EmitFunctionBody() {
EmittedInsts += NumInstsInFunction;
MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionCount",
- MF->getFunction()->getSubprogram(),
+ MF->getFunction().getSubprogram(),
&MF->front());
R << ore::NV("NumInstructions", NumInstsInFunction)
<< " instructions in function";
@@ -1077,8 +1098,8 @@ void AsmPrinter::EmitFunctionBody() {
}
}
- const Function *F = MF->getFunction();
- for (const auto &BB : *F) {
+ const Function &F = MF->getFunction();
+ for (const auto &BB : F) {
if (!BB.hasAddressTaken())
continue;
MCSymbol *Sym = GetBlockAddressSymbol(&BB);
@@ -1125,6 +1146,9 @@ void AsmPrinter::EmitFunctionBody() {
HI.Handler->endFunction(MF);
}
+ // Emit section containing stack size metadata.
+ emitStackSizeSection(*MF);
+
if (isVerbose())
OutStreamer->GetCommentOS() << "-- End function\n";
@@ -1380,6 +1404,16 @@ bool AsmPrinter::doFinalization(Module &M) {
PtrSize);
}
+ // Emit .note.GNU-split-stack and .note.GNU-no-split-stack sections if
+ // split-stack is used.
+ if (TM.getTargetTriple().isOSBinFormatELF() && MMI->hasSplitStack()) {
+ OutStreamer->SwitchSection(
+ OutContext.getELFSection(".note.GNU-split-stack", ELF::SHT_PROGBITS, 0));
+ if (MMI->hasNosplitStack())
+ OutStreamer->SwitchSection(
+ OutContext.getELFSection(".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0));
+ }
+
// If we don't have any trampolines, then we don't require stack memory
// to be executable. Some targets have a directive to declare this.
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
@@ -1408,7 +1442,7 @@ MCSymbol *AsmPrinter::getCurExceptionSym() {
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
// Get the function symbol.
- CurrentFnSym = getSymbol(MF.getFunction());
+ CurrentFnSym = getSymbol(&MF.getFunction());
CurrentFnSymForSize = CurrentFnSym;
CurrentFnBegin = nullptr;
CurExceptionSym = nullptr;
@@ -1420,8 +1454,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
}
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
- if (isVerbose())
- LI = &getAnalysis<MachineLoopInfo>();
+ LI = &getAnalysis<MachineLoopInfo>();
const TargetSubtargetInfo &STI = MF.getSubtarget();
EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
@@ -1446,7 +1479,6 @@ namespace {
/// representations of the constants in the constant pool MCP. This is
/// used to print out constants which have been "spilled to memory" by
/// the code generator.
-///
void AsmPrinter::EmitConstantPool() {
const MachineConstantPool *MCP = MF->getConstantPool();
const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
@@ -1526,7 +1558,6 @@ void AsmPrinter::EmitConstantPool() {
/// EmitJumpTableInfo - Print assembly representations of the jump tables used
/// by the current function to the current output stream.
-///
void AsmPrinter::EmitJumpTableInfo() {
const DataLayout &DL = MF->getDataLayout();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
@@ -1537,14 +1568,14 @@ void AsmPrinter::EmitJumpTableInfo() {
// Pick the directive to use to print the jump table entries, and switch to
// the appropriate section.
- const Function *F = MF->getFunction();
+ const Function &F = MF->getFunction();
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
- *F);
+ F);
if (JTInDiffSection) {
// Drop it in the readonly section.
- MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, TM);
+ MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM);
OutStreamer->SwitchSection(ReadOnlySection);
}
@@ -1723,7 +1754,7 @@ struct Structor {
Structor() = default;
};
-} // end anonymous namespace
+} // end anonymous namespace
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
@@ -1818,13 +1849,11 @@ void AsmPrinter::EmitInt8(int Value) const {
}
/// EmitInt16 - Emit a short directive and value.
-///
void AsmPrinter::EmitInt16(int Value) const {
OutStreamer->EmitIntValue(Value, 2);
}
/// EmitInt32 - Emit a long directive and value.
-///
void AsmPrinter::EmitInt32(int Value) const {
OutStreamer->EmitIntValue(Value, 4);
}
@@ -1866,7 +1895,6 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
// byte alignment. If a global value is specified, and if that global has
// an explicit alignment requested, it will override the alignment request
// if required for correctness.
-//
void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
if (GV)
NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits);
@@ -1921,7 +1949,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
raw_string_ostream OS(S);
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/false,
- !MF ? nullptr : MF->getFunction()->getParent());
+ !MF ? nullptr : MF->getFunction().getParent());
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
@@ -2317,7 +2345,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
//
// cstexpr := <gotequiv> - <foo> + gotpcrelcst, where
// gotpcrelcst := <offset from @foo base> + <cst>
- //
MCValue MV;
if (!(*ME)->evaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
return;
@@ -2348,7 +2375,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
// If gotpcrelcst is positive it means that we can safely fold the pc rel
// displacement into the GOTPCREL. We can also can have an extra offset <cst>
// if the target knows how to encode it.
- //
int64_t GOTPCRelCst = Offset + MV.getConstant();
if (GOTPCRelCst < 0)
return;
@@ -2370,7 +2396,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
// .long 42
// foo:
// .long bar@GOTPCREL+<gotpcrelcst>
- //
AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym];
const GlobalVariable *GV = Result.first;
int NumUses = (int)Result.second;
@@ -2550,7 +2575,6 @@ static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
<< " Depth=" << Loop->getLoopDepth() << '\n';
}
-
/// PrintChildLoopComment - Print comments about child loops within
/// the loop for this basic block, with nesting.
static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
@@ -2603,6 +2627,23 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
}
+void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
+ MCCodePaddingContext &Context) const {
+ assert(MF != nullptr && "Machine function must be valid");
+ assert(LI != nullptr && "Loop info must be valid");
+ Context.IsPaddingActive = !MF->hasInlineAsm() &&
+ !MF->getFunction().optForSize() &&
+ TM.getOptLevel() != CodeGenOpt::None;
+ const MachineLoop *CurrentLoop = LI->getLoopFor(&MBB);
+ Context.IsBasicBlockInsideInnermostLoop =
+ CurrentLoop != nullptr && CurrentLoop->getSubLoops().empty();
+ Context.IsBasicBlockReachableViaFallthrough =
+ std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=
+ MBB.pred_end();
+ Context.IsBasicBlockReachableViaBranch =
+ MBB.pred_size() > 0 && !isBlockOnlyReachableByFallthrough(&MBB);
+}
+
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
@@ -2618,6 +2659,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
// Emit an alignment directive for this block, if needed.
if (unsigned Align = MBB.getAlignment())
EmitAlignment(Align);
+ MCCodePaddingContext Context;
+ setupCodePaddingContext(MBB, Context);
+ OutStreamer->EmitCodePaddingBasicBlockStart(Context);
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
@@ -2652,13 +2696,20 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
(isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) {
if (isVerbose()) {
// NOTE: Want this comment at start of line, don't emit with AddComment.
- OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false);
+ OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":",
+ false);
}
} else {
OutStreamer->EmitLabel(MBB.getSymbol());
}
}
+void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {
+ MCCodePaddingContext Context;
+ setupCodePaddingContext(MBB, Context);
+ OutStreamer->EmitCodePaddingBasicBlockEnd(Context);
+}
+
void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
bool IsDefinition) const {
MCSymbolAttr Attr = MCSA_Invalid;
@@ -2765,10 +2816,13 @@ void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out,
Out->EmitSymbolValue(Sled, Bytes);
Out->EmitSymbolValue(CurrentFnSym, Bytes);
auto Kind8 = static_cast<uint8_t>(Kind);
- Out->EmitBytes(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
- Out->EmitBytes(
+ Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
+ Out->EmitBinaryData(
StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1));
- Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries
+ Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Version), 1));
+ auto Padding = (4 * Bytes) - ((2 * Bytes) + 3);
+ assert(Padding >= 0 && "Instrumentation map entry > 4 * Word Size");
+ Out->EmitZeros(Padding);
}
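Each sled entry now spans four pointer-sized words: two symbol values, three metadata bytes, and zero padding computed as (4 * Bytes) - ((2 * Bytes) + 3). A packed-struct view of the 64-bit case (struct and field names are illustrative, not taken from the XRay runtime headers):

    #include <cstdint>

    // Illustrative on-disk shape of one xray_instr_map entry when Bytes == 8:
    // 16 address bytes + 3 metadata bytes + 13 zero bytes = 32 bytes total.
    struct XRaySledEntry64 {
      uint64_t SledAddress;     // the patchable sled label
      uint64_t FunctionAddress; // CurrentFnSym
      uint8_t Kind;             // SledKind, truncated to one byte
      uint8_t AlwaysInstrument;
      uint8_t Version;
      uint8_t Padding[13];      // (4 * 8) - ((2 * 8) + 3)
    };
    static_assert(sizeof(XRaySledEntry64) == 32, "entry is four words");
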
void AsmPrinter::emitXRayTable() {
@@ -2776,23 +2830,26 @@ void AsmPrinter::emitXRayTable() {
return;
auto PrevSection = OutStreamer->getCurrentSectionOnly();
- auto Fn = MF->getFunction();
+ const Function &F = MF->getFunction();
MCSection *InstMap = nullptr;
MCSection *FnSledIndex = nullptr;
if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) {
- if (Fn->hasComdat()) {
- InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
- Fn->getComdat()->getName());
- FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
- Fn->getComdat()->getName());
- } else {
- InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC);
- FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC);
+ auto Associated = dyn_cast<MCSymbolELF>(CurrentFnSym);
+ assert(Associated != nullptr);
+ auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
+ std::string GroupName;
+ if (F.hasComdat()) {
+ Flags |= ELF::SHF_GROUP;
+ GroupName = F.getComdat()->getName();
}
+
+ auto UniqueID = ++XRayFnUniqueID;
+ InstMap =
+ OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, Flags, 0,
+ GroupName, UniqueID, Associated);
+ FnSledIndex =
+ OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, Flags, 0,
+ GroupName, UniqueID, Associated);
} else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
@@ -2802,15 +2859,7 @@ void AsmPrinter::emitXRayTable() {
llvm_unreachable("Unsupported target");
}
- // Before we switch over, we force a reference to a label inside the
- // xray_fn_idx sections. This makes sure that the xray_fn_idx section is kept
- // live by the linker if the function is not garbage-collected. Since this
- // function is always called just before the function's end, we assume that
- // this is happening after the last return instruction.
auto WordSizeBytes = MAI->getCodePointerSize();
- MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true);
- OutStreamer->EmitCodeAlignment(16);
- OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false);
// Now we switch to the instrumentation map section. Because this is done
// per-function, we are able to create an index entry that will represent the
@@ -2829,24 +2878,23 @@ void AsmPrinter::emitXRayTable() {
// pointers. This should work for both 32-bit and 64-bit platforms.
OutStreamer->SwitchSection(FnSledIndex);
OutStreamer->EmitCodeAlignment(2 * WordSizeBytes);
- OutStreamer->EmitLabel(IdxRef);
- OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes);
- OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes);
+ OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false);
+ OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes, false);
OutStreamer->SwitchSection(PrevSection);
Sleds.clear();
}
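The synthetic xray_fn_idx_synth_ reference the deleted block used to keep the metadata alive is replaced by SHF_LINK_ORDER: each section is associated with the function's own symbol, so a garbage-collecting linker keeps or drops them together. A sketch of the flag combination requested above, with the ELF constants spelled out (values per the ELF gABI, mirroring llvm/BinaryFormat/ELF.h):

    #include <cstdint>

    constexpr uint32_t SHF_WRITE = 0x1;
    constexpr uint32_t SHF_ALLOC = 0x2;
    constexpr uint32_t SHF_LINK_ORDER = 0x80; // GC'd together with its function
    constexpr uint32_t SHF_GROUP = 0x200;     // added only for comdat functions

    constexpr uint32_t XRaySectionFlags =
        SHF_WRITE | SHF_ALLOC | SHF_LINK_ORDER; // == 0x83
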
void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
- SledKind Kind) {
- auto Fn = MI.getParent()->getParent()->getFunction();
- auto Attr = Fn->getFnAttribute("function-instrument");
- bool LogArgs = Fn->hasFnAttribute("xray-log-args");
+ SledKind Kind, uint8_t Version) {
+ const Function &F = MI.getMF()->getFunction();
+ auto Attr = F.getFnAttribute("function-instrument");
+ bool LogArgs = F.hasFnAttribute("xray-log-args");
bool AlwaysInstrument =
Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
if (Kind == SledKind::FUNCTION_ENTER && LogArgs)
Kind = SledKind::LOG_ARGS_ENTER;
- Sleds.emplace_back(
- XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn });
+ Sleds.emplace_back(XRayFunctionEntry{Sled, CurrentFnSym, Kind,
+ AlwaysInstrument, &F, Version});
}
uint16_t AsmPrinter::getDwarfVersion() const {
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 0edf9051d342..08eb14e242c5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -12,14 +12,12 @@
//===----------------------------------------------------------------------===//
#include "ByteStreamer.h"
-#include "DwarfDebug.h"
-#include "DwarfExpression.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -28,9 +26,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -48,12 +44,19 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {
}
/// EmitULEB128 - emit the specified unsigned leb128 value.
-void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc,
- unsigned PadTo) const {
+void AsmPrinter::EmitPaddedULEB128(uint64_t Value, unsigned PadTo,
+ const char *Desc) const {
if (isVerbose() && Desc)
OutStreamer->AddComment(Desc);
- OutStreamer->EmitULEB128IntValue(Value, PadTo);
+ OutStreamer->EmitPaddedULEB128IntValue(Value, PadTo);
+}
+
+void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const {
+ if (isVerbose() && Desc)
+ OutStreamer->AddComment(Desc);
+
+ OutStreamer->EmitULEB128IntValue(Value);
}
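For readers new to LEB128: the streamer encodes seven payload bits per byte, least significant first, setting the high bit of every byte except the last. A self-contained sketch of the unsigned encoding (the decoder simply reverses it):

    #include <cstdint>
    #include <vector>

    // Encode Value as ULEB128, as EmitULEB128IntValue does under the hood.
    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f; // low seven bits
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // continuation bit: more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;
    }
    // encodeULEB128(624485) == {0xE5, 0x8E, 0x26}, the DWARF spec example.
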
static const char *DecodeDWARFEncoding(unsigned Encoding) {
@@ -212,6 +215,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpEscape:
OutStreamer->EmitCFIEscape(Inst.getValues());
break;
+ case MCCFIInstruction::OpRestore:
+ OutStreamer->EmitCFIRestore(Inst.getRegister());
+ break;
}
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index eae79ad101d3..04a72ba3d738 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -17,6 +17,9 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
@@ -32,10 +35,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -144,6 +144,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
" we don't have an asm parser for this target\n");
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
+ Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
if (Dialect == InlineAsm::AD_Intel)
// We need this flag to be able to parse numbers like "0bH"
Parser->setParsingInlineAsm(true);
@@ -513,7 +514,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
// Reset SanitizeAddress based on the function's attribute.
MCTargetOptions MCOptions = TM.Options.MCOptions;
MCOptions.SanitizeAddress =
- MF->getFunction()->hasFnAttribute(Attribute::SanitizeAddress);
+ MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress);
EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD,
MI->getInlineAsmDialect());
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index a81d56e9618b..1d0a003dc50a 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "CodeViewDebug.h"
+#include "DwarfExpression.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -19,9 +20,9 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Triple.h"
@@ -34,9 +35,14 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
@@ -61,17 +67,15 @@
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cctype>
@@ -86,6 +90,9 @@
using namespace llvm;
using namespace llvm::codeview;
+static cl::opt<bool> EmitDebugGlobalHashes("emit-codeview-ghash-section",
+ cl::ReallyHidden, cl::init(false));
+
CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
: DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {
// If module doesn't have named metadata anchors or COFF debug section
@@ -153,12 +160,19 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
}
unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
+ StringRef FullPath = getFullFilepath(F);
unsigned NextId = FileIdMap.size() + 1;
- auto Insertion = FileIdMap.insert(std::make_pair(F, NextId));
+ auto Insertion = FileIdMap.insert(std::make_pair(FullPath, NextId));
if (Insertion.second) {
// We have to compute the full filepath and emit a .cv_file directive.
- StringRef FullPath = getFullFilepath(F);
- bool Success = OS.EmitCVFileDirective(NextId, FullPath);
+ std::string Checksum = fromHex(F->getChecksum());
+ void *CKMem = OS.getContext().allocate(Checksum.size(), 1);
+ memcpy(CKMem, Checksum.data(), Checksum.size());
+ ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
+ Checksum.size());
+ DIFile::ChecksumKind ChecksumKind = F->getChecksumKind();
+ bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes,
+ static_cast<unsigned>(ChecksumKind));
(void)Success;
assert(Success && ".cv_file directive failed");
}
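getChecksum() returns the digest as a hex string, and fromHex (llvm/ADT/StringExtras.h) converts it back to raw bytes before the copy into context-owned memory. A minimal equivalent of that conversion, for illustration:

    #include <cstddef>
    #include <string>

    // Hex string ("9a0364b9e9...") to raw bytes; assumes even length and
    // valid hex digits, as holds for a well-formed checksum.
    std::string hexToBytes(const std::string &Hex) {
      auto Nibble = [](char C) -> unsigned {
        return C <= '9' ? C - '0' : (C | 0x20) - 'a' + 10;
      };
      std::string Out;
      for (std::size_t I = 0; I + 1 < Hex.size(); I += 2)
        Out.push_back(
            static_cast<char>(Nibble(Hex[I]) << 4 | Nibble(Hex[I + 1])));
      return Out;
    }
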
@@ -270,7 +284,7 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
// Build the fully qualified name of the scope.
std::string ScopeName = getFullyQualifiedName(Scope);
StringIdRecord SID(TypeIndex(), ScopeName);
- auto TI = TypeTable.writeKnownType(SID);
+ auto TI = TypeTable.writeLeafType(SID);
return recordTypeIndexForDINode(Scope, TI);
}
@@ -295,12 +309,12 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
TypeIndex ClassType = getTypeIndex(Class);
MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class),
DisplayName);
- TI = TypeTable.writeKnownType(MFuncId);
+ TI = TypeTable.writeLeafType(MFuncId);
} else {
// Otherwise, this must be a free function.
TypeIndex ParentScope = getScopeIndex(Scope);
FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName);
- TI = TypeTable.writeKnownType(FuncId);
+ TI = TypeTable.writeLeafType(FuncId);
}
return recordTypeIndexForDINode(SP, TI);
@@ -324,8 +338,9 @@ TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP,
// function type, as the complete class type is likely to reference this
// member function type.
TypeLoweringScope S(*this);
- TypeIndex TI =
- lowerTypeMemberFunction(SP->getType(), Class, SP->getThisAdjustment());
+ const bool IsStaticMethod = (SP->getFlags() & DINode::FlagStaticMember) != 0;
+ TypeIndex TI = lowerTypeMemberFunction(
+ SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod);
return recordTypeIndexForDINode(SP, TI, Class);
}
@@ -476,10 +491,13 @@ void CodeViewDebug::endModule() {
OS.AddComment("String table");
OS.EmitCVStringTableDirective();
- // Emit type information last, so that any types we translate while emitting
- // function info are included.
+ // Emit type information and hashes last, so that any types we translate while
+ // emitting function info are included.
emitTypeInformation();
+ if (EmitDebugGlobalHashes)
+ emitTypeGlobalHashes();
+
clear();
}
@@ -496,11 +514,6 @@ static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) {
}
void CodeViewDebug::emitTypeInformation() {
- // Do nothing if we have no debug info or if no non-trivial types were emitted
- // to TypeTable during codegen.
- NamedMDNode *CU_Nodes = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
- if (!CU_Nodes)
- return;
if (TypeTable.empty())
return;
@@ -545,7 +558,39 @@ void CodeViewDebug::emitTypeInformation() {
}
}
-namespace {
+void CodeViewDebug::emitTypeGlobalHashes() {
+ if (TypeTable.empty())
+ return;
+
+ // Start the .debug$H section with the version and hash algorithm, currently
+ // hardcoded to version 0, SHA1.
+ OS.SwitchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection());
+
+ OS.EmitValueToAlignment(4);
+ OS.AddComment("Magic");
+ OS.EmitIntValue(COFF::DEBUG_HASHES_SECTION_MAGIC, 4);
+ OS.AddComment("Section Version");
+ OS.EmitIntValue(0, 2);
+ OS.AddComment("Hash Algorithm");
+ OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1), 2);
+
+ TypeIndex TI(TypeIndex::FirstNonSimpleIndex);
+ for (const auto &GHR : TypeTable.hashes()) {
+ if (OS.isVerboseAsm()) {
+ // Emit an EOL-comment describing which TypeIndex this hash corresponds
+ // to, as well as the stringified SHA1 hash.
+ SmallString<32> Comment;
+ raw_svector_ostream CommentOS(Comment);
+ CommentOS << formatv("{0:X+} [{1}]", TI.getIndex(), GHR);
+ OS.AddComment(Comment);
+ ++TI;
+ }
+ assert(GHR.Hash.size() % 20 == 0);
+ StringRef S(reinterpret_cast<const char *>(GHR.Hash.data()),
+ GHR.Hash.size());
+ OS.EmitBinaryData(S);
+ }
+}
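The .debug$H payload is an eight-byte header followed by one fixed-size digest per type record, in type-index order starting at 0x1000 (TypeIndex::FirstNonSimpleIndex). A hypothetical reader-side view (struct and field names are illustrative):

    #include <cstdint>

    // Illustrative layout of the section emitted above.
    struct DebugHHeader {
      uint32_t Magic;          // COFF::DEBUG_HASHES_SECTION_MAGIC
      uint16_t SectionVersion; // 0
      uint16_t HashAlgorithm;  // GlobalTypeHashAlg::SHA1
    };
    static_assert(sizeof(DebugHHeader) == 8, "header is two words");
    // Followed immediately by tightly packed 20-byte SHA1 digests, one per
    // type record; digest i corresponds to the type at index 0x1000 + i.
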
static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
switch (DWLang) {
@@ -572,6 +617,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
return SourceLanguage::Cobol;
case dwarf::DW_LANG_Java:
return SourceLanguage::Java;
+ case dwarf::DW_LANG_D:
+ return SourceLanguage::D;
default:
// There's no CodeView representation for this language, and CV doesn't
// have an "unknown" option for the language field, so we'll use MASM,
@@ -580,9 +627,11 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
}
}
+namespace {
struct Version {
int Part[4];
};
+} // end anonymous namespace
// Takes a StringRef like "clang 4.0.0.0 (other nonsense 123)" and parses out
// the version number.
@@ -605,20 +654,19 @@ static Version parseVersion(StringRef Name) {
static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
switch (Type) {
- case Triple::ArchType::x86:
- return CPUType::Pentium3;
- case Triple::ArchType::x86_64:
- return CPUType::X64;
- case Triple::ArchType::thumb:
- return CPUType::Thumb;
- default:
- report_fatal_error("target architecture doesn't map to a CodeView "
- "CPUType");
+ case Triple::ArchType::x86:
+ return CPUType::Pentium3;
+ case Triple::ArchType::x86_64:
+ return CPUType::X64;
+ case Triple::ArchType::thumb:
+ return CPUType::Thumb;
+ case Triple::ArchType::aarch64:
+ return CPUType::ARM64;
+ default:
+ report_fatal_error("target architecture doesn't map to a CodeView CPUType");
}
}
-} // end anonymous namespace
-
void CodeViewDebug::emitCompilerInformation() {
MCContext &Context = MMI->getContext();
MCSymbol *CompilerBegin = Context.createTempSymbol(),
@@ -678,8 +726,10 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
OS.AddComment("Inlinee lines subsection");
MCSymbol *InlineEnd = beginCVSubsection(DebugSubsectionKind::InlineeLines);
- // We don't provide any extra file info.
- // FIXME: Find out if debuggers use this info.
+ // We emit the checksum info for files. This is used by debuggers to
+ // determine if a pdb matches the source before loading it. Visual Studio,
+ // for instance, will display a warning that the breakpoints are not valid if
+ // the pdb does not match the source.
OS.AddComment("Inlinee lines signature");
OS.EmitIntValue(unsigned(InlineeLinesSignature::Normal), 4);
@@ -692,13 +742,10 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
OS.AddComment("Inlined function " + SP->getName() + " starts at " +
SP->getFilename() + Twine(':') + Twine(SP->getLine()));
OS.AddBlankLine();
- // The filechecksum table uses 8 byte entries for now, and file ids start at
- // 1.
- unsigned FileOffset = (FileId - 1) * 8;
OS.AddComment("Type index of inlined function");
OS.EmitIntValue(InlineeIdx.getIndex(), 4);
OS.AddComment("Offset into filechecksum table");
- OS.EmitIntValue(FileOffset, 4);
+ OS.EmitCVFileChecksumOffsetDirective(FileId);
OS.AddComment("Starting line number");
OS.EmitIntValue(SP->getLine(), 4);
}
@@ -799,6 +846,10 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
if (FuncName.empty())
FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName());
+ // Emit FPO data, but only on 32-bit x86. No other platforms use it.
+ if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86)
+ OS.EmitCVFPOData(Fn);
+
// Emit a symbol subsection, required by VS2012+ to find function boundaries.
OS.AddComment("Symbol subsection for " + Twine(FuncName));
MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
@@ -858,6 +909,30 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
emitInlinedCallSite(FI, InlinedAt, I->second);
}
+ for (auto Annot : FI.Annotations) {
+ MCSymbol *Label = Annot.first;
+ MDTuple *Strs = cast<MDTuple>(Annot.second);
+ MCSymbol *AnnotBegin = MMI->getContext().createTempSymbol(),
+ *AnnotEnd = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(AnnotEnd, AnnotBegin, 2);
+ OS.EmitLabel(AnnotBegin);
+ OS.AddComment("Record kind: S_ANNOTATION");
+ OS.EmitIntValue(SymbolKind::S_ANNOTATION, 2);
+ OS.EmitCOFFSecRel32(Label, /*Offset=*/0);
+ // FIXME: Make sure we don't overflow the max record size.
+ OS.EmitCOFFSectionIndex(Label);
+ OS.EmitIntValue(Strs->getNumOperands(), 2);
+ for (Metadata *MD : Strs->operands()) {
+ // MDStrings are null terminated, so we can do EmitBytes and get the
+ // nice .asciz directive.
+ StringRef Str = cast<MDString>(MD)->getString();
+ assert(Str.data()[Str.size()] == '\0' && "non-nullterminated MDString");
+ OS.EmitBytes(StringRef(Str.data(), Str.size() + 1));
+ }
+ OS.EmitLabel(AnnotEnd);
+ }
+
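These S_ANNOTATION records carry the operands of MSVC's __annotation intrinsic through to the debugger. A hypothetical source-level trigger, assuming a Clang in MSVC-compatible mode that lowers __annotation to the llvm.codeview.annotation intrinsic:

    // Illustrative only: each wide-string argument becomes one MDString
    // operand of the annotation tuple emitted above.
    void checkpoint() {
      __annotation(L"phase 1", L"initialization complete");
    }
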
if (SP != nullptr)
emitDebugInfoForUDTs(LocalUDTs);
@@ -947,13 +1022,110 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
}
}
+static bool canUseReferenceType(const DbgVariableLocation &Loc) {
+ return !Loc.LoadChain.empty() && Loc.LoadChain.back() == 0;
+}
+
+static bool needsReferenceType(const DbgVariableLocation &Loc) {
+ return Loc.LoadChain.size() == 2 && Loc.LoadChain.back() == 0;
+}
+
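Here LoadChain is the list of successive memory dereferences applied to Register; a chain of {8, 0} means "load a pointer from [Register + 8], then load the value it points to", i.e. a pointer argument spilled to the stack. A toy model of the two predicates, with DbgVariableLocation reduced to the two fields they inspect (illustrative, not the real class):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Loc {
      unsigned Register = 0;
      std::vector<int64_t> LoadChain; // offsets of successive loads
    };

    bool canUseRefType(const Loc &L) {
      return !L.LoadChain.empty() && L.LoadChain.back() == 0;
    }
    bool needsRefType(const Loc &L) {
      return L.LoadChain.size() == 2 && L.LoadChain.back() == 0;
    }

    int main() {
      Loc Spilled{7, {8, 0}};        // pointer spilled at [reg7 + 8], then *p
      assert(needsRefType(Spilled)); // must retype the variable as T&
      Loc Direct{7, {0}};            // register already holds the pointer
      assert(canUseRefType(Direct) && !needsRefType(Direct));
      return 0;
    }
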
+void CodeViewDebug::calculateRanges(
+ LocalVariable &Var, const DbgValueHistoryMap::InstrRanges &Ranges) {
+ const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
+
+ // Calculate the definition ranges.
+ for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+ const InsnRange &Range = *I;
+ const MachineInstr *DVInst = Range.first;
+ assert(DVInst->isDebugValue() && "Invalid History entry");
+ // FIXME: Find a way to represent constant variables, since they are
+ // relatively common.
+ Optional<DbgVariableLocation> Location =
+ DbgVariableLocation::extractFromMachineInstruction(*DVInst);
+ if (!Location)
+ continue;
+
+ // CodeView can only express variables in register and variables in memory
+ // at a constant offset from a register. However, for variables passed
+ // indirectly by pointer, it is common for that pointer to be spilled to a
+ // stack location. For the special case of one offseted load followed by a
+ // zero offset load (a pointer spilled to the stack), we change the type of
+ // the local variable from a value type to a reference type. This tricks the
+ // debugger into doing the load for us.
+ if (Var.UseReferenceType) {
+ // We're using a reference type. Drop the last zero offset load.
+ if (canUseReferenceType(*Location))
+ Location->LoadChain.pop_back();
+ else
+ continue;
+ } else if (needsReferenceType(*Location)) {
+ // This location can't be expressed without switching to a reference type.
+ // Start over using that.
+ Var.UseReferenceType = true;
+ Var.DefRanges.clear();
+ calculateRanges(Var, Ranges);
+ return;
+ }
+
+    // We can only handle a register or an offset load of a register.
+ if (Location->Register == 0 || Location->LoadChain.size() > 1)
+ continue;
+ {
+ LocalVarDefRange DR;
+ DR.CVRegister = TRI->getCodeViewRegNum(Location->Register);
+ DR.InMemory = !Location->LoadChain.empty();
+ DR.DataOffset =
+ !Location->LoadChain.empty() ? Location->LoadChain.back() : 0;
+ if (Location->FragmentInfo) {
+ DR.IsSubfield = true;
+ DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8;
+ } else {
+ DR.IsSubfield = false;
+ DR.StructOffset = 0;
+ }
+
+ if (Var.DefRanges.empty() ||
+ Var.DefRanges.back().isDifferentLocation(DR)) {
+ Var.DefRanges.emplace_back(std::move(DR));
+ }
+ }
+
+ // Compute the label range.
+ const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
+ const MCSymbol *End = getLabelAfterInsn(Range.second);
+ if (!End) {
+ // This range is valid until the next overlapping bitpiece. In the
+ // common case, ranges will not be bitpieces, so they will overlap.
+ auto J = std::next(I);
+ const DIExpression *DIExpr = DVInst->getDebugExpression();
+ while (J != E &&
+ !fragmentsOverlap(DIExpr, J->first->getDebugExpression()))
+ ++J;
+ if (J != E)
+ End = getLabelBeforeInsn(J->first);
+ else
+ End = Asm->getFunctionEnd();
+ }
+
+ // If the last range end is our begin, just extend the last range.
+ // Otherwise make a new range.
+ SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &R =
+ Var.DefRanges.back().Ranges;
+ if (!R.empty() && R.back().second == Begin)
+ R.back().second = End;
+ else
+ R.emplace_back(Begin, End);
+
+ // FIXME: Do more range combining.
+ }
+}
+
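The tail of the loop coalesces label ranges: when a new range begins exactly where the previous one ended for the same location, the previous range is extended instead of a new one being appended. The same step in isolation:

    #include <utility>
    #include <vector>

    using Sym = const void *; // stand-in for MCSymbol *, compared by identity

    // Mirrors the extend-or-append step at the bottom of calculateRanges.
    void extendOrAppend(std::vector<std::pair<Sym, Sym>> &R, Sym Begin,
                        Sym End) {
      if (!R.empty() && R.back().second == Begin)
        R.back().second = End; // contiguous: grow the previous range
      else
        R.emplace_back(Begin, End);
    }
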
void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
DenseSet<InlinedVariable> Processed;
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(Processed);
- const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
-
for (const auto &I : DbgValues) {
InlinedVariable IV = I.first;
if (Processed.count(IV))
@@ -976,87 +1148,15 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
LocalVariable Var;
Var.DIVar = DIVar;
- // Calculate the definition ranges.
- for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
- const InsnRange &Range = *I;
- const MachineInstr *DVInst = Range.first;
- assert(DVInst->isDebugValue() && "Invalid History entry");
- const DIExpression *DIExpr = DVInst->getDebugExpression();
- bool IsSubfield = false;
- unsigned StructOffset = 0;
-
- // Handle fragments.
- auto Fragment = DIExpr->getFragmentInfo();
- if (Fragment) {
- IsSubfield = true;
- StructOffset = Fragment->OffsetInBits / 8;
- } else if (DIExpr->getNumElements() > 0) {
- continue; // Ignore unrecognized exprs.
- }
-
- // Bail if operand 0 is not a valid register. This means the variable is a
- // simple constant, or is described by a complex expression.
- // FIXME: Find a way to represent constant variables, since they are
- // relatively common.
- unsigned Reg =
- DVInst->getOperand(0).isReg() ? DVInst->getOperand(0).getReg() : 0;
- if (Reg == 0)
- continue;
-
- // Handle the two cases we can handle: indirect in memory and in register.
- unsigned CVReg = TRI->getCodeViewRegNum(Reg);
- bool InMemory = DVInst->getOperand(1).isImm();
- int Offset = InMemory ? DVInst->getOperand(1).getImm() : 0;
- {
- LocalVarDefRange DR;
- DR.CVRegister = CVReg;
- DR.InMemory = InMemory;
- DR.DataOffset = Offset;
- DR.IsSubfield = IsSubfield;
- DR.StructOffset = StructOffset;
-
- if (Var.DefRanges.empty() ||
- Var.DefRanges.back().isDifferentLocation(DR)) {
- Var.DefRanges.emplace_back(std::move(DR));
- }
- }
-
- // Compute the label range.
- const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
- const MCSymbol *End = getLabelAfterInsn(Range.second);
- if (!End) {
- // This range is valid until the next overlapping bitpiece. In the
- // common case, ranges will not be bitpieces, so they will overlap.
- auto J = std::next(I);
- while (J != E &&
- !fragmentsOverlap(DIExpr, J->first->getDebugExpression()))
- ++J;
- if (J != E)
- End = getLabelBeforeInsn(J->first);
- else
- End = Asm->getFunctionEnd();
- }
-
- // If the last range end is our begin, just extend the last range.
- // Otherwise make a new range.
- SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &Ranges =
- Var.DefRanges.back().Ranges;
- if (!Ranges.empty() && Ranges.back().second == Begin)
- Ranges.back().second = End;
- else
- Ranges.emplace_back(Begin, End);
-
- // FIXME: Do more range combining.
- }
-
+ calculateRanges(Var, Ranges);
recordLocalVariable(std::move(Var), InlinedAt);
}
}
void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
- const Function *GV = MF->getFunction();
- assert(FnDebugInfo.count(GV) == false);
- CurFn = &FnDebugInfo[GV];
+ const Function &GV = MF->getFunction();
+ assert(FnDebugInfo.count(&GV) == false);
+ CurFn = &FnDebugInfo[&GV];
CurFn->FuncId = NextFuncId++;
CurFn->Begin = Asm->getFunctionBegin();
@@ -1087,10 +1187,40 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
}
}
-void CodeViewDebug::addToUDTs(const DIType *Ty, TypeIndex TI) {
+static bool shouldEmitUdt(const DIType *T) {
+ if (!T)
+ return false;
+
+ // MSVC does not emit UDTs for typedefs that are scoped to classes.
+ if (T->getTag() == dwarf::DW_TAG_typedef) {
+ if (DIScope *Scope = T->getScope().resolve()) {
+ switch (Scope->getTag()) {
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_union_type:
+ return false;
+ }
+ }
+ }
+
+ while (true) {
+ if (!T || T->isForwardDecl())
+ return false;
+
+ const DIDerivedType *DT = dyn_cast<DIDerivedType>(T);
+ if (!DT)
+ return true;
+ T = DT->getBaseType().resolve();
+ }
+}
+
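Concretely, for C++ input like the following (illustrative), the rules above keep S_UDT records out of the cases MSVC also skips:

    struct Widget {
      using Id = int; // class-scoped alias: suppressed, matching MSVC
    };
    typedef struct Opaque *OpaqueRef; // chain reaches a forward decl: suppressed
    typedef unsigned Handle;          // resolves to a concrete base: S_UDT emitted
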
+void CodeViewDebug::addToUDTs(const DIType *Ty) {
// Don't record empty UDTs.
if (Ty->getName().empty())
return;
+ if (!shouldEmitUdt(Ty))
+ return;
SmallVector<StringRef, 5> QualifiedNameComponents;
const DISubprogram *ClosestSubprogram = getQualifiedNameComponents(
@@ -1099,10 +1229,11 @@ void CodeViewDebug::addToUDTs(const DIType *Ty, TypeIndex TI) {
std::string FullyQualifiedName =
getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty));
- if (ClosestSubprogram == nullptr)
- GlobalUDTs.emplace_back(std::move(FullyQualifiedName), TI);
- else if (ClosestSubprogram == CurrentSubprogram)
- LocalUDTs.emplace_back(std::move(FullyQualifiedName), TI);
+ if (ClosestSubprogram == nullptr) {
+ GlobalUDTs.emplace_back(std::move(FullyQualifiedName), Ty);
+ } else if (ClosestSubprogram == CurrentSubprogram) {
+ LocalUDTs.emplace_back(std::move(FullyQualifiedName), Ty);
+ }
  // TODO: What if the ClosestSubprogram is neither null nor the current
// subprogram? Currently, the UDT just gets dropped on the floor.
@@ -1139,7 +1270,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
// The member function type of a member function pointer has no
// ThisAdjustment.
return lowerTypeMemberFunction(cast<DISubroutineType>(Ty), ClassTy,
- /*ThisAdjustment=*/0);
+ /*ThisAdjustment=*/0,
+ /*IsStaticMethod=*/false);
}
return lowerTypeFunction(cast<DISubroutineType>(Ty));
case dwarf::DW_TAG_enumeration_type:
@@ -1160,7 +1292,7 @@ TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
TypeIndex UnderlyingTypeIndex = getTypeIndex(UnderlyingTypeRef);
StringRef TypeName = Ty->getName();
- addToUDTs(Ty, UnderlyingTypeIndex);
+ addToUDTs(Ty);
if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::Int32Long) &&
TypeName == "HRESULT")
@@ -1193,11 +1325,12 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
"codeview doesn't support subranges with lower bounds");
int64_t Count = Subrange->getCount();
- // Variable Length Array (VLA) has Count equal to '-1'.
- // Replace with Count '1', assume it is the minimum VLA length.
- // FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU.
+ // Forward declarations of arrays without a size and VLAs use a count of -1.
+ // Emit a count of zero in these cases to match what MSVC does for arrays
+ // without a size. MSVC doesn't support VLAs, so it's not clear what we
+ // should do for them even if we could distinguish them.
if (Count == -1)
- Count = 1;
+ Count = 0;
// Update the element size and element type index for subsequent subranges.
ElementSize *= Count;
@@ -1209,7 +1342,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
StringRef Name = (i == 0) ? Ty->getName() : "";
ArrayRecord AR(ElementTypeIndex, IndexType, ArraySize, Name);
- ElementTypeIndex = TypeTable.writeKnownType(AR);
+ ElementTypeIndex = TypeTable.writeLeafType(AR);
}
return ElementTypeIndex;
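In source terms (illustrative; the VLA case is a C-style extension in C++), both declarations below carry a subrange count of -1 and now lower to a zero-sized LF_ARRAY, matching MSVC's output for unsized arrays:

    extern int table[]; // unsized forward declaration

    void fill(int n) {
      int vla[n]; // variable-length array
      for (int i = 0; i < n; ++i)
        vla[i] = i;
    }
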
@@ -1342,7 +1475,7 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {
// do.
PointerOptions PO = PointerOptions::None;
PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
- return TypeTable.writeKnownType(PR);
+ return TypeTable.writeLeafType(PR);
}
static PointerToMemberRepresentation
@@ -1393,7 +1526,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) {
MemberPointerInfo MPI(
ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags()));
PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI);
- return TypeTable.writeKnownType(PR);
+ return TypeTable.writeLeafType(PR);
}
/// Given a DWARF calling convention, get the CodeView equivalent. If we don't
@@ -1432,7 +1565,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
}
TypeIndex ModifiedTI = getTypeIndex(BaseTy);
ModifierRecord MR(ModifiedTI, Mods);
- return TypeTable.writeKnownType(MR);
+ return TypeTable.writeLeafType(MR);
}
TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
@@ -1449,18 +1582,19 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
}
ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
- TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec);
+ TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec);
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None,
ArgTypeIndices.size(), ArgListIndex);
- return TypeTable.writeKnownType(Procedure);
+ return TypeTable.writeLeafType(Procedure);
}
TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
const DIType *ClassTy,
- int ThisAdjustment) {
+ int ThisAdjustment,
+ bool IsStaticMethod) {
// Lower the containing class type.
TypeIndex ClassType = getTypeIndex(ClassTy);
@@ -1475,26 +1609,22 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
ReturnTypeIndex = ReturnAndArgTypesRef.front();
ArgTypeIndices = ReturnAndArgTypesRef.drop_front();
}
- TypeIndex ThisTypeIndex = TypeIndex::Void();
- if (!ArgTypeIndices.empty()) {
+ TypeIndex ThisTypeIndex;
+ if (!IsStaticMethod && !ArgTypeIndices.empty()) {
ThisTypeIndex = ArgTypeIndices.front();
ArgTypeIndices = ArgTypeIndices.drop_front();
}
ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
- TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec);
+ TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec);
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
- // TODO: Need to use the correct values for:
- // FunctionOptions
- // ThisPointerAdjustment.
+ // TODO: Need to use the correct values for FunctionOptions.
MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC,
FunctionOptions::None, ArgTypeIndices.size(),
ArgListIndex, ThisAdjustment);
- TypeIndex TI = TypeTable.writeKnownType(MFR);
-
- return TI;
+ return TypeTable.writeLeafType(MFR);
}
TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) {
@@ -1503,7 +1633,7 @@ TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) {
SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near);
VFTableShapeRecord VFTSR(Slots);
- return TypeTable.writeKnownType(VFTSR);
+ return TypeTable.writeLeafType(VFTSR);
}
static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) {
@@ -1530,6 +1660,9 @@ static MethodOptions translateMethodOptionFlags(const DISubprogram *SP) {
static MethodKind translateMethodKindFlags(const DISubprogram *SP,
bool Introduced) {
+ if (SP->getFlags() & DINode::FlagStaticMember)
+ return MethodKind::Static;
+
switch (SP->getVirtuality()) {
case dwarf::DW_VIRTUALITY_none:
break;
@@ -1542,8 +1675,6 @@ static MethodKind translateMethodKindFlags(const DISubprogram *SP,
llvm_unreachable("unhandled virtuality case");
}
- // FIXME: Get Clang to mark DISubprogram as static and do something with it.
-
return MethodKind::Vanilla;
}
@@ -1593,9 +1724,8 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
if (Ty->isForwardDecl()) {
CO |= ClassOptions::ForwardReference;
} else {
- FieldListRecordBuilder FLRB(TypeTable);
-
- FLRB.begin();
+ ContinuationRecordBuilder ContinuationBuilder;
+ ContinuationBuilder.begin(ContinuationRecordKind::FieldList);
for (const DINode *Element : Ty->getElements()) {
// We assume that the frontend provides all members in source declaration
// order, which is what MSVC does.
@@ -1603,18 +1733,18 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
EnumeratorRecord ER(MemberAccess::Public,
APSInt::getUnsigned(Enumerator->getValue()),
Enumerator->getName());
- FLRB.writeMemberType(ER);
+ ContinuationBuilder.writeMemberType(ER);
EnumeratorCount++;
}
}
- FTI = FLRB.end(true);
+ FTI = TypeTable.insertRecord(ContinuationBuilder);
}
std::string FullName = getFullyQualifiedName(Ty);
EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(),
getTypeIndex(Ty->getBaseType()));
- return TypeTable.writeKnownType(ER);
+ return TypeTable.writeLeafType(ER);
}
//===----------------------------------------------------------------------===//
@@ -1643,7 +1773,7 @@ struct llvm::ClassInfo {
TypeIndex VShapeTI;
- std::vector<const DICompositeType *> NestedClasses;
+ std::vector<const DIType *> NestedTypes;
};
void CodeViewDebug::clear() {
@@ -1694,12 +1824,14 @@ ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) {
} else if (DDTy->getTag() == dwarf::DW_TAG_pointer_type &&
DDTy->getName() == "__vtbl_ptr_type") {
Info.VShapeTI = getTypeIndex(DDTy);
+ } else if (DDTy->getTag() == dwarf::DW_TAG_typedef) {
+ Info.NestedTypes.push_back(DDTy);
} else if (DDTy->getTag() == dwarf::DW_TAG_friend) {
// Ignore friend members. It appears that MSVC emitted info about
// friends in the past, but modern versions do not.
}
} else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
- Info.NestedClasses.push_back(Composite);
+ Info.NestedTypes.push_back(Composite);
}
// Skip other unrecognized kinds of elements.
}
@@ -1715,7 +1847,7 @@ TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) {
std::string FullName = getFullyQualifiedName(Ty);
ClassRecord CR(Kind, 0, CO, TypeIndex(), TypeIndex(), TypeIndex(), 0,
FullName, Ty->getIdentifier());
- TypeIndex FwdDeclTI = TypeTable.writeKnownType(CR);
+ TypeIndex FwdDeclTI = TypeTable.writeLeafType(CR);
if (!Ty->isForwardDecl())
DeferredCompleteTypes.push_back(Ty);
return FwdDeclTI;
@@ -1741,16 +1873,17 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
ClassRecord CR(Kind, FieldCount, CO, FieldTI, TypeIndex(), VShapeTI,
SizeInBytes, FullName, Ty->getIdentifier());
- TypeIndex ClassTI = TypeTable.writeKnownType(CR);
+ TypeIndex ClassTI = TypeTable.writeLeafType(CR);
if (const auto *File = Ty->getFile()) {
StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File));
- TypeIndex SIDI = TypeTable.writeKnownType(SIDR);
+ TypeIndex SIDI = TypeTable.writeLeafType(SIDR);
+
UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine());
- TypeTable.writeKnownType(USLR);
+ TypeTable.writeLeafType(USLR);
}
- addToUDTs(Ty, ClassTI);
+ addToUDTs(Ty);
return ClassTI;
}
@@ -1760,7 +1893,7 @@ TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) {
ClassOptions::ForwardReference | getCommonClassOptions(Ty);
std::string FullName = getFullyQualifiedName(Ty);
UnionRecord UR(0, CO, TypeIndex(), 0, FullName, Ty->getIdentifier());
- TypeIndex FwdDeclTI = TypeTable.writeKnownType(UR);
+ TypeIndex FwdDeclTI = TypeTable.writeLeafType(UR);
if (!Ty->isForwardDecl())
DeferredCompleteTypes.push_back(Ty);
return FwdDeclTI;
@@ -1782,14 +1915,15 @@ TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) {
UnionRecord UR(FieldCount, CO, FieldTI, SizeInBytes, FullName,
Ty->getIdentifier());
- TypeIndex UnionTI = TypeTable.writeKnownType(UR);
+ TypeIndex UnionTI = TypeTable.writeLeafType(UR);
StringIdRecord SIR(TypeIndex(0x0), getFullFilepath(Ty->getFile()));
- TypeIndex SIRI = TypeTable.writeKnownType(SIR);
+ TypeIndex SIRI = TypeTable.writeLeafType(SIR);
+
UdtSourceLineRecord USLR(UnionTI, SIRI, Ty->getLine());
- TypeTable.writeKnownType(USLR);
+ TypeTable.writeLeafType(USLR);
- addToUDTs(Ty, UnionTI);
+ addToUDTs(Ty);
return UnionTI;
}
@@ -1802,8 +1936,8 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
// list record.
unsigned MemberCount = 0;
ClassInfo Info = collectClassInfo(Ty);
- FieldListRecordBuilder FLBR(TypeTable);
- FLBR.begin();
+ ContinuationRecordBuilder ContinuationBuilder;
+ ContinuationBuilder.begin(ContinuationRecordKind::FieldList);
// Create base classes.
for (const DIDerivedType *I : Info.Inheritance) {
@@ -1821,14 +1955,14 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset,
VBTableIndex);
- FLBR.writeMemberType(VBCR);
+ ContinuationBuilder.writeMemberType(VBCR);
} else {
assert(I->getOffsetInBits() % 8 == 0 &&
"bases must be on byte boundaries");
BaseClassRecord BCR(translateAccessFlags(Ty->getTag(), I->getFlags()),
getTypeIndex(I->getBaseType()),
I->getOffsetInBits() / 8);
- FLBR.writeMemberType(BCR);
+ ContinuationBuilder.writeMemberType(BCR);
}
}
@@ -1842,7 +1976,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
if (Member->isStaticMember()) {
StaticDataMemberRecord SDMR(Access, MemberBaseType, MemberName);
- FLBR.writeMemberType(SDMR);
+ ContinuationBuilder.writeMemberType(SDMR);
MemberCount++;
continue;
}
@@ -1851,7 +1985,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
if ((Member->getFlags() & DINode::FlagArtificial) &&
Member->getName().startswith("_vptr$")) {
VFPtrRecord VFPR(getTypeIndex(Member->getBaseType()));
- FLBR.writeMemberType(VFPR);
+ ContinuationBuilder.writeMemberType(VFPR);
MemberCount++;
continue;
}
@@ -1868,12 +2002,12 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
StartBitOffset -= MemberOffsetInBits;
BitFieldRecord BFR(MemberBaseType, Member->getSizeInBits(),
StartBitOffset);
- MemberBaseType = TypeTable.writeKnownType(BFR);
+ MemberBaseType = TypeTable.writeLeafType(BFR);
}
uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8;
DataMemberRecord DMR(Access, MemberBaseType, MemberOffsetInBytes,
MemberName);
- FLBR.writeMemberType(DMR);
+ ContinuationBuilder.writeMemberType(DMR);
MemberCount++;
}
@@ -1898,40 +2032,42 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
}
assert(!Methods.empty() && "Empty methods map entry");
if (Methods.size() == 1)
- FLBR.writeMemberType(Methods[0]);
+ ContinuationBuilder.writeMemberType(Methods[0]);
else {
+ // FIXME: Make this use its own ContinuationBuilder so that
+ // MethodOverloadList can be split correctly.
MethodOverloadListRecord MOLR(Methods);
- TypeIndex MethodList = TypeTable.writeKnownType(MOLR);
+ TypeIndex MethodList = TypeTable.writeLeafType(MOLR);
+
OverloadedMethodRecord OMR(Methods.size(), MethodList, Name);
- FLBR.writeMemberType(OMR);
+ ContinuationBuilder.writeMemberType(OMR);
}
}
// Create nested classes.
- for (const DICompositeType *Nested : Info.NestedClasses) {
+ for (const DIType *Nested : Info.NestedTypes) {
NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName());
- FLBR.writeMemberType(R);
+ ContinuationBuilder.writeMemberType(R);
MemberCount++;
}
- TypeIndex FieldTI = FLBR.end(true);
+ TypeIndex FieldTI = TypeTable.insertRecord(ContinuationBuilder);
return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount,
- !Info.NestedClasses.empty());
+ !Info.NestedTypes.empty());
}
TypeIndex CodeViewDebug::getVBPTypeIndex() {
if (!VBPType.getIndex()) {
// Make a 'const int *' type.
ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const);
- TypeIndex ModifiedTI = TypeTable.writeKnownType(MR);
+ TypeIndex ModifiedTI = TypeTable.writeLeafType(MR);
PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64
: PointerKind::Near32;
PointerMode PM = PointerMode::Pointer;
PointerOptions PO = PointerOptions::None;
PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes());
-
- VBPType = TypeTable.writeKnownType(PR);
+ VBPType = TypeTable.writeLeafType(PR);
}
return VBPType;
@@ -1957,6 +2093,16 @@ TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) {
return recordTypeIndexForDINode(Ty, TI, ClassTy);
}
+TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) {
+ DIType *Ty = TypeRef.resolve();
+ PointerRecord PR(getTypeIndex(Ty),
+ getPointerSizeInBytes() == 8 ? PointerKind::Near64
+ : PointerKind::Near32,
+ PointerMode::LValueReference, PointerOptions::None,
+ Ty->getSizeInBits() / 8);
+ return TypeTable.writeLeafType(PR);
+}
+
TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
const DIType *Ty = TypeRef.resolve();
@@ -2064,7 +2210,9 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
Flags |= LocalSymFlags::IsOptimizedOut;
OS.AddComment("TypeIndex");
- TypeIndex TI = getCompleteTypeIndex(Var.DIVar->getType());
+ TypeIndex TI = Var.UseReferenceType
+ ? getTypeIndexForReferenceTo(Var.DIVar->getType())
+ : getCompleteTypeIndex(Var.DIVar->getType());
OS.EmitIntValue(TI.getIndex(), 4);
OS.AddComment("Flags");
OS.EmitIntValue(static_cast<uint16_t>(Flags), 2);
@@ -2125,19 +2273,21 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
}
void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
- const Function *GV = MF->getFunction();
- assert(FnDebugInfo.count(GV));
- assert(CurFn == &FnDebugInfo[GV]);
+ const Function &GV = MF->getFunction();
+ assert(FnDebugInfo.count(&GV));
+ assert(CurFn == &FnDebugInfo[&GV]);
- collectVariableInfo(GV->getSubprogram());
+ collectVariableInfo(GV.getSubprogram());
// Don't emit anything if we don't have any line tables.
if (!CurFn->HaveLineInfo) {
- FnDebugInfo.erase(GV);
+ FnDebugInfo.erase(&GV);
CurFn = nullptr;
return;
}
+ CurFn->Annotations = MF->getCodeViewAnnotations();
+
CurFn->End = Asm->getFunctionEnd();
CurFn = nullptr;
@@ -2156,6 +2306,8 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
DebugLoc DL = MI->getDebugLoc();
if (!DL && MI->getParent() != PrevInstBB) {
for (const auto &NextMI : *MI->getParent()) {
+ if (NextMI.isDebugValue())
+ continue;
DL = NextMI.getDebugLoc();
if (DL)
break;
@@ -2187,8 +2339,11 @@ void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) {
}
void CodeViewDebug::emitDebugInfoForUDTs(
- ArrayRef<std::pair<std::string, TypeIndex>> UDTs) {
- for (const std::pair<std::string, codeview::TypeIndex> &UDT : UDTs) {
+ ArrayRef<std::pair<std::string, const DIType *>> UDTs) {
+ for (const auto &UDT : UDTs) {
+ const DIType *T = UDT.second;
+ assert(shouldEmitUdt(T));
+
MCSymbol *UDTRecordBegin = MMI->getContext().createTempSymbol(),
*UDTRecordEnd = MMI->getContext().createTempSymbol();
OS.AddComment("Record length");
@@ -2199,7 +2354,7 @@ void CodeViewDebug::emitDebugInfoForUDTs(
OS.EmitIntValue(unsigned(SymbolKind::S_UDT), 2);
OS.AddComment("Type");
- OS.EmitIntValue(UDT.second.getIndex(), 4);
+ OS.EmitIntValue(getCompleteTypeIndex(T).getIndex(), 4);
emitNullTerminatedSymbolName(OS, UDT.first);
OS.EmitLabel(UDTRecordEnd);
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index fd8f60425c24..69e93640d7ef 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -23,8 +23,8 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
@@ -52,7 +52,7 @@ class MachineFunction;
class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
MCStreamer &OS;
BumpPtrAllocator Allocator;
- codeview::TypeTableBuilder TypeTable;
+ codeview::GlobalTypeTableBuilder TypeTable;
/// Represents the most general definition range.
struct LocalVarDefRange {
@@ -94,6 +94,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
struct LocalVariable {
const DILocalVariable *DIVar = nullptr;
SmallVector<LocalVarDefRange, 1> DefRanges;
+ bool UseReferenceType = false;
};
struct InlineSite {
@@ -118,6 +119,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LocalVariable, 1> Locals;
+ std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
+
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
unsigned FuncId = 0;
@@ -147,6 +150,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP);
+ void calculateRanges(LocalVariable &Var,
+ const DbgValueHistoryMap::InstrRanges &Ranges);
+
static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children,
const FunctionInfo &FI,
const InlineSite &Site);
@@ -155,8 +161,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// emit at the end of the TU.
MapVector<const Function *, FunctionInfo> FnDebugInfo;
- /// Map from DIFile to .cv_file id.
- DenseMap<const DIFile *, unsigned> FileIdMap;
+ /// Map from full file path to .cv_file id. Full paths are built from DIFiles
+  /// and are stored in FileToFilepathMap.
+ DenseMap<StringRef, unsigned> FileIdMap;
/// All inlined subprograms in the order they should be emitted.
SmallSetVector<const DISubprogram *, 4> InlinedSubprograms;
@@ -187,8 +194,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
-  // The UDTs we have seen while processing types; each entry is a pair of type
-  // index and type name.
+  // The UDTs we have seen while processing types; each entry is a pair of
+  // type name and type.
- std::vector<std::pair<std::string, codeview::TypeIndex>> LocalUDTs,
- GlobalUDTs;
+ std::vector<std::pair<std::string, const DIType *>> LocalUDTs;
+ std::vector<std::pair<std::string, const DIType *>> GlobalUDTs;
using FileToFilepathMapTy = std::map<const DIFile *, std::string>;
FileToFilepathMapTy FileToFilepathMap;
@@ -212,6 +219,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitTypeInformation();
+ void emitTypeGlobalHashes();
+
void emitCompilerInformation();
void emitInlineeLinesSubsection();
@@ -222,8 +231,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitDebugInfoForRetainedTypes();
- void emitDebugInfoForUDTs(
- ArrayRef<std::pair<std::string, codeview::TypeIndex>> UDTs);
+ void
+ emitDebugInfoForUDTs(ArrayRef<std::pair<std::string, const DIType *>> UDTs);
void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
const GlobalVariable *GV, MCSymbol *GVSym);
@@ -259,6 +268,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex getTypeIndex(DITypeRef TypeRef,
DITypeRef ClassTyRef = DITypeRef());
+ codeview::TypeIndex getTypeIndexForReferenceTo(DITypeRef TypeRef);
+
codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP,
const DICompositeType *Class);
@@ -266,7 +277,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex getVBPTypeIndex();
- void addToUDTs(const DIType *Ty, codeview::TypeIndex TI);
+ void addToUDTs(const DIType *Ty);
codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy);
codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty);
@@ -279,7 +290,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty,
const DIType *ClassTy,
- int ThisAdjustment);
+ int ThisAdjustment,
+ bool IsStaticMethod);
codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty);
codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty);
codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty);
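A note on the TypeTableBuilder -> GlobalTypeTableBuilder swap and the new emitTypeGlobalHashes() hook above: a global type table keys each CodeView record by a content hash that is stable across translation units, so a consumer such as the linker can deduplicate identical type records without re-parsing them. The sketch below only illustrates that hashing scheme; the helper names (fnv1a, globalHash) are hypothetical, and the real builder uses a stronger hash over the actual record encoding.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Stand-in 64-bit FNV-1a; illustrative only, not the real hash.
    static uint64_t fnv1a(const void *Data, size_t Len,
                          uint64_t H = 0xcbf29ce484222325ULL) {
      const uint8_t *P = static_cast<const uint8_t *>(Data);
      for (size_t I = 0; I < Len; ++I) {
        H ^= P[I];
        H *= 0x100000001b3ULL;
      }
      return H;
    }

    // A record's global hash covers its serialized bytes plus the *global
    // hashes* of the records it references, in place of their TU-local type
    // indices, so equal types hash equally across object files.
    uint64_t globalHash(const std::vector<uint8_t> &RecordBytes,
                        const std::vector<uint64_t> &ReferencedHashes) {
      uint64_t H = fnv1a(RecordBytes.data(), RecordBytes.size());
      for (uint64_t Ref : ReferencedHashes)
        H = fnv1a(&Ref, sizeof(Ref), H); // chain dependency hashes in order
      return H;
    }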
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 886e6e264b3e..b3148db30cd6 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -777,6 +777,7 @@ void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
+ case dwarf::DW_FORM_data16: break;
}
for (const auto &V : values())
@@ -791,6 +792,7 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
case dwarf::DW_FORM_block: return Size + getULEB128Size(Size);
+ case dwarf::DW_FORM_data16: return 16;
default: llvm_unreachable("Improper form for block");
}
}
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index c2ad9db81cfd..856758c8e4f6 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp -------------===//
+//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,17 +9,24 @@
#include "DbgValueHistoryCalculator.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <algorithm>
+#include <cassert>
#include <map>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
@@ -72,10 +79,12 @@ unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const {
}
namespace {
+
// Maps physreg numbers to the variables they describe.
-typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
-typedef std::map<unsigned, SmallVector<InlinedVariable, 1>> RegDescribedVarsMap;
-}
+using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
+using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedVariable, 1>>;
+
+} // end anonymous namespace
// \brief Claim that @Var is not described by @RegNo anymore.
static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
@@ -83,7 +92,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
const auto &I = RegVars.find(RegNo);
assert(RegNo != 0U && I != RegVars.end());
auto &VarSet = I->second;
- const auto &VarPos = find(VarSet, Var);
+ const auto &VarPos = llvm::find(VarSet, Var);
assert(VarPos != VarSet.end());
VarSet.erase(VarPos);
// Don't keep empty sets in a map to keep it as small as possible.
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
index 16d2d7fd7e99..a7b0562e8102 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ----*- C++ -*--===//
+//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,9 +13,11 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include <utility>
namespace llvm {
+class DILocalVariable;
class MachineFunction;
class MachineInstr;
class TargetRegisterInfo;
@@ -29,11 +31,11 @@ class DbgValueHistoryMap {
// instruction of the next instruction range, or until the end of the
// function.
public:
- typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange;
- typedef SmallVector<InstrRange, 4> InstrRanges;
- typedef std::pair<const DILocalVariable *, const DILocation *>
- InlinedVariable;
- typedef MapVector<InlinedVariable, InstrRanges> InstrRangesMap;
+ using InstrRange = std::pair<const MachineInstr *, const MachineInstr *>;
+ using InstrRanges = SmallVector<InstrRange, 4>;
+ using InlinedVariable =
+ std::pair<const DILocalVariable *, const DILocation *>;
+ using InstrRangesMap = MapVector<InlinedVariable, InstrRanges>;
private:
InstrRangesMap VarInstrRanges;
@@ -41,6 +43,7 @@ private:
public:
void startInstrRange(InlinedVariable Var, const MachineInstr &MI);
void endInstrRange(InlinedVariable Var, const MachineInstr &MI);
+
// Returns register currently describing @Var. If @Var is currently
// inaccessible or is not described by a register, returns 0.
unsigned getRegisterForVar(InlinedVariable Var) const;
@@ -54,6 +57,7 @@ public:
void calculateDbgValueHistory(const MachineFunction *MF,
const TargetRegisterInfo *TRI,
DbgValueHistoryMap &Result);
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 0971c5942203..d94b0e5c2118 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -13,16 +13,76 @@
//===----------------------------------------------------------------------===//
#include "DebugHandlerBase.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+Optional<DbgVariableLocation>
+DbgVariableLocation::extractFromMachineInstruction(
+ const MachineInstr &Instruction) {
+ DbgVariableLocation Location;
+ if (!Instruction.isDebugValue())
+ return None;
+ if (!Instruction.getOperand(0).isReg())
+ return None;
+ Location.Register = Instruction.getOperand(0).getReg();
+ Location.FragmentInfo.reset();
+ // We only handle expressions generated by DIExpression::appendOffset,
+ // which doesn't require a full stack machine.
+ int64_t Offset = 0;
+ const DIExpression *DIExpr = Instruction.getDebugExpression();
+ auto Op = DIExpr->expr_op_begin();
+ while (Op != DIExpr->expr_op_end()) {
+ switch (Op->getOp()) {
+ case dwarf::DW_OP_constu: {
+ int Value = Op->getArg(0);
+ ++Op;
+ if (Op != DIExpr->expr_op_end()) {
+ switch (Op->getOp()) {
+ case dwarf::DW_OP_minus:
+ Offset -= Value;
+ break;
+ case dwarf::DW_OP_plus:
+ Offset += Value;
+ break;
+ default:
+ continue;
+ }
+ }
+ } break;
+ case dwarf::DW_OP_plus_uconst:
+ Offset += Op->getArg(0);
+ break;
+ case dwarf::DW_OP_LLVM_fragment:
+ Location.FragmentInfo = {Op->getArg(1), Op->getArg(0)};
+ break;
+ case dwarf::DW_OP_deref:
+ Location.LoadChain.push_back(Offset);
+ Offset = 0;
+ break;
+ default:
+ return None;
+ }
+ ++Op;
+ }
+
+ // Do one final implicit DW_OP_deref if this was an indirect DBG_VALUE
+ // instruction.
+ // FIXME: Replace these with DIExpression.
+ if (Instruction.isIndirectDebugValue())
+ Location.LoadChain.push_back(Offset);
+
+ return Location;
+}
+
DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
// Each LexicalScope has first instruction and last instruction to mark
@@ -119,7 +179,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI,
const MachineFunction *MF) {
if (!MMI->hasDebugInfo())
return false;
- auto *SP = MF->getFunction()->getSubprogram();
+ auto *SP = MF->getFunction().getSubprogram();
if (!SP)
return false;
assert(SP->getUnit());
@@ -163,7 +223,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
// label, so arguments are visible when breaking at function entry.
const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
if (DIVar->isParameter() &&
- getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
+ getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) {
LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
if (Ranges.front().first->getDebugExpression()->isFragment()) {
// Mark all non-overlapping initial fragments.
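To make the extractFromMachineInstruction() walk above concrete: for a DBG_VALUE of register R whose expression is DW_OP_plus_uconst 8, DW_OP_deref, DW_OP_plus_uconst 4, the loop yields Register = R and LoadChain = {8}; the trailing 4 is only appended (as the final implicit load) when the DBG_VALUE is indirect. Below is a minimal standalone re-creation of that offset/deref folding, using hypothetical Op/Kind names rather than the DIExpression API:

    #include <cstdint>
    #include <optional>
    #include <vector>

    enum class OpKind { PlusUConst, Deref };
    struct ExprOp { OpKind Kind; int64_t Arg; };

    struct Loc {
      unsigned Register;
      std::vector<int64_t> LoadChain; // one offset per load
    };

    // Fold +constant operators into a running offset; each deref pushes the
    // offset accumulated so far and restarts at zero, mirroring the loop in
    // the hunk above. Indirect DBG_VALUEs get one final implicit load.
    std::optional<Loc> decode(unsigned Reg, const std::vector<ExprOp> &Ops,
                              bool Indirect) {
      Loc L{Reg, {}};
      int64_t Offset = 0;
      for (const ExprOp &Op : Ops) {
        switch (Op.Kind) {
        case OpKind::PlusUConst:
          Offset += Op.Arg;
          break;
        case OpKind::Deref:
          L.LoadChain.push_back(Offset);
          Offset = 0;
          break;
        }
      }
      if (Indirect)
        L.LoadChain.push_back(Offset);
      return L;
    }

For example, decode(R, {{OpKind::PlusUConst, 8}, {OpKind::Deref, 0}, {OpKind::PlusUConst, 4}}, true) yields LoadChain {8, 4}: load *(R + 8), then load 4 bytes past that result.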
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
index 659a921e1fc5..245d70038de9 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
@@ -17,14 +17,38 @@
#include "AsmPrinterHandler.h"
#include "DbgValueHistoryCalculator.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DebugInfoMetadata.h"
namespace llvm {
class AsmPrinter;
+class MachineInstr;
class MachineModuleInfo;
+/// Represents the location at which a variable is stored.
+struct DbgVariableLocation {
+ /// Base register.
+ unsigned Register;
+
+ /// Chain of offsetted loads necessary to load the value if it lives in
+ /// memory. Every load except for the last is pointer-sized.
+ SmallVector<int64_t, 1> LoadChain;
+
+ /// Present if the location is part of a larger variable.
+ llvm::Optional<llvm::DIExpression::FragmentInfo> FragmentInfo;
+
+ /// Extract a DbgVariableLocation from a MachineInstr.
+ /// This will only work if Instruction is a debug value instruction
+ /// and the associated DIExpression is in one of the supported forms.
+ /// If these requirements are not met, the returned Optional will not
+ /// have a value.
+ static Optional<DbgVariableLocation>
+ extractFromMachineInstruction(const MachineInstr &Instruction);
+};
+
/// Base class for debug information backends. Common functionality related to
/// tracking which variables and scopes are alive at a given PC live here.
class DebugHandlerBase : public AsmPrinterHandler {
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index a68e8cc6b4b3..3d6d8a76529c 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -81,7 +81,7 @@ public:
if (isLocation()) {
llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
if (Loc.isIndirect())
- llvm::dbgs() << '+' << Loc.getOffset();
+ llvm::dbgs() << "+0";
llvm::dbgs() << "} ";
}
else if (isConstantInt())
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 9c324ea26ac8..c21b3d3451ad 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -1,4 +1,4 @@
-//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=//
+//===- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,16 +12,22 @@
//===----------------------------------------------------------------------===//
#include "DwarfAccelTable.h"
-#include "DwarfCompileUnit.h"
-#include "DwarfDebug.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <vector>
using namespace llvm;
@@ -142,13 +148,13 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
unsigned index = 0;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
Asm->OutStreamer->AddComment("Bucket " + Twine(i));
- if (Buckets[i].size() != 0)
+ if (!Buckets[i].empty())
Asm->EmitInt32(index);
else
- Asm->EmitInt32(UINT32_MAX);
+ Asm->EmitInt32(std::numeric_limits<uint32_t>::max());
// Buckets point in the list of hashes, not to the data. Do not
// increment the index multiple times in case of hash collisions.
- uint64_t PrevHash = UINT64_MAX;
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
for (auto *HD : Buckets[i]) {
uint32_t HashValue = HD->HashValue;
if (PrevHash != HashValue)
@@ -161,7 +167,7 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
// Walk through the buckets and emit the individual hashes for each
// bucket.
void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
- uint64_t PrevHash = UINT64_MAX;
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
@@ -181,7 +187,7 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
// beginning of the section. The non-section symbol will be output later
// when we emit the actual data.
void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
- uint64_t PrevHash = UINT64_MAX;
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
@@ -205,13 +211,14 @@ void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
// Terminate each HashData bucket with 0.
void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
- uint64_t PrevHash = UINT64_MAX;
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
HI != HE; ++HI) {
// Terminate the previous entry if there is no hash collision
// with the current one.
- if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue)
+ if (PrevHash != std::numeric_limits<uint64_t>::max() &&
+ PrevHash != (*HI)->HashValue)
Asm->EmitInt32(0);
// Remember to emit the label for our offset.
Asm->OutStreamer->EmitLabel((*HI)->Sym);
@@ -257,31 +264,30 @@ void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin,
}
#ifndef NDEBUG
-void DwarfAccelTable::print(raw_ostream &O) {
-
- Header.print(O);
- HeaderData.print(O);
+void DwarfAccelTable::print(raw_ostream &OS) {
+ Header.print(OS);
+ HeaderData.print(OS);
- O << "Entries: \n";
+ OS << "Entries: \n";
for (StringMap<DataArray>::const_iterator EI = Entries.begin(),
EE = Entries.end();
EI != EE; ++EI) {
- O << "Name: " << EI->getKeyData() << "\n";
+ OS << "Name: " << EI->getKeyData() << "\n";
for (HashDataContents *HD : EI->second.Values)
- HD->print(O);
+ HD->print(OS);
}
- O << "Buckets and Hashes: \n";
+ OS << "Buckets and Hashes: \n";
for (size_t i = 0, e = Buckets.size(); i < e; ++i)
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
HI != HE; ++HI)
- (*HI)->print(O);
+ (*HI)->print(OS);
- O << "Data: \n";
+ OS << "Data: \n";
for (std::vector<HashData *>::const_iterator DI = Data.begin(),
DE = Data.end();
DI != DE; ++DI)
- (*DI)->print(O);
+ (*DI)->print(OS);
}
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index b1ef8cfe989d..f56199dc8e72 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -1,4 +1,4 @@
-//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
+//==- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -15,16 +15,19 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstddef>
+#include <cstdint>
#include <vector>
// The dwarf accelerator tables are an indirect hash table optimized
@@ -65,44 +68,35 @@ class AsmPrinter;
class DwarfDebug;
class DwarfAccelTable {
-
- static uint32_t HashDJB(StringRef Str) {
- uint32_t h = 5381;
- for (unsigned i = 0, e = Str.size(); i != e; ++i)
- h = ((h << 5) + h) + Str[i];
- return h;
- }
-
// Helper function to compute the number of buckets needed based on
// the number of unique hashes.
- void ComputeBucketCount(void);
+ void ComputeBucketCount();
struct TableHeader {
- uint32_t magic; // 'HASH' magic value to allow endian detection
- uint16_t version; // Version number.
- uint16_t hash_function; // The hash function enumeration that was used.
- uint32_t bucket_count; // The number of buckets in this hash table.
- uint32_t hashes_count; // The total number of unique hash values
- // and hash data offsets in this table.
- uint32_t header_data_len; // The bytes to skip to get to the hash
- // indexes (buckets) for correct alignment.
+ uint32_t magic = MagicHash; // 'HASH' magic value to allow endian detection
+ uint16_t version = 1; // Version number.
+ uint16_t hash_function = dwarf::DW_hash_function_djb;
+ // The hash function enumeration that was used.
+ uint32_t bucket_count = 0; // The number of buckets in this hash table.
+ uint32_t hashes_count = 0; // The total number of unique hash values
+ // and hash data offsets in this table.
+ uint32_t header_data_len; // The bytes to skip to get to the hash
+ // indexes (buckets) for correct alignment.
// Also written to disk is the implementation specific header data.
static const uint32_t MagicHash = 0x48415348;
- TableHeader(uint32_t data_len)
- : magic(MagicHash), version(1),
- hash_function(dwarf::DW_hash_function_djb), bucket_count(0),
- hashes_count(0), header_data_len(data_len) {}
+ TableHeader(uint32_t data_len) : header_data_len(data_len) {}
#ifndef NDEBUG
- void print(raw_ostream &O) {
- O << "Magic: " << format("0x%x", magic) << "\n"
- << "Version: " << version << "\n"
- << "Hash Function: " << hash_function << "\n"
- << "Bucket Count: " << bucket_count << "\n"
- << "Header Data Length: " << header_data_len << "\n";
+ void print(raw_ostream &OS) {
+ OS << "Magic: " << format("0x%x", magic) << "\n"
+ << "Version: " << version << "\n"
+ << "Hash Function: " << hash_function << "\n"
+ << "Bucket Count: " << bucket_count << "\n"
+ << "Header Data Length: " << header_data_len << "\n";
}
+
void dump() { print(dbgs()); }
#endif
};
@@ -127,11 +121,13 @@ public:
uint16_t form; // DWARF DW_FORM_ defines
constexpr Atom(uint16_t type, uint16_t form) : type(type), form(form) {}
+
#ifndef NDEBUG
- void print(raw_ostream &O) {
- O << "Type: " << dwarf::AtomTypeString(type) << "\n"
- << "Form: " << dwarf::FormEncodingString(form) << "\n";
+ void print(raw_ostream &OS) {
+ OS << "Type: " << dwarf::AtomTypeString(type) << "\n"
+ << "Form: " << dwarf::FormEncodingString(form) << "\n";
}
+
void dump() { print(dbgs()); }
#endif
};
@@ -145,11 +141,12 @@ private:
: die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {}
#ifndef NDEBUG
- void print(raw_ostream &O) {
- O << "die_offset_base: " << die_offset_base << "\n";
+ void print(raw_ostream &OS) {
+ OS << "die_offset_base: " << die_offset_base << "\n";
for (size_t i = 0; i < Atoms.size(); i++)
- Atoms[i].print(O);
+ Atoms[i].print(OS);
}
+
void dump() { print(dbgs()); }
#endif
};
@@ -168,11 +165,12 @@ public:
char Flags; // Specific flags to output
HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {}
+
#ifndef NDEBUG
- void print(raw_ostream &O) const {
- O << " Offset: " << Die->getOffset() << "\n";
- O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n";
- O << " Flags: " << Flags << "\n";
+ void print(raw_ostream &OS) const {
+ OS << " Offset: " << Die->getOffset() << "\n"
+ << " Tag: " << dwarf::TagString(Die->getTag()) << "\n"
+ << " Flags: " << Flags << "\n";
}
#endif
};
@@ -183,39 +181,41 @@ private:
DwarfStringPoolEntryRef Name;
std::vector<HashDataContents *> Values;
};
+
friend struct HashData;
+
struct HashData {
StringRef Str;
uint32_t HashValue;
MCSymbol *Sym;
DwarfAccelTable::DataArray &Data; // offsets
+
HashData(StringRef S, DwarfAccelTable::DataArray &Data)
: Str(S), Data(Data) {
- HashValue = DwarfAccelTable::HashDJB(S);
+ HashValue = dwarf::djbHash(S);
}
+
#ifndef NDEBUG
- void print(raw_ostream &O) {
- O << "Name: " << Str << "\n";
- O << " Hash Value: " << format("0x%x", HashValue) << "\n";
- O << " Symbol: ";
+ void print(raw_ostream &OS) {
+ OS << "Name: " << Str << "\n";
+ OS << " Hash Value: " << format("0x%x", HashValue) << "\n";
+ OS << " Symbol: ";
if (Sym)
- O << *Sym;
+ OS << *Sym;
else
- O << "<none>";
- O << "\n";
+ OS << "<none>";
+ OS << "\n";
for (HashDataContents *C : Data.Values) {
- O << " Offset: " << C->Die->getOffset() << "\n";
- O << " Tag: " << dwarf::TagString(C->Die->getTag()) << "\n";
- O << " Flags: " << C->Flags << "\n";
+ OS << " Offset: " << C->Die->getOffset() << "\n";
+ OS << " Tag: " << dwarf::TagString(C->Die->getTag()) << "\n";
+ OS << " Flags: " << C->Flags << "\n";
}
}
+
void dump() { print(dbgs()); }
#endif
};
- DwarfAccelTable(const DwarfAccelTable &) = delete;
- void operator=(const DwarfAccelTable &) = delete;
-
// Internal Functions
void EmitHeader(AsmPrinter *);
void EmitBuckets(AsmPrinter *);
@@ -231,25 +231,31 @@ private:
TableHeaderData HeaderData;
std::vector<HashData *> Data;
- typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries;
+ using StringEntries = StringMap<DataArray, BumpPtrAllocator &>;
+
StringEntries Entries;
// Buckets/Hashes/Offsets
- typedef std::vector<HashData *> HashList;
- typedef std::vector<HashList> BucketList;
+ using HashList = std::vector<HashData *>;
+ using BucketList = std::vector<HashList>;
BucketList Buckets;
HashList Hashes;
// Public Implementation
public:
DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
+ DwarfAccelTable(const DwarfAccelTable &) = delete;
+ DwarfAccelTable &operator=(const DwarfAccelTable &) = delete;
+
void AddName(DwarfStringPoolEntryRef Name, const DIE *Die, char Flags = 0);
void FinalizeTable(AsmPrinter *, StringRef);
void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *);
#ifndef NDEBUG
- void print(raw_ostream &O);
+ void print(raw_ostream &OS);
void dump() { print(dbgs()); }
#endif
};
-}
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
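For reference, the deleted HashDJB member and its dwarf::djbHash replacement compute the same Bernstein "times 33" hash that the accelerator-table format names DW_hash_function_djb; the entire algorithm fits in a few lines:

    #include <cstdint>
    #include <string_view>

    // Bernstein hash: start at 5381, then H = H * 33 + C per byte.
    uint32_t djbHash(std::string_view Str) {
      uint32_t H = 5381;
      for (unsigned char C : Str)
        H = ((H << 5) + H) + C; // (H << 5) + H == H * 33
      return H;
    }

(The deleted in-class version iterated plain char; the signedness difference only matters for non-ASCII input.)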
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index dd7f7931b06b..cbb4c48b4d88 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -12,13 +12,12 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
@@ -31,11 +30,7 @@
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
@@ -92,7 +87,7 @@ static MCSymbol *getExceptionSym(AsmPrinter *Asm) {
void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
- const Function *F = MF->getFunction();
+ const Function &F = MF->getFunction();
// If any landing pads survive, we need an EH table.
bool hasLandingPads = !MF->getLandingPads().empty();
@@ -105,17 +100,17 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
const Function *Per = nullptr;
- if (F->hasPersonalityFn())
- Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ if (F.hasPersonalityFn())
+ Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
// Emit a personality function even when there are no landing pads
forceEmitPersonality =
// ...if a personality function is explicitly specified
- F->hasPersonalityFn() &&
+ F.hasPersonalityFn() &&
// ... and it's not known to be a noop in the absence of invokes
!isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
// ... and we're not explicitly asked not to emit it
- F->needsUnwindTableEntry();
+ F.needsUnwindTableEntry();
shouldEmitPersonality =
(forceEmitPersonality ||
@@ -148,8 +143,8 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
if (!shouldEmitPersonality)
return;
- auto *F = MBB->getParent()->getFunction();
- auto *P = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ auto &F = MBB->getParent()->getFunction();
+ auto *P = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
assert(P && "Expected personality function");
// If we are forced to emit this personality, make sure to record
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 333d14a11af5..c8cd8eb8ffd3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units -----------===//
+//===- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,38 +12,58 @@
//===----------------------------------------------------------------------===//
#include "DwarfCompileUnit.h"
+#include "AddressPool.h"
+#include "DwarfDebug.h"
#include "DwarfExpression.h"
+#include "DwarfUnit.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <utility>
-namespace llvm {
+using namespace llvm;
DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU)
- : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID),
- Skeleton(nullptr), BaseAddress(nullptr) {
+ : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID) {
insertDIE(Node, &getUnitDie());
MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin");
}
/// addLabelAddress - Add a dwarf label attribute data and value using
/// DW_FORM_addr or DW_FORM_GNU_addr_index.
-///
void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label) {
-
// Don't use the address pool in non-fission or in the skeleton unit itself.
// FIXME: Once GDB supports this, it's probably worthwhile using the address
// pool from the skeleton - maybe even in non-fission (possibly fewer
@@ -336,23 +356,15 @@ void DwarfCompileUnit::constructScopeDIE(
if (DD->isLexicalScopeDIENull(Scope))
return;
- unsigned ChildScopeCount;
+ bool HasNonScopeChildren = false;
// We create children here when we know the scope DIE is not going to be
// null and the children will be added to the scope DIE.
- createScopeChildrenDIE(Scope, Children, &ChildScopeCount);
-
- // Skip imported directives in gmlt-like data.
- if (!includeMinimalInlineScopes()) {
- // There is no need to emit empty lexical block DIE.
- for (const auto *IE : ImportedEntities[DS])
- Children.push_back(
- constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
- }
+ createScopeChildrenDIE(Scope, Children, &HasNonScopeChildren);
// If there are only other scopes as children, put them directly in the
// parent instead, as this scope would serve no purpose.
- if (Children.size() == ChildScopeCount) {
+ if (!HasNonScopeChildren) {
FinalChildren.insert(FinalChildren.end(),
std::make_move_iterator(Children.begin()),
std::make_move_iterator(Children.end()));
@@ -488,14 +500,12 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
if (const MachineInstr *DVInsn = DV.getMInsn()) {
assert(DVInsn->getNumOperands() == 4);
if (DVInsn->getOperand(0).isReg()) {
- const MachineOperand RegOp = DVInsn->getOperand(0);
+ auto RegOp = DVInsn->getOperand(0);
+ auto Op1 = DVInsn->getOperand(1);
// If the second operand is an immediate, this is an indirect value.
- if (DVInsn->getOperand(1).isImm()) {
- MachineLocation Location(RegOp.getReg(),
- DVInsn->getOperand(1).getImm());
- addVariableAddress(DV, *VariableDie, Location);
- } else if (RegOp.getReg())
- addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg()));
+ assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
+ MachineLocation Location(RegOp.getReg(), Op1.isImm());
+ addVariableAddress(DV, *VariableDie, Location);
} else if (DVInsn->getOperand(0).isImm()) {
// This variable is described by a single constant.
// Check whether it has a DIExpression.
@@ -557,20 +567,27 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &Children,
- unsigned *ChildScopeCount) {
+ bool *HasNonScopeChildren) {
+ assert(Children.empty());
DIE *ObjectPointer = nullptr;
for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope))
Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
- unsigned ChildCountWithoutScopes = Children.size();
+ // Skip imported directives in gmlt-like data.
+ if (!includeMinimalInlineScopes()) {
+ // There is no need to emit empty lexical block DIE.
+ for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
+ Children.push_back(
+ constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
+ }
+
+ if (HasNonScopeChildren)
+ *HasNonScopeChildren = !Children.empty();
for (LexicalScope *LS : Scope->getChildren())
constructScopeDIE(LS, Children);
- if (ChildScopeCount)
- *ChildScopeCount = Children.size() - ChildCountWithoutScopes;
-
return ObjectPointer;
}
@@ -726,7 +743,7 @@ DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(
void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var,
LexicalScope *Scope) {
assert(Scope && Scope->isAbstractScope());
- auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
+ auto AbsDbgVariable = llvm::make_unique<DbgVariable>(Var, /* IA */ nullptr);
DU->addScopeVariable(Scope, AbsDbgVariable.get());
getAbstractVariables()[Var] = std::move(AbsDbgVariable);
}
@@ -744,10 +761,19 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {
DwarfUnit::emitCommonHeader(UseOffsets, UT);
}
+bool DwarfCompileUnit::hasDwarfPubSections() const {
+ // Opting in to GNU Pubnames/types overrides the default to ensure these are
+ // generated for things like Gold's gdb_index generation.
+ if (CUNode->getGnuPubnames())
+ return true;
+
+ return DD->tuneForGDB() && !includeMinimalInlineScopes();
+}
+
/// addGlobalName - Add a new global name to the compile unit.
void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) {
- if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
+ if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Name.str();
GlobalNames[FullName] = &Die;
@@ -755,7 +781,7 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
const DIScope *Context) {
- if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
+ if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Name.str();
// Insert, allowing the entry to remain as-is if it's already present
@@ -768,7 +794,7 @@ void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
/// Add a new global type to the unit.
void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {
- if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
+ if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Ty->getName().str();
GlobalTypes[FullName] = &Die;
@@ -776,7 +802,7 @@ void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
const DIScope *Context) {
- if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
+ if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Ty->getName().str();
// Insert, allowing the entry to remain as-is if it's already present
@@ -790,6 +816,12 @@ void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
/// DbgVariable based on provided MachineLocation.
void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
MachineLocation Location) {
+ // addBlockByrefAddress is obsolete and will be removed soon.
+ // The clang frontend always generates block byref variables with a
+ // complex expression that encodes exactly what addBlockByrefAddress
+ // would do.
+ assert((!DV.isBlockByrefVariable() || DV.hasComplexAddress()) &&
+ "block byref variable without a complex expression");
if (DV.hasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
else if (DV.isBlockByrefVariable())
@@ -806,12 +838,7 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
- SmallVector<uint64_t, 8> Ops;
- if (Location.isIndirect() && Location.getOffset()) {
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(Location.getOffset());
- }
- DIExpressionCursor Cursor(Ops);
+ DIExpressionCursor Cursor({});
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
@@ -835,13 +862,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
- SmallVector<uint64_t, 8> Ops;
- if (Location.isIndirect() && Location.getOffset()) {
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(Location.getOffset());
- }
- Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
- DIExpressionCursor Cursor(Ops);
+ DIExpressionCursor Cursor(DIExpr);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
@@ -898,4 +919,3 @@ bool DwarfCompileUnit::includeMinimalInlineScopes() const {
return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
}
-} // end llvm namespace
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index e38672792867..68482eb7e358 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//===- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,19 +14,32 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#include "DbgValueHistoryCalculator.h"
+#include "DwarfDebug.h"
#include "DwarfUnit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
namespace llvm {
-class StringRef;
class AsmPrinter;
-class DIE;
-class DwarfDebug;
class DwarfFile;
+class GlobalVariable;
+class MCExpr;
class MCSymbol;
-class LexicalScope;
+class MDNode;
class DwarfCompileUnit final : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
@@ -37,7 +50,7 @@ class DwarfCompileUnit final : public DwarfUnit {
DIE::value_iterator StmtListValue;
/// Skeleton unit associated with this unit.
- DwarfCompileUnit *Skeleton;
+ DwarfCompileUnit *Skeleton = nullptr;
/// The start of the unit within its section.
MCSymbol *LabelBegin;
@@ -45,9 +58,8 @@ class DwarfCompileUnit final : public DwarfUnit {
/// The start of the unit macro info within macro section.
MCSymbol *MacroLabelBegin;
- typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList;
- typedef llvm::DenseMap<const MDNode *, ImportedEntityList>
- ImportedEntityMap;
+ using ImportedEntityList = SmallVector<const MDNode *, 8>;
+ using ImportedEntityMap = DenseMap<const MDNode *, ImportedEntityList>;
ImportedEntityMap ImportedEntities;
@@ -66,7 +78,7 @@ class DwarfCompileUnit final : public DwarfUnit {
// The base address of this unit, if any. Used for relative references in
// ranges/locs.
- const MCSymbol *BaseAddress;
+ const MCSymbol *BaseAddress = nullptr;
DenseMap<const MDNode *, DIE *> AbstractSPDies;
DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
@@ -164,6 +176,7 @@ public:
void attachRangesOrLowHighPC(DIE &D,
const SmallVectorImpl<InsnRange> &Ranges);
+
/// \brief This scope represents inlined body of a function. Construct
/// DIE to represent this concrete inlined copy of the function.
DIE *constructInlinedScopeDIE(LexicalScope *Scope);
@@ -181,7 +194,7 @@ public:
/// A helper function to create children of a Scope DIE.
DIE *createScopeChildrenDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &Children,
- unsigned *ChildScopeCount = nullptr);
+ bool *HasNonScopeChildren = nullptr);
/// \brief Construct a DIE for this subprogram scope.
void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope);
@@ -195,8 +208,9 @@ public:
void finishSubprogramDefinition(const DISubprogram *SP);
void finishVariableDefinition(const DbgVariable &Var);
+
/// Find abstract variable associated with Var.
- typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+ using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
const DILocalVariable *&Cleansed);
DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
@@ -275,8 +289,10 @@ public:
void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; }
const MCSymbol *getBaseAddress() const { return BaseAddress; }
+
+ bool hasDwarfPubSections() const;
};
-} // end llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index f1b4d9f20ca9..2c9c7d4f3146 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//===- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,43 +15,67 @@
#include "ByteStreamer.h"
#include "DIEHash.h"
#include "DebugLocEntry.h"
+#include "DebugLocStream.h"
+#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
+#include "DwarfFile.h"
#include "DwarfUnit.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/LEB128.h"
#include "llvm/Support/MD5.h"
-#include "llvm/Support/Path.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -61,10 +85,9 @@ static cl::opt<bool>
DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
cl::desc("Disable debug info printing"));
-static cl::opt<bool>
-GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden,
- cl::desc("Generate GNU-style pubnames and pubtypes"),
- cl::init(false));
+static cl::opt<bool> UseDwarfRangesBaseAddressSpecifier(
+ "use-dwarf-ranges-base-address-specifier", cl::Hidden,
+ cl::desc("Use base address specifiers in debug_ranges"), cl::init(false));
static cl::opt<bool> GenerateARangeSection("generate-arange-section",
cl::Hidden,
@@ -75,9 +98,7 @@ static cl::opt<bool> SplitDwarfCrossCuReferences(
"split-dwarf-cross-cu-references", cl::Hidden,
cl::desc("Enable cross-cu references in DWO files"), cl::init(false));
-namespace {
enum DefaultOnOff { Default, Enable, Disable };
-}
static cl::opt<DefaultOnOff> UnknownLocations(
"use-unknown-locations", cl::Hidden,
@@ -94,19 +115,12 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
clEnumVal(Disable, "Disabled")),
cl::init(Default));
-static cl::opt<DefaultOnOff>
-DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
- cl::desc("Generate DWARF pubnames and pubtypes sections"),
- cl::values(clEnumVal(Default, "Default for platform"),
- clEnumVal(Enable, "Enabled"),
- clEnumVal(Disable, "Disabled")),
- cl::init(Default));
-
enum LinkageNameOption {
DefaultLinkageNames,
AllLinkageNames,
AbstractLinkageNames
};
+
static cl::opt<LinkageNameOption>
DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
cl::desc("Which DWARF linkage-name attributes to emit."),
@@ -142,8 +156,6 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
return false;
}
-//===----------------------------------------------------------------------===//
-
bool DbgVariable::isBlockByrefVariable() const {
assert(Var && "Invalid complex DbgVariable!");
return Var->getType().resolve()->isBlockByrefStruct();
@@ -198,17 +210,54 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
if (FrameIndexExprs.size() == 1)
return FrameIndexExprs;
- assert(all_of(FrameIndexExprs,
- [](const FrameIndexExpr &A) { return A.Expr->isFragment(); }) &&
+ assert(llvm::all_of(FrameIndexExprs,
+ [](const FrameIndexExpr &A) {
+ return A.Expr->isFragment();
+ }) &&
"multiple FI expressions without DW_OP_LLVM_fragment");
std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
[](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
return A.Expr->getFragmentInfo()->OffsetInBits <
B.Expr->getFragmentInfo()->OffsetInBits;
});
+
return FrameIndexExprs;
}
+void DbgVariable::addMMIEntry(const DbgVariable &V) {
+ assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
+ assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
+ assert(V.Var == Var && "conflicting variable");
+ assert(V.IA == IA && "conflicting inlined-at location");
+
+ assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
+ assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
+
+ // FIXME: This logic should not be necessary anymore, as we now have proper
+ // deduplication. However, without it, we currently run into the assertion
+ // below, which means that we are likely dealing with broken input, i.e. two
+ // non-fragment entries for the same variable at different frame indices.
+ if (FrameIndexExprs.size()) {
+ auto *Expr = FrameIndexExprs.back().Expr;
+ if (!Expr || !Expr->isFragment())
+ return;
+ }
+
+ for (const auto &FIE : V.FrameIndexExprs)
+ // Ignore duplicate entries.
+ if (llvm::none_of(FrameIndexExprs, [&](const FrameIndexExpr &Other) {
+ return FIE.FI == Other.FI && FIE.Expr == Other.Expr;
+ }))
+ FrameIndexExprs.push_back(FIE);
+
+ assert((FrameIndexExprs.size() == 1 ||
+ llvm::all_of(FrameIndexExprs,
+ [](FrameIndexExpr &FIE) {
+ return FIE.Expr && FIE.Expr->isFragment();
+ })) &&
+ "conflicting locations for variable");
+}
+
static const DwarfAccelTable::Atom TypeAtoms[] = {
DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
@@ -225,9 +274,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
dwarf::DW_FORM_data4)),
AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
- AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {
-
- CurFn = nullptr;
+ AccelTypes(TypeAtoms) {
const Triple &TT = Asm->TM.getTargetTriple();
// Make sure we know our "debugger tuning." The target option takes
@@ -278,7 +325,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
}
// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
-DwarfDebug::~DwarfDebug() { }
+DwarfDebug::~DwarfDebug() = default;
static bool isObjCClass(StringRef Name) {
return Name.startswith("+") || Name.startswith("-");
@@ -389,20 +436,8 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
-bool DwarfDebug::hasDwarfPubSections(bool includeMinimalInlineScopes) const {
- // Opting in to GNU Pubnames/types overrides the default to ensure these are
- // generated for things like Gold's gdb_index generation.
- if (GenerateGnuPubSections)
- return true;
-
- if (DwarfPubSections == Default)
- return tuneForGDB() && !includeMinimalInlineScopes;
-
- return DwarfPubSections == Enable;
-}
-
void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
- if (!hasDwarfPubSections(U.includeMinimalInlineScopes()))
+ if (!U.hasDwarfPubSections())
return;
U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
@@ -417,7 +452,7 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
StringRef FN = DIUnit->getFilename();
CompilationDir = DIUnit->getDirectory();
- auto OwnedUnit = make_unique<DwarfCompileUnit>(
+ auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
DIE &Die = NewCU.getUnitDie();
@@ -428,6 +463,9 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
Asm->TM.Options.MCOptions.SplitDwarfFile);
}
+ for (auto *IE : DIUnit->getImportedEntities())
+ NewCU.addImportedEntity(IE);
+
// LTO with assembly output shares a single line table amongst multiple CUs.
// To avoid the compilation directory being ambiguous, let the line table
// explicitly describe the directory of all files, never relying on the
@@ -494,6 +532,8 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
const DIImportedEntity *N) {
+ if (isa<DILocalScope>(N->getScope()))
+ return;
if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope()))
D->addChild(TheCU.constructImportedEntityDIE(N));
}
@@ -503,13 +543,18 @@ static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
std::sort(GVEs.begin(), GVEs.end(),
[](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
- if (A.Expr != B.Expr && A.Expr && B.Expr) {
- auto FragmentA = A.Expr->getFragmentInfo();
- auto FragmentB = B.Expr->getFragmentInfo();
- if (FragmentA && FragmentB)
- return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
- }
- return false;
+ // Sort order: first null exprs, then exprs without fragment
+ // info, then sort by fragment offset in bits.
+ // FIXME: Come up with a more comprehensive comparator so
+ // the sorting isn't non-deterministic, and so the following
+ // std::unique call works correctly.
+ if (!A.Expr || !B.Expr)
+ return !!B.Expr;
+ auto FragmentA = A.Expr->getFragmentInfo();
+ auto FragmentB = B.Expr->getFragmentInfo();
+ if (!FragmentA || !FragmentB)
+ return !!FragmentB;
+ return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
});
GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
[](DwarfCompileUnit::GlobalExpr A,
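The rewritten comparator in the hunk above defines the weak ordering that the following std::unique relies on: null expressions first, then expressions without fragment info, then ascending fragment offset. A standalone sketch of the same ordering over a simplified stand-in type (hypothetical names, not the DwarfCompileUnit API):

    #include <cstdint>
    #include <optional>

    struct Expr { std::optional<uint64_t> FragmentOffsetInBits; };
    struct GlobalExpr { const Expr *E = nullptr; };

    // Null exprs sort first, then fragment-less exprs, then by offset.
    bool lessGlobalExpr(const GlobalExpr &A, const GlobalExpr &B) {
      if (!A.E || !B.E)
        return B.E != nullptr; // A < B only when A is null and B is not
      const auto &FA = A.E->FragmentOffsetInBits;
      const auto &FB = B.E->FragmentOffsetInBits;
      if (!FA || !FB)
        return FB.has_value(); // fragment-less entries come first
      return *FA < *FB;
    }

As the FIXME notes, entries that tie under this ordering (same fragment offset, different expressions) are not ordered deterministically, which is why the comparator is flagged for a more comprehensive rewrite.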
@@ -546,18 +591,31 @@ void DwarfDebug::beginModule() {
}
for (DICompileUnit *CUNode : M->debug_compile_units()) {
- if (CUNode->getEnumTypes().empty() && CUNode->getRetainedTypes().empty() &&
- CUNode->getGlobalVariables().empty() &&
- CUNode->getImportedEntities().empty() && CUNode->getMacros().empty())
+ // FIXME: Move local imported entities into a list attached to the
+ // subprogram, then this search won't be needed and a
+ // getImportedEntities().empty() test should go below with the rest.
+ bool HasNonLocalImportedEntities = llvm::any_of(
+ CUNode->getImportedEntities(), [](const DIImportedEntity *IE) {
+ return !isa<DILocalScope>(IE->getScope());
+ });
+
+ if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() &&
+ CUNode->getRetainedTypes().empty() &&
+ CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty())
continue;
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode);
- for (auto *IE : CUNode->getImportedEntities())
- CU.addImportedEntity(IE);
// Global Variables.
- for (auto *GVE : CUNode->getGlobalVariables())
- GVMap[GVE->getVariable()].push_back({nullptr, GVE->getExpression()});
+ for (auto *GVE : CUNode->getGlobalVariables()) {
+ // Don't bother adding DIGlobalVariableExpressions listed in the CU if we
+ // already know about the variable and it isn't adding a constant
+ // expression.
+ auto &GVMapEntry = GVMap[GVE->getVariable()];
+ auto *Expr = GVE->getExpression();
+ if (!GVMapEntry.size() || (Expr && Expr->isConstant()))
+ GVMapEntry.push_back({nullptr, Expr});
+ }
DenseSet<DIGlobalVariable *> Processed;
for (auto *GVE : CUNode->getGlobalVariables()) {
DIGlobalVariable *GV = GVE->getVariable();
@@ -682,6 +740,11 @@ void DwarfDebug::finalizeModuleInfo() {
TLOF.getDwarfMacinfoSection()->getBeginSymbol());
}
+ // Emit all frontend-produced Skeleton CUs, i.e., Clang modules.
+ for (auto *CUNode : MMI->getModule()->debug_compile_units())
+ if (CUNode->getDWOId())
+ getOrCreateDwarfCompileUnit(CUNode);
+
// Compute DIE offsets and sizes.
InfoHolder.computeSizeAndOffsets();
if (useSplitDwarf())
@@ -744,12 +807,7 @@ void DwarfDebug::endModule() {
}
// Emit the pubnames and pubtypes sections if requested.
- // The condition is optimistically correct - any CU not using GMLT (&
- // implicit/default pubnames state) might still have pubnames.
- if (hasDwarfPubSections(/* gmlt */ false)) {
- emitDebugPubNames(GenerateGnuPubSections);
- emitDebugPubTypes(GenerateGnuPubSections);
- }
+ emitDebugPubSections();
// clean up.
// FIXME: AbstractVariables.clear();
@@ -775,9 +833,11 @@ void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU,
LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
CU.createAbstractVariable(Cleansed, Scope);
}
+
// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
DwarfCompileUnit &TheCU, DenseSet<InlinedVariable> &Processed) {
+ SmallDenseMap<InlinedVariable, DbgVariable *> MFVars;
for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
@@ -793,26 +853,28 @@ void DwarfDebug::collectVariableInfoFromMFTable(
continue;
ensureAbstractVariableIsCreatedIfScoped(TheCU, Var, Scope->getScopeNode());
- auto RegVar = make_unique<DbgVariable>(Var.first, Var.second);
+ auto RegVar = llvm::make_unique<DbgVariable>(Var.first, Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
- if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
+ if (DbgVariable *DbgVar = MFVars.lookup(Var))
+ DbgVar->addMMIEntry(*RegVar);
+ else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
+ MFVars.insert({Var, RegVar.get()});
ConcreteVariables.push_back(std::move(RegVar));
+ }
}
}
// Get .debug_loc entry for the instruction range starting at MI.
static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
const DIExpression *Expr = MI->getDebugExpression();
-
assert(MI->getNumOperands() == 4);
if (MI->getOperand(0).isReg()) {
- MachineLocation MLoc;
+ auto RegOp = MI->getOperand(0);
+ auto Op1 = MI->getOperand(1);
// If the second operand is an immediate, this is a
// register-indirect address.
- if (!MI->getOperand(1).isImm())
- MLoc.set(MI->getOperand(0).getReg());
- else
- MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
+ MachineLocation MLoc(RegOp.getReg(), Op1.isImm());
return DebugLocEntry::Value(Expr, MLoc);
}
if (MI->getOperand(0).isImm())
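
The MFVars side table added above keeps one variable from growing several DbgVariables when the MF table lists it more than once: the first sighting is kept, and later sightings fold their frame-index entries into it via addMMIEntry. A self-contained approximation of that dedup pattern (VarKey, Entry, and mergeInto are illustrative stand-ins, not LLVM API):

#include <map>
#include <memory>
#include <utility>
#include <vector>

struct Entry {                      // stands in for DbgVariable
  std::vector<int> FrameIndices;    // stands in for FrameIndexExprs
  void mergeInto(const Entry &O) {  // stands in for addMMIEntry
    FrameIndices.insert(FrameIndices.end(), O.FrameIndices.begin(),
                        O.FrameIndices.end());
  }
};

using VarKey = std::pair<const void *, const void *>; // (variable, inlined-at)

// The first entry for a key is kept and owned; later ones merge into it.
void collect(const std::vector<std::pair<VarKey, Entry>> &Table,
             std::vector<std::unique_ptr<Entry>> &Owned) {
  std::map<VarKey, Entry *> Seen;
  for (const auto &VI : Table) {
    auto It = Seen.find(VI.first);
    if (It != Seen.end()) {
      It->second->mergeInto(VI.second); // same variable: merge, don't re-add
      continue;
    }
    Owned.push_back(std::make_unique<Entry>(VI.second));
    Seen.emplace(VI.first, Owned.back().get());
  }
}
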
@@ -967,7 +1029,8 @@ DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU,
LexicalScope &Scope,
InlinedVariable IV) {
ensureAbstractVariableIsCreatedIfScoped(TheCU, IV, Scope.getScopeNode());
- ConcreteVariables.push_back(make_unique<DbgVariable>(IV.first, IV.second));
+ ConcreteVariables.push_back(
+ llvm::make_unique<DbgVariable>(IV.first, IV.second));
InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get());
return ConcreteVariables.back().get();
}
@@ -1100,7 +1163,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
assert(CurMI);
- const auto *SP = MI->getParent()->getParent()->getFunction()->getSubprogram();
+ const auto *SP = MI->getMF()->getFunction().getSubprogram();
if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
return;
@@ -1198,7 +1261,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn = MF;
- auto *SP = MF->getFunction()->getSubprogram();
+ auto *SP = MF->getFunction().getSubprogram();
assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode());
if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
return;
@@ -1234,7 +1297,7 @@ void DwarfDebug::skippedNonDebugFunction() {
// Gather and emit post-function debug information.
void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
- const DISubprogram *SP = MF->getFunction()->getSubprogram();
+ const DISubprogram *SP = MF->getFunction().getSubprogram();
assert(CurFn == MF &&
"endFunction should be called with the same function as beginFunction");
@@ -1309,8 +1372,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
if (auto *Scope = cast_or_null<DIScope>(S)) {
Fn = Scope->getFilename();
Dir = Scope->getDirectory();
- if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
- if (getDwarfVersion() >= 4)
+ if (Line != 0 && getDwarfVersion() >= 4)
+ if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
Discriminator = LBF->getDiscriminator();
unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
@@ -1440,84 +1503,74 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
}
}
-/// emitDebugPubNames - Emit visible names into a debug pubnames section.
-///
-void DwarfDebug::emitDebugPubNames(bool GnuStyle) {
- MCSection *PSec = GnuStyle
- ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
- : Asm->getObjFileLowering().getDwarfPubNamesSection();
-
- emitDebugPubSection(GnuStyle, PSec, "Names",
- &DwarfCompileUnit::getGlobalNames);
-}
-
-void DwarfDebug::emitDebugPubSection(
- bool GnuStyle, MCSection *PSec, StringRef Name,
- const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) {
+/// emitDebugPubSections - Emit visible names and types into debug pubnames and
+/// pubtypes sections.
+void DwarfDebug::emitDebugPubSections() {
for (const auto &NU : CUMap) {
DwarfCompileUnit *TheU = NU.second;
-
- const auto &Globals = (TheU->*Accessor)();
-
- if (!hasDwarfPubSections(TheU->includeMinimalInlineScopes()))
+ if (!TheU->hasDwarfPubSections())
continue;
- if (auto *Skeleton = TheU->getSkeleton())
- TheU = Skeleton;
+ bool GnuStyle = TheU->getCUNode()->getGnuPubnames();
- // Start the dwarf pubnames section.
- Asm->OutStreamer->SwitchSection(PSec);
+ Asm->OutStreamer->SwitchSection(
+ GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
+ : Asm->getObjFileLowering().getDwarfPubNamesSection());
+ emitDebugPubSection(GnuStyle, "Names", TheU, TheU->getGlobalNames());
- // Emit the header.
- Asm->OutStreamer->AddComment("Length of Public " + Name + " Info");
- MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
- MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
- Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+ Asm->OutStreamer->SwitchSection(
+ GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
+ : Asm->getObjFileLowering().getDwarfPubTypesSection());
+ emitDebugPubSection(GnuStyle, "Types", TheU, TheU->getGlobalTypes());
+ }
+}
- Asm->OutStreamer->EmitLabel(BeginLabel);
+void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
+ DwarfCompileUnit *TheU,
+ const StringMap<const DIE *> &Globals) {
+ if (auto *Skeleton = TheU->getSkeleton())
+ TheU = Skeleton;
- Asm->OutStreamer->AddComment("DWARF Version");
- Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
+ // Emit the header.
+ Asm->OutStreamer->AddComment("Length of Public " + Name + " Info");
+ MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
+ MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
+ Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
- Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
- Asm->emitDwarfSymbolReference(TheU->getLabelBegin());
+ Asm->OutStreamer->EmitLabel(BeginLabel);
- Asm->OutStreamer->AddComment("Compilation Unit Length");
- Asm->EmitInt32(TheU->getLength());
+ Asm->OutStreamer->AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
- // Emit the pubnames for this compilation unit.
- for (const auto &GI : Globals) {
- const char *Name = GI.getKeyData();
- const DIE *Entity = GI.second;
+ Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
+ Asm->emitDwarfSymbolReference(TheU->getLabelBegin());
- Asm->OutStreamer->AddComment("DIE offset");
- Asm->EmitInt32(Entity->getOffset());
+ Asm->OutStreamer->AddComment("Compilation Unit Length");
+ Asm->EmitInt32(TheU->getLength());
- if (GnuStyle) {
- dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
- Asm->OutStreamer->AddComment(
- Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
- dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
- Asm->EmitInt8(Desc.toBits());
- }
+ // Emit the pubnames for this compilation unit.
+ for (const auto &GI : Globals) {
+ const char *Name = GI.getKeyData();
+ const DIE *Entity = GI.second;
+
+ Asm->OutStreamer->AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
- Asm->OutStreamer->AddComment("External Name");
- Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1));
+ if (GnuStyle) {
+ dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
+ Asm->OutStreamer->AddComment(
+ Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
+ dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
+ Asm->EmitInt8(Desc.toBits());
}
- Asm->OutStreamer->AddComment("End Mark");
- Asm->EmitInt32(0);
- Asm->OutStreamer->EmitLabel(EndLabel);
+ Asm->OutStreamer->AddComment("External Name");
+ Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1));
}
-}
-
-void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
- MCSection *PSec = GnuStyle
- ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
- : Asm->getObjFileLowering().getDwarfPubTypesSection();
- emitDebugPubSection(GnuStyle, PSec, "Types",
- &DwarfCompileUnit::getGlobalTypes);
+ Asm->OutStreamer->AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer->EmitLabel(EndLabel);
}
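
emitDebugPubSection above sizes each contribution with a begin/end label pair and EmitLabelDifference, so the 4-byte length field is resolved by the assembler rather than computed up front. Without an MC streamer, the same trick is a back-patch over a byte buffer (a rough sketch; emitU32 and the body contents are placeholders):

#include <cstdint>
#include <vector>

// Append a 32-bit little-endian value (a stand-in for the MC streamer).
static void emitU32(std::vector<uint8_t> &Buf, uint32_t V) {
  for (int I = 0; I < 4; ++I)
    Buf.push_back(uint8_t(V >> (8 * I)));
}

// Reserve the length word, emit the body, then patch the length to cover
// everything after the length field - the label-difference idiom by hand.
void emitContribution(std::vector<uint8_t> &Buf) {
  size_t LengthPos = Buf.size();
  emitU32(Buf, 0);   // placeholder, like the pubNames_begin label
  emitU32(Buf, 2);   // placeholder body: version, CU offset, entries...
  emitU32(Buf, 0);   // end mark
  uint32_t Len = uint32_t(Buf.size() - LengthPos - 4);
  for (int I = 0; I < 4; ++I)
    Buf[LengthPos + I] = uint8_t(Len >> (8 * I));
}
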
/// Emit null-terminated strings into a debug str section.
@@ -1553,13 +1606,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
MachineLocation Location = Value.getLoc();
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
- SmallVector<uint64_t, 8> Ops;
- if (Location.isIndirect() && Location.getOffset()) {
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(Location.getOffset());
- }
- Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
- DIExpressionCursor Cursor(Ops);
+ DIExpressionCursor Cursor(DIExpr);
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
@@ -1580,7 +1627,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
const DebugLocEntry::Value &Value = Values[0];
if (Value.isFragment()) {
// Emit all fragments that belong to the same variable and range.
- assert(all_of(Values, [](DebugLocEntry::Value P) {
+ assert(llvm::all_of(Values, [](DebugLocEntry::Value P) {
return P.isFragment();
}) && "all values are expected to be fragments");
assert(std::is_sorted(Values.begin(), Values.end()) &&
@@ -1844,17 +1891,49 @@ void DwarfDebug::emitDebugRanges() {
// Emit our symbol so we can find the beginning of the range.
Asm->OutStreamer->EmitLabel(List.getSym());
+ // Gather all the ranges that apply to the same section so they can share
+ // a base address entry.
+ MapVector<const MCSection *, std::vector<const RangeSpan *>> MV;
for (const RangeSpan &Range : List.getRanges()) {
- const MCSymbol *Begin = Range.getStart();
- const MCSymbol *End = Range.getEnd();
- assert(Begin && "Range without a begin symbol?");
- assert(End && "Range without an end symbol?");
- if (auto *Base = TheCU->getBaseAddress()) {
- Asm->EmitLabelDifference(Begin, Base, Size);
- Asm->EmitLabelDifference(End, Base, Size);
- } else {
- Asm->OutStreamer->EmitSymbolValue(Begin, Size);
- Asm->OutStreamer->EmitSymbolValue(End, Size);
+ MV[&Range.getStart()->getSection()].push_back(&Range);
+ }
+
+ auto *CUBase = TheCU->getBaseAddress();
+ bool BaseIsSet = false;
+ for (const auto &P : MV) {
+ // Don't bother with a base address entry if there's only one range in
+ // this section in this range list - for example ranges for a CU will
+ // usually consist of single regions from each of many sections
+ // (-ffunction-sections, or just C++ inline functions) except under LTO
+ // or optnone where there may be holes in a single CU's section
+ // contributions.
+ auto *Base = CUBase;
+ if (!Base && P.second.size() > 1 &&
+ UseDwarfRangesBaseAddressSpecifier) {
+ BaseIsSet = true;
+ // FIXME/use care: This may not be a useful base address if it's not
+ // the lowest address/range in this object.
+ Base = P.second.front()->getStart();
+ Asm->OutStreamer->EmitIntValue(-1, Size);
+ Asm->OutStreamer->EmitSymbolValue(Base, Size);
+ } else if (BaseIsSet) {
+ BaseIsSet = false;
+ Asm->OutStreamer->EmitIntValue(-1, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ }
+
+ for (const auto *RS : P.second) {
+ const MCSymbol *Begin = RS->getStart();
+ const MCSymbol *End = RS->getEnd();
+ assert(Begin && "Range without a begin symbol?");
+ assert(End && "Range without an end symbol?");
+ if (Base) {
+ Asm->EmitLabelDifference(Begin, Base, Size);
+ Asm->EmitLabelDifference(End, Base, Size);
+ } else {
+ Asm->OutStreamer->EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer->EmitSymbolValue(End, Size);
+ }
}
}
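
When a section contributes more than one range and the CU has no base address of its own, the code above emits a DWARF base-address selection entry (an all-ones word followed by the base) so later entries become small label differences, and resets to absolute addressing when the next section does not benefit. In raw DWARF v4 .debug_ranges terms (64-bit words, with plain addresses standing in for MCSymbols), the scheme looks like:

#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

using Range = std::pair<uint64_t, uint64_t>; // [begin, end)

// Emit one range list, grouped by section name; Out collects 64-bit words.
void emitRanges(const std::map<std::string, std::vector<Range>> &BySection,
                std::vector<uint64_t> &Out) {
  bool BaseIsSet = false;
  for (const auto &P : BySection) {
    if (P.second.size() > 1) {
      // Base address selection entry: all-ones, then the chosen base.
      uint64_t Base = P.second.front().first;
      Out.push_back(~0ULL);
      Out.push_back(Base);
      BaseIsSet = true;
      for (const Range &R : P.second) {   // entries relative to Base
        Out.push_back(R.first - Base);
        Out.push_back(R.second - Base);
      }
    } else {
      if (BaseIsSet) {                    // reset to absolute addressing
        Out.push_back(~0ULL);
        Out.push_back(0);
        BaseIsSet = false;
      }
      for (const Range &R : P.second) {
        Out.push_back(R.first);
        Out.push_back(R.second);
      }
    }
  }
  Out.push_back(0);                       // end-of-list entry
  Out.push_back(0);
}
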
@@ -1943,7 +2022,7 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
// DW_AT_addr_base, DW_AT_ranges_base.
DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
- auto OwnedUnit = make_unique<DwarfCompileUnit>(
+ auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
@@ -2024,8 +2103,8 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
- auto OwnedUnit = make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
- getDwoLineTable(CU));
+ auto OwnedUnit = llvm::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
+ getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 78ee9a162029..2ae0b418a91e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//===- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,40 +14,52 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
+#include "AddressPool.h"
#include "DbgValueHistoryCalculator.h"
#include "DebugHandlerBase.h"
#include "DebugLocStream.h"
#include "DwarfAccelTable.h"
#include "DwarfFile.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/CodeGen/DIE.h"
-#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstdint>
+#include <limits>
#include <memory>
+#include <utility>
+#include <vector>
namespace llvm {
class AsmPrinter;
class ByteStreamer;
-class ConstantInt;
-class ConstantFP;
class DebugLocEntry;
+class DIE;
class DwarfCompileUnit;
-class DwarfDebug;
class DwarfTypeUnit;
class DwarfUnit;
-class MachineModuleInfo;
+class LexicalScope;
+class MachineFunction;
+class MCSection;
+class MCSymbol;
+class MDNode;
+class Module;
//===----------------------------------------------------------------------===//
/// This class is used to track local variable information.
@@ -89,7 +101,7 @@ public:
assert(!MInsn && "Already initialized?");
assert((!E || E->isValid()) && "Expected valid expression");
- assert(FI != INT_MAX && "Expected valid index");
+ assert(FI != std::numeric_limits<int>::max() && "Expected valid index");
FrameIndexExprs.push_back({FI, E});
}
@@ -111,10 +123,12 @@ public:
// Accessors.
const DILocalVariable *getVariable() const { return Var; }
const DILocation *getInlinedAt() const { return IA; }
+
const DIExpression *getSingleExpression() const {
assert(MInsn && FrameIndexExprs.size() <= 1);
return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
}
+
void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
@@ -124,30 +138,7 @@ public:
/// Get the FI entries, sorted by fragment offset.
ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
-
- void addMMIEntry(const DbgVariable &V) {
- assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
- assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
- assert(V.Var == Var && "conflicting variable");
- assert(V.IA == IA && "conflicting inlined-at location");
-
- assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
- assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
-
- if (FrameIndexExprs.size()) {
- auto *Expr = FrameIndexExprs.back().Expr;
- // Get rid of duplicate non-fragment entries. More than one non-fragment
- // dbg.declare makes no sense so ignore all but the first.
- if (!Expr || !Expr->isFragment())
- return;
- }
- FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end());
- assert(all_of(FrameIndexExprs,
- [](FrameIndexExpr &FIE) {
- return FIE.Expr && FIE.Expr->isFragment();
- }) &&
- "conflicting locations for variable");
- }
+ void addMMIEntry(const DbgVariable &V);
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
@@ -157,6 +148,7 @@ public:
return dwarf::DW_TAG_variable;
}
+
/// Return true if DbgVariable is artificial.
bool isArtificial() const {
if (Var->isArtificial())
@@ -182,6 +174,7 @@ public:
"Invalid Expr for DBG_VALUE");
return !FrameIndexExprs.empty();
}
+
bool isBlockByrefVariable() const;
const DIType *getType() const;
@@ -191,10 +184,10 @@ private:
}
};
-
/// Helper used to pair up a symbol and its DWARF compile unit.
struct SymbolCU {
SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
+
const MCSymbol *Sym;
DwarfCompileUnit *CU;
};
@@ -230,7 +223,7 @@ class DwarfDebug : public DebugHandlerBase {
ProcessedSPNodes;
/// If nonnull, stores the current machine function we're processing.
- const MachineFunction *CurFn;
+ const MachineFunction *CurFn = nullptr;
/// If nonnull, stores the CU in which the previous subprogram was contained.
const DwarfCompileUnit *PrevCU;
@@ -296,17 +289,7 @@ class DwarfDebug : public DebugHandlerBase {
DwarfAccelTable AccelTypes;
// Identify a debugger for "tuning" the debug info.
- DebuggerKind DebuggerTuning;
-
- /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
- ///
- /// Returns whether we are "tuning" for a given debugger.
- /// Should be used only within the constructor, to set feature flags.
- /// @{
- bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
- bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
- bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
- /// @}
+ DebuggerKind DebuggerTuning = DebuggerKind::Default;
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
@@ -314,7 +297,7 @@ class DwarfDebug : public DebugHandlerBase {
return InfoHolder.getUnits();
}
- typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+ using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable Var,
const MDNode *Scope);
@@ -358,21 +341,12 @@ class DwarfDebug : public DebugHandlerBase {
/// Emit type dies into a hashed accelerator table.
void emitAccelTypes();
- /// Emit visible names into a debug pubnames section.
- /// \param GnuStyle determines whether or not we want to emit
- /// additional information into the table ala newer gcc for gdb
- /// index.
- void emitDebugPubNames(bool GnuStyle = false);
+ /// Emit visible names and types into debug pubnames and pubtypes sections.
+ void emitDebugPubSections();
- /// Emit visible types into a debug pubtypes section.
- /// \param GnuStyle determines whether or not we want to emit
- /// additional information into the table ala newer gcc for gdb
- /// index.
- void emitDebugPubTypes(bool GnuStyle = false);
-
- void emitDebugPubSection(
- bool GnuStyle, MCSection *PSec, StringRef Name,
- const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const);
+ void emitDebugPubSection(bool GnuStyle, StringRef Name,
+ DwarfCompileUnit *TheU,
+ const StringMap<const DIE *> &Globals);
/// Emit null-terminated strings into a debug str section.
void emitDebugStr();
@@ -561,11 +535,19 @@ public:
/// going to be null.
bool isLexicalScopeDIENull(LexicalScope *Scope);
- bool hasDwarfPubSections(bool includeMinimalInlineScopes) const;
-
/// Find the matching DwarfCompileUnit for the given CU DIE.
DwarfCompileUnit *lookupCU(const DIE *Die) { return CUDieMap.lookup(Die); }
+
+ /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
+ ///
+ /// Returns whether we are "tuning" for a given debugger.
+ /// @{
+ bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
+ bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
+ bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
+ /// @}
};
-} // End of namespace llvm
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 3a8568cf39ae..68d25fe37b43 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework ----------===//
+//===- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,13 +12,15 @@
//===----------------------------------------------------------------------===//
#include "DwarfExpression.h"
-#include "DwarfDebug.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -128,6 +130,8 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
unsigned Size = TRI.getSubRegIdxSize(Idx);
unsigned Offset = TRI.getSubRegIdxOffset(Idx);
Reg = TRI.getDwarfRegNum(*SR, false);
+ if (Reg < 0)
+ continue;
// Intersection between the bits we already emitted and the bits
// covered by this subregister.
@@ -136,14 +140,14 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
// If this sub-register has a DWARF number and we haven't covered
// its range, emit a DWARF piece for it.
- if (Reg >= 0 && CurSubReg.test(Coverage)) {
+ if (CurSubReg.test(Coverage)) {
// Emit a piece for any gap in the coverage.
if (Offset > CurPos)
DwarfRegs.push_back({-1, Offset - CurPos, nullptr});
DwarfRegs.push_back(
{Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"});
if (Offset >= MaxSize)
- break;
+ break;
// Mark it as emitted.
Coverage.set(Offset, Offset + Size);
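
The `Reg < 0` early-continue added above skips sub-registers with no DWARF number before any coverage math. The surrounding logic - track which bits of the super-register are already described, emit an undefined piece for each gap, then a piece for the sub-register - can be sketched on a fixed 64-bit register (Piece and the tuple layout are invented for illustration):

#include <bitset>
#include <tuple>
#include <vector>

struct Piece { int Reg; unsigned SizeInBits; };   // Reg == -1 => padding

// SubRegs are (dwarf-reg, bit offset, bit size) triples, widest first.
std::vector<Piece> coverRegister(
    const std::vector<std::tuple<int, unsigned, unsigned>> &SubRegs) {
  std::bitset<64> Coverage;
  unsigned CurPos = 0;
  std::vector<Piece> Out;
  for (const auto &[Reg, Offset, Size] : SubRegs) {
    if (Reg < 0)
      continue;                 // sub-register without a DWARF number
    bool Useful = false;        // does it cover any bits not yet described?
    for (unsigned I = Offset; I < Offset + Size && I < 64; ++I)
      Useful |= !Coverage.test(I);
    if (!Useful)
      continue;
    if (Offset > CurPos)        // gap in coverage: emit an undefined piece
      Out.push_back({-1, Offset - CurPos});
    Out.push_back({Reg, Size});
    for (unsigned I = Offset; I < Offset + Size && I < 64; ++I)
      Coverage.set(I);          // mark these bits as emitted
    CurPos = Offset + Size;
  }
  return Out;
}
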
@@ -336,9 +340,10 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
break;
case dwarf::DW_OP_plus:
case dwarf::DW_OP_minus:
+ case dwarf::DW_OP_mul:
emitOp(Op->getOp());
break;
- case dwarf::DW_OP_deref: {
+ case dwarf::DW_OP_deref:
assert(LocationKind != Register);
if (LocationKind != Memory && isMemoryLocation(ExprCursor))
// Turning this into a memory location description makes the deref
@@ -347,7 +352,6 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
else
emitOp(dwarf::DW_OP_deref);
break;
- }
case dwarf::DW_OP_constu:
assert(LocationKind != Register);
emitOp(dwarf::DW_OP_constu);
@@ -383,7 +387,6 @@ void DwarfExpression::maskSubRegister() {
addAnd(Mask);
}
-
void DwarfExpression::finalize() {
assert(DwarfRegs.size() == 0 && "dwarf registers not emitted");
// Emit any outstanding DW_OP_piece operations to mask out subregisters.
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 728f8ad9225b..ea5cbc40ba35 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ---*- C++ -*--===//
+//===- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,21 +14,29 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
namespace llvm {
class AsmPrinter;
+class APInt;
class ByteStreamer;
-class TargetRegisterInfo;
class DwarfUnit;
class DIELoc;
+class TargetRegisterInfo;
/// Holds a DIExpression and keeps track of how many operands have been consumed
/// so far.
class DIExpressionCursor {
DIExpression::expr_op_iterator Start, End;
+
public:
DIExpressionCursor(const DIExpression *Expr) {
if (!Expr) {
@@ -42,8 +50,7 @@ public:
DIExpressionCursor(ArrayRef<uint64_t> Expr)
: Start(Expr.begin()), End(Expr.end()) {}
- DIExpressionCursor(const DIExpressionCursor &C)
- : Start(C.Start), End(C.End) {}
+ DIExpressionCursor(const DIExpressionCursor &) = default;
/// Consume one operation.
Optional<DIExpression::ExprOperand> take() {
@@ -73,8 +80,10 @@ public:
return *Next;
}
+
/// Determine whether there are any operations left in this expression.
operator bool() const { return Start != End; }
+
DIExpression::expr_op_iterator begin() const { return Start; }
DIExpression::expr_op_iterator end() const { return End; }
@@ -122,10 +131,13 @@ protected:
/// Output a dwarf operand and an optional assembler comment.
virtual void emitOp(uint8_t Op, const char *Comment = nullptr) = 0;
+
/// Emit a raw signed value.
virtual void emitSigned(int64_t Value) = 0;
+
/// Emit a raw unsigned value.
virtual void emitUnsigned(uint64_t Value) = 0;
+
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
@@ -133,8 +145,10 @@ protected:
/// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF
/// register location description.
void addReg(int DwarfReg, const char *Comment = nullptr);
+
/// Emit a DW_OP_breg operation.
void addBReg(int DwarfReg, int Offset);
+
/// Emit DW_OP_fbreg <Offset>.
void addFBReg(int Offset);
@@ -156,7 +170,6 @@ protected:
bool addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
unsigned MaxSize = ~1U);
-
/// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment.
/// \param OffsetInBits This is an optional offset into the location that
/// is at the top of the DWARF stack.
@@ -164,6 +177,7 @@ protected:
/// Emit a shift-right dwarf operation.
void addShr(unsigned ShiftBy);
+
/// Emit a bitwise and dwarf operation.
void addAnd(unsigned Mask);
@@ -181,6 +195,7 @@ protected:
void addStackValue();
~DwarfExpression() = default;
+
public:
DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
@@ -189,8 +204,10 @@ public:
/// Emit a signed constant.
void addSignedConstant(int64_t Value);
+
/// Emit an unsigned constant.
void addUnsignedConstant(uint64_t Value);
+
/// Emit an unsigned constant.
void addUnsignedConstant(const APInt &Value);
@@ -213,6 +230,7 @@ public:
bool addMachineRegExpression(const TargetRegisterInfo &TRI,
DIExpressionCursor &Expr, unsigned MachineReg,
unsigned FragmentOffsetInBits = 0);
+
/// Emit all remaining operations in the DIExpressionCursor.
///
/// \param FragmentOffsetInBits If this is one fragment out of multiple
@@ -235,6 +253,7 @@ class DebugLocDwarfExpression final : public DwarfExpression {
void emitUnsigned(uint64_t Value) override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
+
public:
DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS)
: DwarfExpression(DwarfVersion), BS(BS) {}
@@ -253,11 +272,13 @@ const AsmPrinter &AP;
unsigned MachineReg) override;
public:
DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
+
DIELoc *finalize() {
DwarfExpression::finalize();
return &DIE;
}
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 595f1d91c4bf..3c04c969192d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework ----------------===//
+//===- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,13 +11,16 @@
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "DwarfUnit.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/LEB128.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <algorithm>
+#include <cstdint>
+
+using namespace llvm;
-namespace llvm {
DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
: Asm(AP), Abbrevs(AbbrevAllocator), StrPool(DA, *Asm, Pref) {}
@@ -113,4 +116,3 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
Vars.push_back(Var);
return true;
}
-}
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 54924e9806ed..167ca13c19c1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework -------*- C++ -*--===//
+//===- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,30 +10,25 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
-#include "AddressPool.h"
#include "DwarfStringPool.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Allocator.h"
#include <memory>
+#include <utility>
namespace llvm {
+
class AsmPrinter;
class DbgVariable;
class DwarfCompileUnit;
class DwarfUnit;
-class DIEAbbrev;
-class MCSymbol;
-class DIE;
class LexicalScope;
-class StringRef;
-class DwarfDebug;
class MCSection;
-class MDNode;
+
class DwarfFile {
// Target of Dwarf emission, used for sizing of abbreviations.
AsmPrinter *Asm;
@@ -106,6 +101,7 @@ public:
DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
return AbstractSPDies;
}
+
DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
return AbstractVariables;
}
@@ -113,9 +109,12 @@ public:
void insertDIE(const MDNode *TypeMD, DIE *Die) {
DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
}
+
DIE *getDIE(const MDNode *TypeMD) {
return DITypeNodeToDieMap.lookup(TypeMD);
}
};
-}
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index 2066f745e318..aa5f01e88933 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework ----------===//
+//===- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,9 +8,14 @@
//===----------------------------------------------------------------------===//
#include "DwarfStringPool.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
+#include <cassert>
+#include <utility>
using namespace llvm;
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index 93a168485a54..1cac3b7c8432 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework -*- C++ -*--===//
+//===- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,29 +11,28 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DwarfStringPoolEntry.h"
#include "llvm/Support/Allocator.h"
-#include <utility>
namespace llvm {
class AsmPrinter;
-class MCSymbol;
class MCSection;
-class StringRef;
// Collection of strings for this unit and assorted symbols.
// A String->Symbol mapping of strings used by indirect
// references.
class DwarfStringPool {
- typedef DwarfStringPoolEntry EntryTy;
+ using EntryTy = DwarfStringPoolEntry;
+
StringMap<EntryTy, BumpPtrAllocator &> Pool;
StringRef Prefix;
unsigned NumBytes = 0;
bool ShouldCreateSymbols;
public:
- typedef DwarfStringPoolEntryRef EntryRef;
+ using EntryRef = DwarfStringPoolEntryRef;
DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix);
@@ -45,5 +44,7 @@ public:
/// Get a reference to an entry in the string pool.
EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
};
-}
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 4f4ebfc56297..911e46235781 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -22,6 +22,9 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
@@ -33,9 +36,6 @@
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdint>
#include <string>
@@ -473,11 +473,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
- SmallVector<uint64_t, 9> Ops;
- if (Location.isIndirect() && Location.getOffset()) {
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(Location.getOffset());
- }
+ SmallVector<uint64_t, 6> Ops;
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
if (isPointer)
@@ -964,8 +960,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
// This is outside the DWARF spec, but GDB expects a DW_AT_containing_type
// inside C++ composite types to point to the base class with the vtable.
- if (auto *ContainingType =
- dyn_cast_or_null<DICompositeType>(resolve(CTy->getVTableHolder())))
+ // Rust uses DW_AT_containing_type to link a vtable to the type
+ // for which it was created.
+ if (auto *ContainingType = resolve(CTy->getVTableHolder()))
addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
*getOrCreateTypeDIE(ContainingType));
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index e14d5be1177a..3cdab57bca70 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -1,4 +1,4 @@
-//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===//
+//===- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,22 +12,34 @@
//===----------------------------------------------------------------------===//
#include "EHStreamer.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <vector>
using namespace llvm;
EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
-EHStreamer::~EHStreamer() {}
+EHStreamer::~EHStreamer() = default;
/// How many leading type ids two landing pads have in common.
unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,
@@ -50,7 +62,6 @@ unsigned EHStreamer::
computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
SmallVectorImpl<ActionEntry> &Actions,
SmallVectorImpl<unsigned> &FirstActions) {
-
// The action table follows the call-site table in the LSDA. The individual
// records are of two types:
//
@@ -478,13 +489,14 @@ void EHStreamer::emitExceptionTable() {
sizeof(int8_t) + // TType format
(HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size
TTypeBaseOffset; // TType base offset
- unsigned SizeAlign = (4 - TotalSize) & 3;
+ unsigned PadBytes = (4 - TotalSize) & 3;
if (HaveTTData) {
// Account for any extra padding that will be added to the call site table
// length.
- Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
- SizeAlign = 0;
+ Asm->EmitPaddedULEB128(TTypeBaseOffset, TTypeBaseOffsetSize + PadBytes,
+ "@TType base offset");
+ PadBytes = 0;
}
bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
@@ -494,7 +506,9 @@ void EHStreamer::emitExceptionTable() {
Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
// Add extra padding if it wasn't added to the TType base offset.
- Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+ Asm->EmitPaddedULEB128(CallSiteTableLength,
+ CallSiteTableLengthSize + PadBytes,
+ "Call site table length");
// Emit the landing pad site information.
unsigned idx = 0;
@@ -547,7 +561,9 @@ void EHStreamer::emitExceptionTable() {
Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
// Add extra padding if it wasn't added to the TType base offset.
- Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+ Asm->EmitPaddedULEB128(CallSiteTableLength,
+ CallSiteTableLengthSize + PadBytes,
+ "Call site table length");
unsigned Entry = 0;
for (SmallVectorImpl<CallSiteEntry>::const_iterator
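
Both call sites now use EmitPaddedULEB128 so the @TType base offset and the call-site table length occupy a known number of bytes, absorbing the alignment padding instead of re-running the fixed-point size computation. Padded ULEB128 itself is simple: every byte but the last carries a continuation bit. A sketch (assumes PadTo >= 1 and that Value fits in 7*PadTo bits):

#include <cassert>
#include <cstdint>
#include <vector>

// Encode Value as ULEB128 using exactly PadTo bytes: all bytes but the last
// have the continuation bit set, matching DWARF's canonical padded form.
void emitPaddedULEB128(uint64_t Value, unsigned PadTo,
                       std::vector<uint8_t> &Out) {
  for (unsigned I = 0; I + 1 < PadTo; ++I) {
    Out.push_back(uint8_t(Value & 0x7f) | 0x80);  // continuation bit
    Value >>= 7;
  }
  assert(Value <= 0x7f && "value does not fit in PadTo bytes");
  Out.push_back(uint8_t(Value & 0x7f));           // final byte, no cont. bit
}
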
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index 080fdd14b467..7962b761d8de 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -1,4 +1,4 @@
-//===-- EHStreamer.h - Exception Handling Directive Streamer ---*- C++ -*--===//
+//===- EHStreamer.h - Exception Handling Directive Streamer -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,17 +16,16 @@
#include "AsmPrinterHandler.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Compiler.h"
namespace llvm {
+
+class AsmPrinter;
struct LandingPadInfo;
-class MachineModuleInfo;
class MachineInstr;
-class MachineFunction;
+class MachineModuleInfo;
class MCSymbol;
-class MCSymbolRefExpr;
-
-template <typename T>
-class SmallVectorImpl;
+template <typename T> class SmallVectorImpl;
/// Emits exception handling directives.
class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler {
@@ -45,11 +44,12 @@ protected:
struct PadRange {
// The index of the landing pad.
unsigned PadIndex;
+
// The index of the begin and end labels in the landing pad's label lists.
unsigned RangeIndex;
};
- typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+ using RangeMapType = DenseMap<MCSymbol *, PadRange>;
/// Structure describing an entry in the actions table.
struct ActionEntry {
@@ -66,6 +66,7 @@ protected:
// LPad contains the landing pad start labels.
const LandingPadInfo *LPad; // Null indicates that there is no landing pad.
+
unsigned Action;
};
@@ -131,7 +132,7 @@ public:
/// `false' otherwise.
static bool callToNoUnwindFunction(const MachineInstr *MI);
};
-}
-#endif
+} // end namespace llvm
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index c5795559fb7d..e459c02c9a6e 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
@@ -26,7 +27,6 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 035f1a0063aa..e0cc241dd23f 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -11,13 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Mangler.h"
@@ -26,7 +27,6 @@
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include <cctype>
#include <cstddef>
#include <cstdint>
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index 5d485f213573..a6a8e84a949f 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "WinException.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -20,6 +19,10 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Mangler.h"
@@ -30,15 +33,9 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCWin64EH.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
@@ -66,7 +63,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
bool hasLandingPads = !MF->getLandingPads().empty();
bool hasEHFunclets = MF->hasEHFunclets();
- const Function *F = MF->getFunction();
+ const Function &F = MF->getFunction();
shouldEmitMoves = Asm->needsSEHMoves() && MF->hasWinCFI();
@@ -75,14 +72,14 @@ void WinException::beginFunction(const MachineFunction *MF) {
EHPersonality Per = EHPersonality::Unknown;
const Function *PerFn = nullptr;
- if (F->hasPersonalityFn()) {
- PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ if (F.hasPersonalityFn()) {
+ PerFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
Per = classifyEHPersonality(PerFn);
}
- bool forceEmitPersonality = F->hasPersonalityFn() &&
+ bool forceEmitPersonality = F.hasPersonalityFn() &&
!isNoOpWithoutInvoke(Per) &&
- F->needsUnwindTableEntry();
+ F.needsUnwindTableEntry();
shouldEmitPersonality =
forceEmitPersonality || ((hasLandingPads || hasEHFunclets) &&
@@ -101,7 +98,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
// functions may still refer to it.
const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
StringRef FLinkageName =
- GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName());
+ GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
}
shouldEmitLSDA = hasEHFunclets;
@@ -118,10 +115,10 @@ void WinException::endFunction(const MachineFunction *MF) {
if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA)
return;
- const Function *F = MF->getFunction();
+ const Function &F = MF->getFunction();
EHPersonality Per = EHPersonality::Unknown;
- if (F->hasPersonalityFn())
- Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts());
+ if (F.hasPersonalityFn())
+ Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
// Get rid of any dead landing pads if we're not using funclets. In funclet
// schemes, the landing pad is not actually reachable. It only exists so
@@ -173,8 +170,8 @@ static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
// Give catches and cleanups a name based off of their parent function and
// their funclet entry block's number.
const MachineFunction *MF = MBB->getParent();
- const Function *F = MF->getFunction();
- StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
+ const Function &F = MF->getFunction();
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
MCContext &Ctx = MF->getContext();
StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch";
return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" +
@@ -186,7 +183,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
MCSymbol *Sym) {
CurrentFuncletEntry = &MBB;
- const Function *F = Asm->MF->getFunction();
+ const Function &F = Asm->MF->getFunction();
// If a symbol was not provided for the funclet, invent one.
if (!Sym) {
Sym = getMCSymbolForMBB(Asm, &MBB);
@@ -201,7 +198,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
// We want our funclet's entry point to be aligned such that no nops will be
// present after the label.
Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()),
- F);
+ &F);
// Now that we've emitted the alignment directive, point at our funclet.
Asm->OutStreamer->EmitLabel(Sym);
@@ -218,8 +215,8 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
const Function *PerFn = nullptr;
// Determine which personality routine we are using for this funclet.
- if (F->hasPersonalityFn())
- PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ if (F.hasPersonalityFn())
+ PerFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
const MCSymbol *PersHandlerSym =
TLOF.getCFIPersonalitySymbol(PerFn, Asm->TM, MMI);
@@ -240,10 +237,10 @@ void WinException::endFunclet() {
const MachineFunction *MF = Asm->MF;
if (shouldEmitMoves || shouldEmitPersonality) {
- const Function *F = MF->getFunction();
+ const Function &F = MF->getFunction();
EHPersonality Per = EHPersonality::Unknown;
- if (F->hasPersonalityFn())
- Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts());
+ if (F.hasPersonalityFn())
+ Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
// Emit an UNWIND_INFO struct describing the prologue.
Asm->OutStreamer->EmitWinEHHandlerData();
@@ -252,7 +249,7 @@ void WinException::endFunclet() {
!CurrentFuncletEntry->isCleanupFuncletEntry()) {
// If this is a C++ catch funclet (or the parent function),
// emit a reference to the LSDA for the parent function.
- StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
Twine("$cppxdata$", FuncLinkageName));
Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4);
@@ -536,7 +533,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
// Emit a label assignment with the SEH frame offset so we can use it for
// llvm.x86.seh.recoverfp.
StringRef FLinkageName =
- GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName());
+ GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
MCSymbol *ParentFrameOffset =
Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
const MCExpr *MCOffset =
@@ -631,11 +628,11 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
}
void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
- const Function *F = MF->getFunction();
+ const Function &F = MF->getFunction();
auto &OS = *Asm->OutStreamer;
const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
- StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable;
MCSymbol *FuncInfoXData = nullptr;
@@ -941,8 +938,8 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
/// indexed by state number instead of IP.
void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
MCStreamer &OS = *Asm->OutStreamer;
- const Function *F = MF->getFunction();
- StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
+ const Function &F = MF->getFunction();
+ StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
bool VerboseAsm = OS.isVerboseAsm();
auto AddComment = [&](const Twine &Comment) {
@@ -959,7 +956,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
OS.EmitLabel(LSDALabel);
const Function *Per =
- dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
StringRef PerName = Per->getName();
int BaseState = -1;
if (PerName == "_except_handler4") {
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index aa9c8e94d08a..7042bc997223 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -1,4 +1,4 @@
-//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===//
+//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,31 +15,54 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
#define DEBUG_TYPE "atomic-expand"
namespace {
+
class AtomicExpand: public FunctionPass {
- const TargetLowering *TLI;
+ const TargetLowering *TLI = nullptr;
+
public:
static char ID; // Pass identification, replacement for typeid
- AtomicExpand() : FunctionPass(ID), TLI(nullptr) {
+
+ AtomicExpand() : FunctionPass(ID) {
initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
}
@@ -92,39 +115,41 @@ namespace {
llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg);
};
-}
+
+} // end anonymous namespace
char AtomicExpand::ID = 0;
+
char &llvm::AtomicExpandID = AtomicExpand::ID;
+
INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
false, false)
FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
-namespace {
// Helper functions to retrieve the size of atomic instructions.
-unsigned getAtomicOpSize(LoadInst *LI) {
+static unsigned getAtomicOpSize(LoadInst *LI) {
const DataLayout &DL = LI->getModule()->getDataLayout();
return DL.getTypeStoreSize(LI->getType());
}
-unsigned getAtomicOpSize(StoreInst *SI) {
+static unsigned getAtomicOpSize(StoreInst *SI) {
const DataLayout &DL = SI->getModule()->getDataLayout();
return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}
-unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
+static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
const DataLayout &DL = RMWI->getModule()->getDataLayout();
return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}
-unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
+static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
const DataLayout &DL = CASI->getModule()->getDataLayout();
return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}
// Helper functions to retrieve the alignment of atomic instructions.
-unsigned getAtomicOpAlign(LoadInst *LI) {
+static unsigned getAtomicOpAlign(LoadInst *LI) {
unsigned Align = LI->getAlignment();
// In the future, if this IR restriction is relaxed, we should
// return DataLayout::getABITypeAlignment when there's no align
@@ -133,7 +158,7 @@ unsigned getAtomicOpAlign(LoadInst *LI) {
return Align;
}
-unsigned getAtomicOpAlign(StoreInst *SI) {
+static unsigned getAtomicOpAlign(StoreInst *SI) {
unsigned Align = SI->getAlignment();
// In the future, if this IR restriction is relaxed, we should
// return DataLayout::getABITypeAlignment when there's no align
@@ -142,7 +167,7 @@ unsigned getAtomicOpAlign(StoreInst *SI) {
return Align;
}
-unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
+static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
// TODO(PR27168): This instruction has no alignment attribute, but unlike the
// default alignment for load/store, the default here is to assume
// it has NATURAL alignment, not DataLayout-specified alignment.
@@ -150,7 +175,7 @@ unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}
-unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
+static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
// TODO(PR27168): same comment as above.
const DataLayout &DL = CASI->getModule()->getDataLayout();
return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
@@ -160,14 +185,12 @@ unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
template <typename Inst>
-bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
+static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
unsigned Size = getAtomicOpSize(I);
unsigned Align = getAtomicOpAlign(I);
return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
-} // end anonymous namespace
-
bool AtomicExpand::runOnFunction(Function &F) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
@@ -320,16 +343,10 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
- // The trailing fence is emitted before the instruction instead of after
- // because there is no easy way of setting Builder insertion point after
- // an instruction. So we must erase it from the BB, and insert it back
- // in the right place.
// We have a guard here because not every atomic operation generates a
// trailing fence.
- if (TrailingFence) {
- TrailingFence->removeFromParent();
- TrailingFence->insertAfter(I);
- }
+ if (TrailingFence)
+ TrailingFence->moveAfter(I);
return (LeadingFence || TrailingFence);
}
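
Instruction::moveAfter does the unlink-and-relink in one call, which is why the removeFromParent()/insertAfter() pair above could be dropped. A minimal use, mirroring bracketInstWithFences (a sketch, not the pass's exact code):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"

using namespace llvm;

// Emit a trailing fence at the builder's current position, then relocate it
// to sit immediately after I.
void placeTrailingFence(IRBuilder<> &Builder, Instruction *I,
                        AtomicOrdering Order) {
  FenceInst *Trailing = Builder.CreateFence(Order);
  Trailing->moveAfter(I);  // one call replaces removeFromParent + insertAfter
}
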
@@ -562,6 +579,7 @@ struct PartwordMaskValues {
Value *Mask;
Value *Inv_Mask;
};
+
} // end anonymous namespace
/// This is a helper function which builds instructions to provide
@@ -580,7 +598,6 @@ struct PartwordMaskValues {
/// include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
-
static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
Type *ValueType, Value *Addr,
unsigned WordSize) {
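A worked instance of the fields described above may help; the numbers below
assume a little-endian target, a 32-bit word, and an i16 operand at an address
with Addr % 4 == 2 (a hypothetical example, not output of the pass):

    #include <cstdint>
    // i16 atomic at Addr = 0x1006, widened to the containing i32 word.
    const uint64_t Addr        = 0x1006;
    const uint64_t AlignedAddr = Addr & ~uint64_t(3); // 0x1004
    const unsigned ShiftAmt    = (Addr & 3) * 8;      // 16 bits
    const uint32_t Mask        = 0xFFFFu << ShiftAmt; // 0xFFFF0000
    const uint32_t Inv_Mask    = ~Mask;               // 0x0000FFFF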
@@ -686,7 +703,6 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
/// part of the value.
void AtomicExpand::expandPartwordAtomicRMW(
AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
-
assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
AtomicOrdering MemOpOrder = AI->getOrdering();
@@ -943,7 +959,6 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *
return NewCI;
}
-
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
AtomicOrdering FailureOrder = CI->getFailureOrdering();
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index be93ff0dad29..d11f375b176e 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -15,21 +15,20 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
-#include <utility>
+#include "llvm/Target/TargetMachine.h"
+
using namespace llvm;
// This flag is used by the template base class for BasicTTIImpl, and here to
// provide a definition.
cl::opt<unsigned>
- llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
- cl::desc("Threshold for partial unrolling"),
- cl::Hidden);
+llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
+ cl::desc("Threshold for partial unrolling"),
+ cl::Hidden);
BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
diff --git a/lib/CodeGen/BranchCoalescing.cpp b/lib/CodeGen/BranchCoalescing.cpp
deleted file mode 100644
index 2c41b597843c..000000000000
--- a/lib/CodeGen/BranchCoalescing.cpp
+++ /dev/null
@@ -1,758 +0,0 @@
-//===-- CoalesceBranches.cpp - Coalesce blocks with the same condition ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Coalesce basic blocks guarded by the same branch condition into a single
-/// basic block.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "branch-coalescing"
-
-static cl::opt<cl::boolOrDefault>
- EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden,
- cl::desc("enable coalescing of duplicate branches"));
-
-STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced");
-STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
-STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
-
-//===----------------------------------------------------------------------===//
-// BranchCoalescing
-//===----------------------------------------------------------------------===//
-///
-/// Improve scheduling by coalescing branches that depend on the same condition.
-/// This pass looks for blocks that are guarded by the same branch condition
-/// and attempts to merge the blocks together. Such opportunities arise from
-/// the expansion of select statements in the IR.
-///
-/// For example, consider the following LLVM IR:
-///
-/// %test = icmp eq i32 %x 0
-/// %tmp1 = select i1 %test, double %a, double 2.000000e-03
-/// %tmp2 = select i1 %test, double %b, double 5.000000e-03
-///
-/// This IR expands to the following machine code on PowerPC:
-///
-/// BB#0: derived from LLVM BB %entry
-/// Live Ins: %F1 %F3 %X6
-/// <SNIP1>
-/// %vreg0<def> = COPY %F1; F8RC:%vreg0
-/// %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4
-/// %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>;
-/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
-/// BCC 76, %vreg5, <BB#2>; CRRC:%vreg5
-/// Successors according to CFG: BB#1(?%) BB#2(?%)
-///
-/// BB#1: derived from LLVM BB %entry
-/// Predecessors according to CFG: BB#0
-/// Successors according to CFG: BB#2(?%)
-///
-/// BB#2: derived from LLVM BB %entry
-/// Predecessors according to CFG: BB#0 BB#1
-/// %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
-/// F8RC:%vreg9,%vreg8,%vreg0
-/// <SNIP2>
-/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
-/// Successors according to CFG: BB#3(?%) BB#4(?%)
-///
-/// BB#3: derived from LLVM BB %entry
-/// Predecessors according to CFG: BB#2
-/// Successors according to CFG: BB#4(?%)
-///
-/// BB#4: derived from LLVM BB %entry
-/// Predecessors according to CFG: BB#2 BB#3
-/// %vreg13<def> = PHI %vreg12, <BB#3>, %vreg2, <BB#2>;
-/// F8RC:%vreg13,%vreg12,%vreg2
-/// <SNIP3>
-/// BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use>
-///
-/// When this pattern is detected, branch coalescing will try to collapse
-/// it by moving code in BB#2 to BB#0 and/or BB#4 and removing BB#3.
-///
-/// If all conditions are met, the IR should collapse to:
-///
-/// BB#0: derived from LLVM BB %entry
-/// Live Ins: %F1 %F3 %X6
-/// <SNIP1>
-/// %vreg0<def> = COPY %F1; F8RC:%vreg0
-/// %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4
-/// %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>;
-/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
-/// <SNIP2>
-/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
-/// Successors according to CFG: BB#1(0x2aaaaaaa / 0x80000000 = 33.33%)
-/// BB#4(0x55555554 / 0x80000000 = 66.67%)
-///
-/// BB#1: derived from LLVM BB %entry
-/// Predecessors according to CFG: BB#0
-/// Successors according to CFG: BB#4(0x40000000 / 0x80000000 = 50.00%)
-///
-/// BB#4: derived from LLVM BB %entry
-/// Predecessors according to CFG: BB#0 BB#1
-/// %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
-/// F8RC:%vreg9,%vreg8,%vreg0
-/// %vreg13<def> = PHI %vreg12, <BB#1>, %vreg2, <BB#0>;
-/// F8RC:%vreg13,%vreg12,%vreg2
-/// <SNIP3>
-/// BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use>
-///
-/// Branch Coalescing does not split blocks; it moves everything in the same
-/// direction, ensuring it does not break use/definition semantics.
-///
-/// PHI nodes and their corresponding use instructions are moved to the
-/// successor block if there are no uses of them within the successor block's
-/// PHI nodes. PHI node ordering cannot be assumed.
-///
-/// Non-PHI instructions can be moved up to the predecessor basic block, or
-/// down to the successor basic block following any PHI instructions. Whether
-/// an instruction moves up or down depends on whether the register(s) it
-/// defines are used in the current block or in any PHI instructions at the
-/// beginning of the successor block.
-
-namespace {
-
-class BranchCoalescing : public MachineFunctionPass {
- struct CoalescingCandidateInfo {
- MachineBasicBlock *BranchBlock; // Block containing the branch
- MachineBasicBlock *BranchTargetBlock; // Block branched to
- MachineBasicBlock *FallThroughBlock; // Fall-through if branch not taken
- SmallVector<MachineOperand, 4> Cond;
- bool MustMoveDown;
- bool MustMoveUp;
-
- CoalescingCandidateInfo();
- void clear();
- };
-
- MachineDominatorTree *MDT;
- MachinePostDominatorTree *MPDT;
- const TargetInstrInfo *TII;
- MachineRegisterInfo *MRI;
-
- void initialize(MachineFunction &F);
- bool canCoalesceBranch(CoalescingCandidateInfo &Cand);
- bool identicalOperands(ArrayRef<MachineOperand> OperandList1,
- ArrayRef<MachineOperand> OperandList2) const;
- bool validateCandidates(CoalescingCandidateInfo &SourceRegion,
- CoalescingCandidateInfo &TargetRegion) const;
-
- static bool isBranchCoalescingEnabled() {
- return EnableBranchCoalescing == cl::BOU_TRUE;
- }
-
-public:
- static char ID;
-
- BranchCoalescing() : MachineFunctionPass(ID) {
- initializeBranchCoalescingPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- StringRef getPassName() const override { return "Branch Coalescing"; }
-
- bool mergeCandidates(CoalescingCandidateInfo &SourceRegion,
- CoalescingCandidateInfo &TargetRegion);
- bool canMoveToBeginning(const MachineInstr &MI,
- const MachineBasicBlock &MBB) const;
- bool canMoveToEnd(const MachineInstr &MI,
- const MachineBasicBlock &MBB) const;
- bool canMerge(CoalescingCandidateInfo &SourceRegion,
- CoalescingCandidateInfo &TargetRegion) const;
- void moveAndUpdatePHIs(MachineBasicBlock *SourceRegionMBB,
- MachineBasicBlock *TargetRegionMBB);
- bool runOnMachineFunction(MachineFunction &MF) override;
-};
-} // End anonymous namespace.
-
-char BranchCoalescing::ID = 0;
-char &llvm::BranchCoalescingID = BranchCoalescing::ID;
-
-INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE,
- "Branch Coalescing", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing",
- false, false)
-
-BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo()
- : BranchBlock(nullptr), BranchTargetBlock(nullptr),
- FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {}
-
-void BranchCoalescing::CoalescingCandidateInfo::clear() {
- BranchBlock = nullptr;
- BranchTargetBlock = nullptr;
- FallThroughBlock = nullptr;
- Cond.clear();
- MustMoveDown = false;
- MustMoveUp = false;
-}
-
-void BranchCoalescing::initialize(MachineFunction &MF) {
- MDT = &getAnalysis<MachineDominatorTree>();
- MPDT = &getAnalysis<MachinePostDominatorTree>();
- TII = MF.getSubtarget().getInstrInfo();
- MRI = &MF.getRegInfo();
-}
-
-///
-/// Analyze the branch statement to determine if it can be coalesced. This
-/// method analyses the branch statement for the given candidate to determine
-/// if it can be coalesced. If the branch can be coalesced, then the
-/// BranchTargetBlock and the FallThroughBlock are recorded in the specified
-/// Candidate.
-///
-///\param[in,out] Cand The coalescing candidate to analyze
-///\return true if and only if the branch can be coalesced, false otherwise
-///
-bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
- DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber()
- << " can be coalesced:");
- MachineBasicBlock *FalseMBB = nullptr;
-
- if (TII->analyzeBranch(*Cand.BranchBlock, Cand.BranchTargetBlock, FalseMBB,
- Cand.Cond)) {
- DEBUG(dbgs() << "TII unable to Analyze Branch - skip\n");
- return false;
- }
-
- for (auto &I : Cand.BranchBlock->terminators()) {
- DEBUG(dbgs() << "Looking at terminator : " << I << "\n");
- if (!I.isBranch())
- continue;
-
- if (I.getNumOperands() != I.getNumExplicitOperands()) {
- DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I
- << "\n");
- return false;
- }
- }
-
- if (Cand.BranchBlock->isEHPad() || Cand.BranchBlock->hasEHPadSuccessor()) {
- DEBUG(dbgs() << "EH Pad - skip\n");
- return false;
- }
-
-  // For now only consider triangles (i.e., BranchTargetBlock is set,
- // FalseMBB is null, and BranchTargetBlock is a successor to BranchBlock)
- if (!Cand.BranchTargetBlock || FalseMBB ||
- !Cand.BranchBlock->isSuccessor(Cand.BranchTargetBlock)) {
- DEBUG(dbgs() << "Does not form a triangle - skip\n");
- return false;
- }
-
- // Ensure there are only two successors
- if (Cand.BranchBlock->succ_size() != 2) {
- DEBUG(dbgs() << "Does not have 2 successors - skip\n");
- return false;
- }
-
- // Sanity check - the block must be able to fall through
- assert(Cand.BranchBlock->canFallThrough() &&
- "Expecting the block to fall through!");
-
- // We have already ensured there are exactly two successors to
- // BranchBlock and that BranchTargetBlock is a successor to BranchBlock.
-  // Ensure the single fall-through block is empty.
- MachineBasicBlock *Succ =
- (*Cand.BranchBlock->succ_begin() == Cand.BranchTargetBlock)
- ? *Cand.BranchBlock->succ_rbegin()
- : *Cand.BranchBlock->succ_begin();
-
- assert(Succ && "Expecting a valid fall-through block\n");
-
- if (!Succ->empty()) {
- DEBUG(dbgs() << "Fall-through block contains code -- skip\n");
- return false;
- }
-
- if (!Succ->isSuccessor(Cand.BranchTargetBlock)) {
- DEBUG(dbgs()
- << "Successor of fall through block is not branch taken block\n");
- return false;
- }
-
- Cand.FallThroughBlock = Succ;
- DEBUG(dbgs() << "Valid Candidate\n");
- return true;
-}
-
-///
-/// Determine if the two operand lists are identical
-///
-/// \param[in] OpList1 operand list
-/// \param[in] OpList2 operand list
-/// \return true if and only if the operands lists are identical
-///
-bool BranchCoalescing::identicalOperands(
- ArrayRef<MachineOperand> OpList1, ArrayRef<MachineOperand> OpList2) const {
-
- if (OpList1.size() != OpList2.size()) {
- DEBUG(dbgs() << "Operand list is different size\n");
- return false;
- }
-
- for (unsigned i = 0; i < OpList1.size(); ++i) {
- const MachineOperand &Op1 = OpList1[i];
- const MachineOperand &Op2 = OpList2[i];
-
- DEBUG(dbgs() << "Op1: " << Op1 << "\n"
- << "Op2: " << Op2 << "\n");
-
- if (Op1.isIdenticalTo(Op2)) {
- DEBUG(dbgs() << "Op1 and Op2 are identical!\n");
- continue;
- }
-
- // If the operands are not identical, but are registers, check to see if the
- // definition of the register produces the same value. If they produce the
- // same value, consider them to be identical.
- if (Op1.isReg() && Op2.isReg() &&
- TargetRegisterInfo::isVirtualRegister(Op1.getReg()) &&
- TargetRegisterInfo::isVirtualRegister(Op2.getReg())) {
- MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg());
- MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg());
- if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) {
- DEBUG(dbgs() << "Op1Def: " << *Op1Def << " and " << *Op2Def
- << " produce the same value!\n");
- } else {
- DEBUG(dbgs() << "Operands produce different values\n");
- return false;
- }
- } else {
- DEBUG(dbgs() << "The operands are not provably identical.\n");
- return false;
- }
- }
- return true;
-}
-
-///
-/// Moves ALL PHI instructions in SourceMBB to the beginning of TargetMBB
-/// and updates them to refer to the new block. PHI node ordering cannot
-/// be assumed, so it does not matter where within TargetMBB the PHI
-/// instructions are moved to.
-///
-/// \param[in] SourceMBB block to move PHI instructions from
-/// \param[in] TargetMBB block to move PHI instructions to
-///
-void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB,
- MachineBasicBlock *TargetMBB) {
-
- MachineBasicBlock::iterator MI = SourceMBB->begin();
- MachineBasicBlock::iterator ME = SourceMBB->getFirstNonPHI();
-
- if (MI == ME) {
- DEBUG(dbgs() << "SourceMBB contains no PHI instructions.\n");
- return;
- }
-
- // Update all PHI instructions in SourceMBB and move to top of TargetMBB
- for (MachineBasicBlock::iterator Iter = MI; Iter != ME; Iter++) {
- MachineInstr &PHIInst = *Iter;
- for (unsigned i = 2, e = PHIInst.getNumOperands() + 1; i != e; i += 2) {
- MachineOperand &MO = PHIInst.getOperand(i);
- if (MO.getMBB() == SourceMBB)
- MO.setMBB(TargetMBB);
- }
- }
- TargetMBB->splice(TargetMBB->begin(), SourceMBB, MI, ME);
-}
-
-///
-/// This function checks if MI can be moved to the beginning of the TargetMBB,
-/// following any PHI instructions. An instruction can be moved to the
-/// beginning of the TargetMBB if none of its definitions are used by the
-/// TargetMBB's PHI nodes.
-///
-/// \param[in] MI the machine instruction to move.
-/// \param[in] TargetMBB the machine basic block to move to
-/// \return true if it is safe to move MI to beginning of TargetMBB,
-/// false otherwise.
-///
-bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI,
- const MachineBasicBlock &TargetMBB
- ) const {
-
- DEBUG(dbgs() << "Checking if " << MI << " can move to beginning of "
- << TargetMBB.getNumber() << "\n");
-
- for (auto &Def : MI.defs()) { // Looking at Def
- for (auto &Use : MRI->use_instructions(Def.getReg())) {
- if (Use.isPHI() && Use.getParent() == &TargetMBB) {
- DEBUG(dbgs() << " *** used in a PHI -- cannot move ***\n");
- return false;
- }
- }
- }
-
- DEBUG(dbgs() << " Safe to move to the beginning.\n");
- return true;
-}
-
-///
-/// This function checks if MI can be moved to the end of the TargetMBB,
-/// immediately before the first terminator. An instruction can be moved to
-/// the end of the TargetMBB if no PHI node in MI's own MBB defines a value
-/// that MI uses.
-///
-/// \param[in] MI the machine instruction to move.
-/// \param[in] TargetMBB the machine basic block to move to
-/// \return true if it is safe to move MI to end of TargetMBB,
-/// false otherwise.
-///
-bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI,
- const MachineBasicBlock &TargetMBB
- ) const {
-
- DEBUG(dbgs() << "Checking if " << MI << " can move to end of "
- << TargetMBB.getNumber() << "\n");
-
- for (auto &Use : MI.uses()) {
- if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) {
- MachineInstr *DefInst = MRI->getVRegDef(Use.getReg());
- if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) {
- DEBUG(dbgs() << " *** Cannot move this instruction ***\n");
- return false;
- } else {
- DEBUG(dbgs() << " *** def is in another block -- safe to move!\n");
- }
- }
- }
-
- DEBUG(dbgs() << " Safe to move to the end.\n");
- return true;
-}
-
-///
-/// This method checks that the two coalescing candidates follow the
-/// expected pattern required for coalescing.
-///
-/// \param[in] SourceRegion The candidate to move statements from
-/// \param[in] TargetRegion The candidate to move statements to
-/// \return true if all instructions in SourceRegion.BranchBlock can be merged
-/// into a block in TargetRegion; false otherwise.
-///
-bool BranchCoalescing::validateCandidates(
- CoalescingCandidateInfo &SourceRegion,
- CoalescingCandidateInfo &TargetRegion) const {
-
- if (TargetRegion.BranchTargetBlock != SourceRegion.BranchBlock)
- llvm_unreachable("Expecting SourceRegion to immediately follow TargetRegion");
- else if (!MDT->dominates(TargetRegion.BranchBlock, SourceRegion.BranchBlock))
- llvm_unreachable("Expecting TargetRegion to dominate SourceRegion");
- else if (!MPDT->dominates(SourceRegion.BranchBlock, TargetRegion.BranchBlock))
- llvm_unreachable("Expecting SourceRegion to post-dominate TargetRegion");
- else if (!TargetRegion.FallThroughBlock->empty() ||
- !SourceRegion.FallThroughBlock->empty())
- llvm_unreachable("Expecting fall-through blocks to be empty");
-
- return true;
-}
-
-///
-/// This method determines whether the two coalescing candidates can be merged.
-/// In order to be merged, all instructions must be able to
-/// 1. Move to the beginning of the SourceRegion.BranchTargetBlock;
-/// 2. Move to the end of the TargetRegion.BranchBlock.
-/// Merging involves moving the instructions in the
-/// TargetRegion.BranchTargetBlock (also SourceRegion.BranchBlock).
-///
-/// This function first tries to move instructions from the
-/// TargetRegion.BranchTargetBlock down, to the beginning of the
-/// SourceRegion.BranchTargetBlock. This is not possible if any register defined
-/// in TargetRegion.BranchTargetBlock is used in a PHI node in the
-/// SourceRegion.BranchTargetBlock. In this case, check whether the statement
-/// can be moved up, to the end of the TargetRegion.BranchBlock (immediately
-/// before the branch statement). If it cannot move, then these blocks cannot
-/// be merged.
-///
-/// Note that there is no analysis for moving instructions past the fall-through
-/// blocks because they are confirmed to be empty. An assertion fires if they
-/// are not.
-///
-/// \param[in] SourceRegion The candidate to move statements from
-/// \param[in] TargetRegion The candidate to move statements to
-/// \return true if all instructions in SourceRegion.BranchBlock can be merged
-/// into a block in TargetRegion, false otherwise.
-///
-bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion,
- CoalescingCandidateInfo &TargetRegion) const {
- if (!validateCandidates(SourceRegion, TargetRegion))
- return false;
-
- // Walk through PHI nodes first and see if they force the merge into the
- // SourceRegion.BranchTargetBlock.
- for (MachineBasicBlock::iterator
- I = SourceRegion.BranchBlock->instr_begin(),
- E = SourceRegion.BranchBlock->getFirstNonPHI();
- I != E; ++I) {
- for (auto &Def : I->defs())
- for (auto &Use : MRI->use_instructions(Def.getReg())) {
- if (Use.isPHI() && Use.getParent() == SourceRegion.BranchTargetBlock) {
- DEBUG(dbgs() << "PHI " << *I << " defines register used in another "
- "PHI within branch target block -- can't merge\n");
- NumPHINotMoved++;
- return false;
- }
- if (Use.getParent() == SourceRegion.BranchBlock) {
- DEBUG(dbgs() << "PHI " << *I
- << " defines register used in this "
- "block -- all must move down\n");
- SourceRegion.MustMoveDown = true;
- }
- }
- }
-
- // Walk through the MI to see if they should be merged into
- // TargetRegion.BranchBlock (up) or SourceRegion.BranchTargetBlock (down)
- for (MachineBasicBlock::iterator
- I = SourceRegion.BranchBlock->getFirstNonPHI(),
- E = SourceRegion.BranchBlock->end();
- I != E; ++I) {
- if (!canMoveToBeginning(*I, *SourceRegion.BranchTargetBlock)) {
- DEBUG(dbgs() << "Instruction " << *I
- << " cannot move down - must move up!\n");
- SourceRegion.MustMoveUp = true;
- }
- if (!canMoveToEnd(*I, *TargetRegion.BranchBlock)) {
- DEBUG(dbgs() << "Instruction " << *I
- << " cannot move up - must move down!\n");
- SourceRegion.MustMoveDown = true;
- }
- }
-
- return (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) ? false : true;
-}
-
-/// Merge the instructions from SourceRegion.BranchBlock,
-/// SourceRegion.BranchTargetBlock, and SourceRegion.FallThroughBlock into
-/// TargetRegion.BranchBlock, TargetRegion.BranchTargetBlock and
-/// TargetRegion.FallThroughBlock respectively.
-///
-/// The successors for blocks in TargetRegion will be updated to use the
-/// successors from blocks in SourceRegion. Finally, the blocks in SourceRegion
-/// will be removed from the function.
-///
-/// A region consists of a BranchBlock, a FallThroughBlock, and a
-/// BranchTargetBlock. Branch coalescing works on patterns where the
-/// TargetRegion's BranchTargetBlock must also be the SourceRegion's
-/// BranchBlock.
-///
-/// Before mergeCandidates:
-///
-/// +---------------------------+
-/// | TargetRegion.BranchBlock |
-/// +---------------------------+
-/// / |
-/// / +--------------------------------+
-/// | | TargetRegion.FallThroughBlock |
-/// \ +--------------------------------+
-/// \ |
-/// +----------------------------------+
-/// | TargetRegion.BranchTargetBlock |
-/// | SourceRegion.BranchBlock |
-/// +----------------------------------+
-/// / |
-/// / +--------------------------------+
-/// | | SourceRegion.FallThroughBlock |
-/// \ +--------------------------------+
-/// \ |
-/// +----------------------------------+
-/// | SourceRegion.BranchTargetBlock |
-/// +----------------------------------+
-///
-/// After mergeCandidates:
-///
-/// +-----------------------------+
-/// | TargetRegion.BranchBlock |
-/// | SourceRegion.BranchBlock |
-/// +-----------------------------+
-/// / |
-/// / +---------------------------------+
-/// | | TargetRegion.FallThroughBlock |
-/// | | SourceRegion.FallThroughBlock |
-/// \ +---------------------------------+
-/// \ |
-/// +----------------------------------+
-/// | SourceRegion.BranchTargetBlock |
-/// +----------------------------------+
-///
-/// \param[in] SourceRegion The candidate to move blocks from
-/// \param[in] TargetRegion The candidate to move blocks to
-///
-bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion,
- CoalescingCandidateInfo &TargetRegion) {
-
- if (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) {
- llvm_unreachable("Cannot have both MustMoveDown and MustMoveUp set!");
- return false;
- }
-
- if (!validateCandidates(SourceRegion, TargetRegion))
- return false;
-
- // Start the merging process by first handling the BranchBlock.
- // Move any PHIs in SourceRegion.BranchBlock down to the branch-taken block
- moveAndUpdatePHIs(SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock);
-
- // Move remaining instructions in SourceRegion.BranchBlock into
- // TargetRegion.BranchBlock
- MachineBasicBlock::iterator firstInstr =
- SourceRegion.BranchBlock->getFirstNonPHI();
- MachineBasicBlock::iterator lastInstr =
- SourceRegion.BranchBlock->getFirstTerminator();
-
- MachineBasicBlock *Source = SourceRegion.MustMoveDown
- ? SourceRegion.BranchTargetBlock
- : TargetRegion.BranchBlock;
-
- MachineBasicBlock::iterator Target =
- SourceRegion.MustMoveDown
- ? SourceRegion.BranchTargetBlock->getFirstNonPHI()
- : TargetRegion.BranchBlock->getFirstTerminator();
-
- Source->splice(Target, SourceRegion.BranchBlock, firstInstr, lastInstr);
-
- // Once PHI and instructions have been moved we need to clean up the
- // control flow.
-
- // Remove SourceRegion.FallThroughBlock before transferring successors of
- // SourceRegion.BranchBlock to TargetRegion.BranchBlock.
- SourceRegion.BranchBlock->removeSuccessor(SourceRegion.FallThroughBlock);
- TargetRegion.BranchBlock->transferSuccessorsAndUpdatePHIs(
- SourceRegion.BranchBlock);
- // Update branch in TargetRegion.BranchBlock to jump to
- // SourceRegion.BranchTargetBlock
- // In this case, TargetRegion.BranchTargetBlock == SourceRegion.BranchBlock.
- TargetRegion.BranchBlock->ReplaceUsesOfBlockWith(
- SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock);
- // Remove the branch statement(s) in SourceRegion.BranchBlock
- MachineBasicBlock::iterator I =
- SourceRegion.BranchBlock->terminators().begin();
- while (I != SourceRegion.BranchBlock->terminators().end()) {
- MachineInstr &CurrInst = *I;
- ++I;
- if (CurrInst.isBranch())
- CurrInst.eraseFromParent();
- }
-
- // Fall-through block should be empty since this is part of the condition
- // to coalesce the branches.
- assert(TargetRegion.FallThroughBlock->empty() &&
- "FallThroughBlocks should be empty!");
-
- // Transfer successor information and move PHIs down to the
- // branch-taken block.
- TargetRegion.FallThroughBlock->transferSuccessorsAndUpdatePHIs(
- SourceRegion.FallThroughBlock);
- TargetRegion.FallThroughBlock->removeSuccessor(SourceRegion.BranchBlock);
-
- // Remove the blocks from the function.
- assert(SourceRegion.BranchBlock->empty() &&
- "Expecting branch block to be empty!");
- SourceRegion.BranchBlock->eraseFromParent();
-
- assert(SourceRegion.FallThroughBlock->empty() &&
- "Expecting fall-through block to be empty!\n");
- SourceRegion.FallThroughBlock->eraseFromParent();
-
- NumBlocksCoalesced++;
- return true;
-}
-
-bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
-
- if (skipFunction(*MF.getFunction()) || MF.empty() ||
- !isBranchCoalescingEnabled())
- return false;
-
- bool didSomething = false;
-
- DEBUG(dbgs() << "******** Branch Coalescing ********\n");
- initialize(MF);
-
- DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n");
-
- CoalescingCandidateInfo Cand1, Cand2;
-  // Walk over blocks and find candidates to merge.
-  // Continue trying to merge with the first candidate found, as long as
-  // merging is successful.
- for (MachineBasicBlock &MBB : MF) {
- bool MergedCandidates = false;
- do {
- MergedCandidates = false;
- Cand1.clear();
- Cand2.clear();
-
- Cand1.BranchBlock = &MBB;
-
- // If unable to coalesce the branch, then continue to next block
- if (!canCoalesceBranch(Cand1))
- break;
-
- Cand2.BranchBlock = Cand1.BranchTargetBlock;
- if (!canCoalesceBranch(Cand2))
- break;
-
- // Sanity check
- // The branch-taken block of the second candidate should post-dominate the
- // first candidate
- assert(MPDT->dominates(Cand2.BranchTargetBlock, Cand1.BranchBlock) &&
- "Branch-taken block should post-dominate first candidate");
-
- if (!identicalOperands(Cand1.Cond, Cand2.Cond)) {
- DEBUG(dbgs() << "Blocks " << Cand1.BranchBlock->getNumber() << " and "
- << Cand2.BranchBlock->getNumber()
- << " have different branches\n");
- break;
- }
- if (!canMerge(Cand2, Cand1)) {
- DEBUG(dbgs() << "Cannot merge blocks " << Cand1.BranchBlock->getNumber()
- << " and " << Cand2.BranchBlock->getNumber() << "\n");
- NumBlocksNotCoalesced++;
- continue;
- }
- DEBUG(dbgs() << "Merging blocks " << Cand1.BranchBlock->getNumber()
- << " and " << Cand1.BranchTargetBlock->getNumber() << "\n");
- MergedCandidates = mergeCandidates(Cand2, Cand1);
- if (MergedCandidates)
- didSomething = true;
-
- DEBUG(dbgs() << "Function after merging: "; MF.dump(); dbgs() << "\n");
- } while (MergedCandidates);
- }
-
-#ifndef NDEBUG
- // Verify MF is still valid after branch coalescing
- if (didSomething)
- MF.verify(nullptr, "Error in code produced by branch coalescing");
-#endif // NDEBUG
-
- DEBUG(dbgs() << "Finished Branch Coalescing\n");
- return didSomething;
-}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 3c439e66944b..7f358a679366 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -19,27 +19,35 @@
#include "BranchFolding.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
@@ -48,10 +56,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstddef>
#include <iterator>
@@ -81,8 +86,8 @@ TailMergeThreshold("tail-merge-threshold",
// TODO: This should be replaced with a target query.
static cl::opt<unsigned>
TailMergeSize("tail-merge-size",
- cl::desc("Min number of instructions to consider tail merging"),
- cl::init(3), cl::Hidden);
+ cl::desc("Min number of instructions to consider tail merging"),
+ cl::init(3), cl::Hidden);
namespace {
@@ -106,13 +111,14 @@ namespace {
} // end anonymous namespace
char BranchFolderPass::ID = 0;
+
char &llvm::BranchFolderPassID = BranchFolderPass::ID;
INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE,
"Control Flow Optimizer", false, false)
bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
@@ -365,15 +371,37 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
return TailLen;
}
-void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
- MachineBasicBlock *NewDest) {
- TII->ReplaceTailWithBranchTo(OldInst, NewDest);
-
+void BranchFolder::replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock &NewDest) {
if (UpdateLiveIns) {
- NewDest->clearLiveIns();
- computeLiveIns(LiveRegs, *MRI, *NewDest);
+ // OldInst should always point to an instruction.
+ MachineBasicBlock &OldMBB = *OldInst->getParent();
+ LiveRegs.clear();
+ LiveRegs.addLiveOuts(OldMBB);
+ // Move backward to the place where will insert the jump.
+ MachineBasicBlock::iterator I = OldMBB.end();
+ do {
+ --I;
+ LiveRegs.stepBackward(*I);
+ } while (I != OldInst);
+
+ // Merging the tails may have switched some undef operand to non-undef ones.
+ // Add IMPLICIT_DEFS into OldMBB as necessary to have a definition of the
+ // register.
+ for (MachineBasicBlock::RegisterMaskPair P : NewDest.liveins()) {
+ // We computed the liveins with computeLiveIn earlier and should only see
+ // full registers:
+ assert(P.LaneMask == LaneBitmask::getAll() &&
+ "Can only handle full register.");
+ MCPhysReg Reg = P.PhysReg;
+ if (!LiveRegs.available(*MRI, Reg))
+ continue;
+ DebugLoc DL;
+ BuildMI(OldMBB, OldInst, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
+ }
}
+ TII->ReplaceTailWithBranchTo(OldInst, &NewDest);
++NumTailMerge;
}
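The register scan added above follows the standard LivePhysRegs backward idiom:
seed the set with the block's live-outs, then step backward so it describes
liveness just before the current instruction. A minimal sketch of that idiom,
assuming TRI points at the current TargetRegisterInfo:

    // Compute the registers live on entry to MBB by walking it bottom-up.
    LivePhysRegs Live(*TRI);
    Live.addLiveOuts(MBB);          // start from the block's live-outs
    for (MachineInstr &MI : llvm::reverse(MBB))
      Live.stepBackward(MI);        // remove defs, then add uses
    // Live now holds the physical registers live-in to MBB.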
@@ -408,7 +436,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
if (UpdateLiveIns)
- computeLiveIns(LiveRegs, *MRI, *NewMBB);
+ computeAndAddLiveIns(LiveRegs, *NewMBB);
// Add the new block to the funclet.
const auto &FuncletI = FuncletMembership.find(&CurMBB);
@@ -585,8 +613,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
if (CommonTailLen == 0)
return false;
- DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber()
- << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen
+ DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1)
+ << " and " << printMBBReference(*MBB2) << " is " << CommonTailLen
<< '\n');
// It's almost always profitable to merge any number of non-terminator
@@ -657,7 +685,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// branch instruction, which is likely to be smaller than the 2
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
- return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() &&
+ return EffectiveTailLen >= 2 && MF->getFunction().optForSize() &&
(I1 == MBB1->begin() || I2 == MBB2->begin());
}
@@ -742,7 +770,7 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
SameTails[commonTailIndex].getTailStartPos();
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
- DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
+ DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size "
<< maxCommonTailLength);
// If the split block unconditionally falls-thru to SuccBB, it will be
@@ -766,43 +794,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
return true;
}
-void BranchFolder::MergeCommonTailDebugLocs(unsigned commonTailIndex) {
- MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
-
- std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());
- for (unsigned int i = 0 ; i != SameTails.size() ; ++i) {
- if (i != commonTailIndex)
- NextCommonInsts[i] = SameTails[i].getTailStartPos();
- else {
- assert(SameTails[i].getTailStartPos() == MBB->begin() &&
- "MBB is not a common tail only block");
- }
- }
-
- for (auto &MI : *MBB) {
- if (MI.isDebugValue())
- continue;
- DebugLoc DL = MI.getDebugLoc();
- for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
- if (i == commonTailIndex)
- continue;
-
- auto &Pos = NextCommonInsts[i];
- assert(Pos != SameTails[i].getBlock()->end() &&
- "Reached BB end within common tail");
- while (Pos->isDebugValue()) {
- ++Pos;
- assert(Pos != SameTails[i].getBlock()->end() &&
- "Reached BB end within common tail");
- }
- assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");
- DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc());
- NextCommonInsts[i] = ++Pos;
- }
- MI.setDebugLoc(DL);
- }
-}
-
static void
mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
MachineBasicBlock &MBBCommon) {
@@ -853,6 +844,67 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
}
}
+void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());
+ for (unsigned int i = 0 ; i != SameTails.size() ; ++i) {
+ if (i != commonTailIndex) {
+ NextCommonInsts[i] = SameTails[i].getTailStartPos();
+ mergeOperations(SameTails[i].getTailStartPos(), *MBB);
+ } else {
+ assert(SameTails[i].getTailStartPos() == MBB->begin() &&
+ "MBB is not a common tail only block");
+ }
+ }
+
+ for (auto &MI : *MBB) {
+ if (MI.isDebugValue())
+ continue;
+ DebugLoc DL = MI.getDebugLoc();
+ for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
+ if (i == commonTailIndex)
+ continue;
+
+ auto &Pos = NextCommonInsts[i];
+ assert(Pos != SameTails[i].getBlock()->end() &&
+ "Reached BB end within common tail");
+ while (Pos->isDebugValue()) {
+ ++Pos;
+ assert(Pos != SameTails[i].getBlock()->end() &&
+ "Reached BB end within common tail");
+ }
+ assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");
+ DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc());
+ NextCommonInsts[i] = ++Pos;
+ }
+ MI.setDebugLoc(DL);
+ }
+
+ if (UpdateLiveIns) {
+ LivePhysRegs NewLiveIns(*TRI);
+ computeLiveIns(NewLiveIns, *MBB);
+
+ // The flag merging may lead to some register uses no longer using the
+ // <undef> flag, add IMPLICIT_DEFs in the predecessors as necessary.
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ LiveRegs.init(*TRI);
+ LiveRegs.addLiveOuts(*Pred);
+ MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator();
+ for (unsigned Reg : NewLiveIns) {
+ if (!LiveRegs.available(*MRI, Reg))
+ continue;
+ DebugLoc DL;
+ BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF),
+ Reg);
+ }
+ }
+
+ MBB->clearLiveIns();
+ addLiveIns(*MBB, NewLiveIns);
+ }
+}
+
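For each surviving instruction, the loop above folds in the locations of its
identical twins with DILocation::getMergedLocation, which yields a location both
paths can honestly claim (in practice often a line-0 location in the nearest
common scope) instead of arbitrarily keeping one side. The per-pair step reduces
to:

    // Merge the debug location of the kept instruction MI with that of
    // its identical counterpart Other in another tail.
    DebugLoc DL = MI.getDebugLoc();
    DL = DILocation::getMergedLocation(DL, Other.getDebugLoc());
    MI.setDebugLoc(DL);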
// See if any of the blocks in MergePotentials (which all have SuccBB as a
// successor, or all have no successor if it is null) can be tail-merged.
// If there is a successor, any blocks in MergePotentials that are not
@@ -868,20 +920,17 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
bool MadeChange = false;
DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
- << (i == e-1 ? "" : ", ");
- dbgs() << "\n";
- if (SuccBB) {
- dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n';
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs()
+ << printMBBReference(*MergePotentials[i].getBlock())
+ << (i == e - 1 ? "" : ", ");
+ dbgs() << "\n"; if (SuccBB) {
+ dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n';
if (PredBB)
- dbgs() << " which has fall-through from BB#"
- << PredBB->getNumber() << "\n";
- }
- dbgs() << "Looking for common tails of at least "
- << MinCommonTailLength << " instruction"
- << (MinCommonTailLength == 1 ? "" : "s") << '\n';
- );
+ dbgs() << " which has fall-through from "
+ << printMBBReference(*PredBB) << "\n";
+ } dbgs() << "Looking for common tails of at least "
+ << MinCommonTailLength << " instruction"
+ << (MinCommonTailLength == 1 ? "" : "s") << '\n';);
// Sort by hash value so that blocks with identical end sequences sort
// together.
@@ -955,22 +1004,21 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Recompute common tail MBB's edge weights and block frequency.
setCommonTailEdgeWeights(*MBB);
- // Merge debug locations across identical instructions for common tail.
- MergeCommonTailDebugLocs(commonTailIndex);
+ // Merge debug locations, MMOs and undef flags across identical instructions
+ // for common tail.
+ mergeCommonTails(commonTailIndex);
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
- DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
+ DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB)
<< " for ");
for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
if (commonTailIndex == i)
continue;
- DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
- << (i == e-1 ? "" : ", "));
- // Merge operations (MMOs, undef flags)
- mergeOperations(SameTails[i].getTailStartPos(), *MBB);
+ DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock())
+ << (i == e - 1 ? "" : ", "));
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
- ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
+ replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
MergePotentials.erase(SameTails[i].getMPIter());
}
@@ -1463,7 +1511,7 @@ ReoptimizeBlock:
}
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
- MF.getFunction()->optForSize()) {
+ MF.getFunction().optForSize()) {
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
// direction, thereby defeating careful block placement and regressing
// performance. Therefore, only consider this for optsize functions.
@@ -1819,7 +1867,6 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(*PI))
return MBB->end();
-
// Find out what registers are live. Note this routine is ignoring other live
// registers which are only used by instructions in successor blocks.
for (const MachineOperand &MO : PI->operands()) {
@@ -1921,7 +1968,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
//
// BB2:
// r1 = op2, ...
- // = op3, r1<kill>
+ // = op3, killed r1
IsSafe = false;
break;
}
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 92681137e4c6..0f0952550137 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -1,4 +1,4 @@
-//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===//
+//===- BranchFolding.h - Fold machine code branch instructions --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,20 +10,27 @@
#ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H
#define LLVM_LIB_CODEGEN_BRANCHFOLDING_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/Compiler.h"
+#include <cstdint>
#include <vector>
namespace llvm {
- class MachineBlockFrequencyInfo;
- class MachineBranchProbabilityInfo;
- class MachineFunction;
- class MachineModuleInfo;
- class MachineLoopInfo;
- class TargetInstrInfo;
- class TargetRegisterInfo;
+
+class BasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineBranchProbabilityInfo;
+class MachineFunction;
+class MachineLoopInfo;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class raw_ostream;
+class TargetInstrInfo;
+class TargetRegisterInfo;
class LLVM_LIBRARY_VISIBILITY BranchFolder {
public:
@@ -49,6 +56,7 @@ namespace llvm {
class MergePotentialsElt {
unsigned Hash;
MachineBasicBlock *Block;
+
public:
MergePotentialsElt(unsigned h, MachineBasicBlock *b)
: Hash(h), Block(b) {}
@@ -62,7 +70,9 @@ namespace llvm {
bool operator<(const MergePotentialsElt &) const;
};
- typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+
+ using MPIterator = std::vector<MergePotentialsElt>::iterator;
+
std::vector<MergePotentialsElt> MergePotentials;
SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
DenseMap<const MachineBasicBlock *, int> FuncletMembership;
@@ -70,6 +80,7 @@ namespace llvm {
class SameTailElt {
MPIterator MPIter;
MachineBasicBlock::iterator TailStartPos;
+
public:
SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
: MPIter(mp), TailStartPos(tsp) {}
@@ -77,18 +88,23 @@ namespace llvm {
MPIterator getMPIter() const {
return MPIter;
}
+
MergePotentialsElt &getMergePotentialsElt() const {
return *getMPIter();
}
+
MachineBasicBlock::iterator getTailStartPos() const {
return TailStartPos;
}
+
unsigned getHash() const {
return getMergePotentialsElt().getHash();
}
+
MachineBasicBlock *getBlock() const {
return getMergePotentialsElt().getBlock();
}
+
bool tailIsWholeBlock() const {
return TailStartPos == getBlock()->begin();
}
@@ -96,6 +112,7 @@ namespace llvm {
void setBlock(MachineBasicBlock *MBB) {
getMergePotentialsElt().setBlock(MBB);
}
+
void setTailStartPos(MachineBasicBlock::iterator Pos) {
TailStartPos = Pos;
}
@@ -120,6 +137,7 @@ namespace llvm {
class MBFIWrapper {
public:
MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {}
+
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F);
raw_ostream &printBlockFreq(raw_ostream &OS,
@@ -146,8 +164,8 @@ namespace llvm {
/// Delete the instruction OldInst and everything after it, replacing it
/// with an unconditional branch to NewDest.
- void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
- MachineBasicBlock *NewDest);
+ void replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock &NewDest);
/// Given a machine basic block and an iterator into it, split the MBB so
/// that the part before the iterator falls into the part starting at the
@@ -182,8 +200,8 @@ namespace llvm {
unsigned &commonTailIndex);
/// Create merged DebugLocs of identical instructions across SameTails and
- /// assign it to the instruction in common tail.
- void MergeCommonTailDebugLocs(unsigned commonTailIndex);
+ /// assign it to the instruction in common tail; merge MMOs and undef flags.
+ void mergeCommonTails(unsigned commonTailIndex);
bool OptimizeBranches(MachineFunction &MF);
@@ -203,6 +221,7 @@ namespace llvm {
/// the function, move the instructions before MBB terminator if it's legal.
bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
};
-}
-#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_BRANCHFOLDING_H
diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp
index 27ee12c4c5ff..0d87f142c7cc 100644
--- a/lib/CodeGen/BranchRelaxation.cpp
+++ b/lib/CodeGen/BranchRelaxation.cpp
@@ -1,4 +1,4 @@
-//===-- BranchRelaxation.cpp ----------------------------------------------===//
+//===- BranchRelaxation.cpp -----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,14 +10,25 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
using namespace llvm;
@@ -30,6 +41,7 @@ STATISTIC(NumUnconditionalRelaxed, "Number of unconditional branches relaxed");
#define BRANCH_RELAX_NAME "Branch relaxation pass"
namespace {
+
class BranchRelaxation : public MachineFunctionPass {
/// BasicBlockInfo - Information about the offset and size of a single
/// basic block.
@@ -38,16 +50,16 @@ class BranchRelaxation : public MachineFunctionPass {
/// of this basic block.
///
/// The offset is always aligned as required by the basic block.
- unsigned Offset;
+ unsigned Offset = 0;
/// Size - Size of the basic block in bytes. If the block contains
/// inline assembly, this is a worst case estimate.
///
/// The size does not include any alignment padding whether from the
/// beginning of the block, or from an aligned jump table at the end.
- unsigned Size;
+ unsigned Size = 0;
- BasicBlockInfo() : Offset(0), Size(0) {}
+ BasicBlockInfo() = default;
/// Compute the offset immediately following this block. \p MBB is the next
/// block.
@@ -95,18 +107,18 @@ class BranchRelaxation : public MachineFunctionPass {
public:
static char ID;
- BranchRelaxation() : MachineFunctionPass(ID) { }
+
+ BranchRelaxation() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
- StringRef getPassName() const override {
- return BRANCH_RELAX_NAME;
- }
+ StringRef getPassName() const override { return BRANCH_RELAX_NAME; }
};
-}
+} // end anonymous namespace
char BranchRelaxation::ID = 0;
+
char &llvm::BranchRelaxationPassID = BranchRelaxation::ID;
INITIALIZE_PASS(BranchRelaxation, DEBUG_TYPE, BRANCH_RELAX_NAME, false, false)
@@ -131,7 +143,7 @@ void BranchRelaxation::verify() {
LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() {
for (auto &MBB : *MF) {
const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
- dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
+ dbgs() << format("%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
<< format("size=%#x\n", BBI.Size);
}
}
@@ -196,7 +208,7 @@ void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
}
}
- /// Insert a new empty basic block and insert it after \BB
+/// Insert a new empty basic block after \p BB.
MachineBasicBlock *BranchRelaxation::createNewBlockAfter(MachineBasicBlock &BB) {
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
@@ -233,7 +245,6 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
// Insert an entry into BlockInfo to align it properly with the block numbers.
BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
-
NewBB->transferSuccessors(OrigBB);
OrigBB->addSuccessor(NewBB);
OrigBB->addSuccessor(DestBB);
@@ -259,7 +270,7 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
// Need to fix live-in lists if we track liveness.
if (TRI->trackLivenessAfterRegAlloc(*MF))
- computeLiveIns(LiveRegs, MF->getRegInfo(), *NewBB);
+ computeAndAddLiveIns(LiveRegs, *NewBB);
++NumSplit;
@@ -276,13 +287,10 @@ bool BranchRelaxation::isBlockInRange(
if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset))
return true;
- DEBUG(
- dbgs() << "Out of range branch to destination BB#" << DestBB.getNumber()
- << " from BB#" << MI.getParent()->getNumber()
- << " to " << DestOffset
- << " offset " << DestOffset - BrOffset
- << '\t' << MI
- );
+ DEBUG(dbgs() << "Out of range branch to destination "
+ << printMBBReference(DestBB) << " from "
+ << printMBBReference(*MI.getParent()) << " to " << DestOffset
+ << " offset " << DestOffset - BrOffset << '\t' << MI);
return false;
}
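isBranchOffsetInRange is a target hook; a typical implementation just checks
that the byte offset fits the branch's signed immediate field. A hedged sketch
(the 21-bit width is illustrative, not any particular target's encoding):

    // Example hook: a PC-relative branch with a signed 21-bit byte
    // offset can reach [-2^20, 2^20 - 1] bytes from the branch.
    bool isBranchOffsetInRange(unsigned /*BranchOpc*/, int64_t Offset) {
      return isIntN(21, Offset); // llvm::isIntN from MathExtras.h
    }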
@@ -348,16 +356,16 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
// Need to fix live-in lists if we track liveness.
if (TRI->trackLivenessAfterRegAlloc(*MF))
- computeLiveIns(LiveRegs, MF->getRegInfo(), NewBB);
+ computeAndAddLiveIns(LiveRegs, NewBB);
}
// We now have an appropriate fall-through block in place (either naturally or
// just created), so we can invert the condition.
MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));
- DEBUG(dbgs() << " Insert B to BB#" << TBB->getNumber()
- << ", invert condition and change dest. to BB#"
- << NextBB.getNumber() << '\n');
+ DEBUG(dbgs() << " Insert B to " << printMBBReference(*TBB)
+ << ", invert condition and change dest. to "
+ << printMBBReference(NextBB) << '\n');
unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 7f3c6da91268..07ba5d36cc96 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -4,7 +4,6 @@ add_llvm_library(LLVMCodeGen
Analysis.cpp
AtomicExpandPass.cpp
BasicTargetTransformInfo.cpp
- BranchCoalescing.cpp
BranchFolding.cpp
BranchRelaxation.cpp
BuiltinGCs.cpp
@@ -12,7 +11,6 @@ add_llvm_library(LLVMCodeGen
CallingConvLower.cpp
CodeGen.cpp
CodeGenPrepare.cpp
- CountingFunctionInserter.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
DetectDeadLanes.cpp
@@ -22,6 +20,7 @@ add_llvm_library(LLVMCodeGen
EdgeBundles.cpp
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
+ ExpandMemCmp.cpp
ExpandPostRAPseudos.cpp
ExpandReductions.cpp
FaultMaps.cpp
@@ -43,7 +42,7 @@ add_llvm_library(LLVMCodeGen
LexicalScopes.cpp
LiveDebugValues.cpp
LiveDebugVariables.cpp
- LiveIntervalAnalysis.cpp
+ LiveIntervals.cpp
LiveInterval.cpp
LiveIntervalUnion.cpp
LivePhysRegs.cpp
@@ -77,6 +76,7 @@ add_llvm_library(LLVMCodeGen
MachineLoopInfo.cpp
MachineModuleInfo.cpp
MachineModuleInfoImpls.cpp
+ MachineOperand.cpp
MachineOptimizationRemarkEmitter.cpp
MachineOutliner.cpp
MachinePassRegistry.cpp
@@ -114,6 +114,7 @@ add_llvm_library(LLVMCodeGen
RegisterPressure.cpp
RegisterScavenging.cpp
RenameIndependentSubregs.cpp
+ MIRCanonicalizerPass.cpp
RegisterUsageInfo.cpp
RegUsageInfoCollector.cpp
RegUsageInfoPropagate.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index c2ced19458ed..b8920a601938 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -1,4 +1,4 @@
-//===------------------------ CalcSpillWeights.cpp ------------------------===//
+//===- CalcSpillWeights.cpp -----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,17 +8,23 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <tuple>
+
using namespace llvm;
#define DEBUG_TYPE "calcspillweights"
@@ -64,13 +70,24 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
return sub == hsub ? hreg : 0;
const TargetRegisterClass *rc = mri.getRegClass(reg);
+ if (!tri.enableMultipleCopyHints()) {
+ // Only allow physreg hints in rc.
+ if (sub == 0)
+ return rc->contains(hreg) ? hreg : 0;
+
+ // reg:sub should match the physreg hreg.
+ return tri.getMatchingSuperReg(hreg, sub, rc);
+ }
+
+ unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
+ if (rc->contains(CopiedPReg))
+ return CopiedPReg;
- // Only allow physreg hints in rc.
- if (sub == 0)
- return rc->contains(hreg) ? hreg : 0;
+ // Check if reg:sub matches so that a super register could be hinted.
+ if (sub)
+ return tri.getMatchingSuperReg(CopiedPReg, sub, rc);
- // reg:sub should match the physreg hreg.
- return tri.getMatchingSuperReg(hreg, sub, rc);
+ return 0;
}
// Check if all values in LI are rematerializable
@@ -127,8 +144,21 @@ static bool isRematerializable(const LiveInterval &LI,
return true;
}
-void
-VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
+void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
+ float weight = weightCalcHelper(li);
+ // Check if unspillable.
+ if (weight < 0)
+ return;
+ li.weight = weight;
+}
+
+float VirtRegAuxInfo::futureWeight(LiveInterval &li, SlotIndex start,
+ SlotIndex end) {
+ return weightCalcHelper(li, &start, &end);
+}
+
+float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
+ SlotIndex *end) {
MachineRegisterInfo &mri = MF.getRegInfo();
const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo();
MachineBasicBlock *mbb = nullptr;
@@ -138,20 +168,73 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
unsigned numInstr = 0; // Number of instructions using li
SmallPtrSet<MachineInstr*, 8> visited;
- // Find the best physreg hint and the best virtreg hint.
- float bestPhys = 0, bestVirt = 0;
- unsigned hintPhys = 0, hintVirt = 0;
-
- // Don't recompute a target specific hint.
- bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+ std::pair<unsigned, unsigned> TargetHint = mri.getRegAllocationHint(li.reg);
// Don't recompute spill weight for an unspillable register.
bool Spillable = li.isSpillable();
+ bool localSplitArtifact = start && end;
+
+ // Do not update future local split artifacts.
+ bool updateLI = !localSplitArtifact;
+
+ if (localSplitArtifact) {
+ MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*end);
+ assert(localMBB == LIS.getMBBFromIndex(*start) &&
+ "start and end are expected to be in the same basic block");
+
+ // A local split artifact will have two additional copy instructions, and
+ // they will be in the same BB.
+ // localLI = COPY other
+ // ...
+ // other = COPY localLI
+ totalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB);
+ totalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB);
+
+ numInstr += 2;
+ }
+
+ // CopyHint is a sortable hint derived from a COPY instruction.
+ struct CopyHint {
+ unsigned Reg;
+ float Weight;
+ bool IsPhys;
+ unsigned HintOrder;
+ CopyHint(unsigned R, float W, bool P, unsigned HR) :
+ Reg(R), Weight(W), IsPhys(P), HintOrder(HR) {}
+ bool operator<(const CopyHint &rhs) const {
+ // Always prefer any physreg hint.
+ if (IsPhys != rhs.IsPhys)
+ return (IsPhys && !rhs.IsPhys);
+ if (Weight != rhs.Weight)
+ return (Weight > rhs.Weight);
+
+ // This is just a temporary way to achieve NFC for targets that don't
+ // enable multiple copy hints. HintOrder should be removed when all
+ // targets return true in enableMultipleCopyHints().
+ return (HintOrder < rhs.HintOrder);
+
+#if 0 // Should replace the HintOrder check, see above.
+ // (just for the purpose of maintaining the set)
+ return Reg < rhs.Reg;
+#endif
+ }
+ };
+ std::set<CopyHint> CopyHints;
+
+ // Temporary: see comment for HintOrder above.
+ unsigned CopyHintOrder = 0;
for (MachineRegisterInfo::reg_instr_iterator
I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
I != E; ) {
MachineInstr *mi = &*(I++);
+
+ // For local split artifacts, we are interested only in instructions between
+ // the expected start and end of the range.
+ SlotIndex si = LIS.getInstructionIndex(*mi);
+ if (localSplitArtifact && ((si < *start) || (si > *end)))
+ continue;
+
numInstr++;
if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
continue;
@@ -180,7 +263,8 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
}
// Get allocation hints from copies.
- if (noHint || !mi->isCopy())
+ if (!mi->isCopy() ||
+ (TargetHint.first != 0 && !tri.enableMultipleCopyHints()))
continue;
unsigned hint = copyHint(mi, li.reg, tri, mri);
if (!hint)
@@ -190,39 +274,43 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
//
// FIXME: we probably shouldn't use floats at all.
volatile float hweight = Hint[hint] += weight;
- if (TargetRegisterInfo::isPhysicalRegister(hint)) {
- if (hweight > bestPhys && mri.isAllocatable(hint)) {
- bestPhys = hweight;
- hintPhys = hint;
- }
- } else {
- if (hweight > bestVirt) {
- bestVirt = hweight;
- hintVirt = hint;
- }
- }
+ if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint))
+ CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint),
+ (tri.enableMultipleCopyHints() ? hint : CopyHintOrder++)));
}
Hint.clear();
- // Always prefer the physreg hint.
- if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
- mri.setRegAllocationHint(li.reg, 0, hint);
+ // Pass all the sorted copy hints to mri.
+ if (updateLI && CopyHints.size()) {
+ // Remove a generic hint if one was previously added by the target.
+ if (TargetHint.first == 0 && TargetHint.second)
+ mri.clearSimpleHint(li.reg);
+
+ for (auto &Hint : CopyHints) {
+ if (TargetHint.first != 0 && Hint.Reg == TargetHint.second)
+ // Don't add the target-provided hint again.
+ continue;
+ mri.addRegAllocationHint(li.reg, Hint.Reg);
+ if (!tri.enableMultipleCopyHints())
+ break;
+ }
+
// Weakly boost the spill weight of hinted registers.
totalWeight *= 1.01F;
}
// If the live interval was already unspillable, leave it that way.
if (!Spillable)
- return;
+ return -1.0;
// Mark li as unspillable if all live ranges are tiny and the interval
// is not live at any reg mask. If the interval is live at a reg mask
// spilling may be required.
- if (li.isZeroLength(LIS.getSlotIndexes()) &&
+ if (updateLI && li.isZeroLength(LIS.getSlotIndexes()) &&
!li.isLiveAtIndexes(LIS.getRegMaskSlots())) {
li.markNotSpillable();
- return;
+ return -1.0;
}
// If all of the definitions of the interval are re-materializable,
@@ -232,5 +320,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
totalWeight *= 0.5F;
- li.weight = normalize(totalWeight, li.getSize(), numInstr);
+ if (localSplitArtifact)
+ return normalize(totalWeight, start->distance(*end), numInstr);
+ return normalize(totalWeight, li.getSize(), numInstr);
}
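
For context on the CopyHints set above: physreg hints always sort ahead of virtreg hints, heavier hints ahead of lighter ones, with a stable tie-break last. A minimal self-contained sketch of that comparator, using the Reg tie-break the patch leaves under #if 0 (the sample registers and weights are made up):

    #include <iostream>
    #include <set>

    // Simplified stand-in for the CopyHint struct added to CalcSpillWeights.cpp.
    struct CopyHint {
      unsigned Reg;
      float Weight;
      bool IsPhys;
      bool operator<(const CopyHint &RHS) const {
        if (IsPhys != RHS.IsPhys)
          return IsPhys && !RHS.IsPhys; // Physreg hints always come first.
        if (Weight != RHS.Weight)
          return Weight > RHS.Weight;   // Then prefer the heavier hint.
        return Reg < RHS.Reg;           // Tie-break keeps the set well-ordered.
      }
    };

    int main() {
      // Hypothetical registers and accumulated copy weights.
      std::set<CopyHint> Hints = {{5, 2.0f, false}, {3, 1.0f, true}, {7, 4.0f, true}};
      for (const CopyHint &H : Hints)
        std::cout << (H.IsPhys ? "phys " : "virt ") << H.Reg << " w=" << H.Weight << '\n';
      // Prints: phys 7 w=4, then phys 3 w=1, then virt 5 w=2.
    }
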
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 7cad4d031169..3593089b206d 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -15,14 +15,14 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index b7fd45a3f6a6..c0d7eb4cf47b 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -21,16 +21,15 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
- initializeBranchCoalescingPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
initializeCodeGenPreparePass(Registry);
- initializeCountingFunctionInserterPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
initializeDetectDeadLanesPass(Registry);
initializeDwarfEHPreparePass(Registry);
initializeEarlyIfConverterPass(Registry);
initializeExpandISelPseudosPass(Registry);
+ initializeExpandMemCmpPassPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFEntryInserterPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
@@ -78,7 +77,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
initializeRABasicPass(Registry);
- initializeRAFastPass(Registry);
+ initializeRegAllocFastPass(Registry);
initializeRAGreedyPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
@@ -100,6 +99,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeVirtRegRewriterPass(Registry);
initializeWinEHPreparePass(Registry);
initializeXRayInstrumentationPass(Registry);
+ initializeMIRCanonicalizerPass(Registry);
}
void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
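
Each initializeFooPass() entry above is generated by the INITIALIZE_PASS macro family in the corresponding pass's own .cpp file, which is also why a class named ExpandMemCmpPass yields the doubled initializeExpandMemCmpPassPass. A hedged sketch using a hypothetical pass name:

    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    // Hypothetical pass, named for illustration only.
    struct MyNewPass : public FunctionPass {
      static char ID;
      MyNewPass() : FunctionPass(ID) {}
      bool runOnFunction(Function &F) override { return false; }
    };
    } // end anonymous namespace

    char MyNewPass::ID = 0;

    // Expands to a definition of llvm::initializeMyNewPassPass(PassRegistry &),
    // the symbol that initializeCodeGen() above would call for a real pass.
    INITIALIZE_PASS(MyNewPass, "my-new-pass", "My New Pass", false, false)
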
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index dc02a00e0fcc..c4794380f791 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -13,13 +13,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -28,38 +32,69 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
-#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -75,6 +110,12 @@ STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
"of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
"computations were sunk");
+STATISTIC(NumMemoryInstsPhiCreated,
+ "Number of phis created when address "
+ "computations were sunk to memory instructions");
+STATISTIC(NumMemoryInstsSelectCreated,
+ "Number of select created when address "
+ "computations were sunk to memory instructions");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumAndsAdded,
@@ -85,12 +126,6 @@ STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
-STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
-STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
-STATISTIC(NumMemCmpGreaterThanMax,
- "Number of memcmp calls with size greater than max size");
-STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
-
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
@@ -151,25 +186,51 @@ EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
cl::desc("Enable merging of redundant sexts when one is dominating"
" the other."), cl::init(true));
-static cl::opt<unsigned> MemCmpNumLoadsPerBlock(
- "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
- cl::desc("The number of loads per basic block for inline expansion of "
- "memcmp that is only being compared against zero."));
+static cl::opt<bool> DisableComplexAddrModes(
+ "disable-complex-addr-modes", cl::Hidden, cl::init(false),
+ cl::desc("Disables combining addressing modes with different parts "
+ "in optimizeMemoryInst."));
+
+static cl::opt<bool>
+AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
+ cl::desc("Allow creation of Phis in Address sinking."));
+
+static cl::opt<bool>
+AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(false),
+ cl::desc("Allow creation of selects in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineBaseReg(
+ "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
+ cl::desc("Allow combining of BaseReg field in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineBaseGV(
+ "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
+ cl::desc("Allow combining of BaseGV field in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineBaseOffs(
+ "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
+ cl::desc("Allow combining of BaseOffs field in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineScaledReg(
+ "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
+ cl::desc("Allow combining of ScaledReg field in Address sinking."));
namespace {
-typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
-typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
-typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
-typedef SmallVector<Instruction *, 16> SExts;
-typedef DenseMap<Value *, SExts> ValueToSExts;
+
+using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
+using TypeIsSExt = PointerIntPair<Type *, 1, bool>;
+using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
+using SExts = SmallVector<Instruction *, 16>;
+using ValueToSExts = DenseMap<Value *, SExts>;
+
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
- const TargetMachine *TM;
+ const TargetMachine *TM = nullptr;
const TargetSubtargetInfo *SubtargetInfo;
- const TargetLowering *TLI;
+ const TargetLowering *TLI = nullptr;
const TargetRegisterInfo *TRI;
- const TargetTransformInfo *TTI;
+ const TargetTransformInfo *TTI = nullptr;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
std::unique_ptr<BlockFrequencyInfo> BFI;
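
The AddrSink* knobs added above all follow the standard cl::opt pattern; as a minimal sketch (the flag name here is invented), defining such an option and consulting it looks like:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Hidden boolean flag in the same style as the AddrSink* options,
    // toggled on the command line with -addr-sink-new-example.
    static cl::opt<bool>
        AddrSinkNewExample("addr-sink-new-example", cl::Hidden, cl::init(false),
                           cl::desc("Allow the hypothetical example transform."));

    static bool shouldDoExampleTransform() {
      // cl::opt<bool> converts implicitly to bool once the command line
      // has been parsed (cl::ParseCommandLineOptions in the tool's main).
      return AddrSinkNewExample;
    }
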
@@ -181,11 +242,14 @@ class TypePromotionTransaction;
/// Keeps track of non-local addresses that have been sunk into a block.
/// This allows us to avoid inserting duplicate code for blocks with
- /// multiple load/stores of the same address.
- ValueMap<Value*, Value*> SunkAddrs;
+ /// multiple load/stores of the same address. The usage of WeakTrackingVH
+ /// enables SunkAddrs to be treated as a cache whose entries can be
+ /// invalidated if a sunken address computation has been erased.
+ ValueMap<Value*, WeakTrackingVH> SunkAddrs;
/// Keeps track of all instructions inserted for the current function.
SetOfInstrs InsertedInsts;
+
/// Keeps track of the type of the related instruction before their
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
@@ -206,15 +270,15 @@ class TypePromotionTransaction;
bool OptSize;
/// DataLayout for the Function being processed.
- const DataLayout *DL;
+ const DataLayout *DL = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
- CodeGenPrepare()
- : FunctionPass(ID), TM(nullptr), TLI(nullptr), TTI(nullptr),
- DL(nullptr) {
+
+ CodeGenPrepare() : FunctionPass(ID) {
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
}
+
bool runOnFunction(Function &F) override;
StringRef getPassName() const override { return "CodeGen Prepare"; }
@@ -264,11 +328,12 @@ class TypePromotionTransaction;
SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
- bool splitIndirectCriticalEdges(Function &F);
};
-}
+
+} // end anonymous namespace
char CodeGenPrepare::ID = 0;
+
INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
@@ -302,9 +367,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
OptSize = F.optForSize();
+ ProfileSummaryInfo *PSI =
+ getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
- ProfileSummaryInfo *PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (PSI->isFunctionHotInCallGraph(&F))
F.setSectionPrefix(".hot");
else if (PSI->isFunctionColdInCallGraph(&F))
@@ -313,7 +378,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
- if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
+ if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
+ TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
BasicBlock* BB = &*F.begin();
@@ -340,7 +406,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Split some critical edges where one of the sources is an indirect branch,
// to help generate sane code for PHIs involving such edges.
- EverMadeChange |= splitIndirectCriticalEdges(F);
+ EverMadeChange |= SplitIndirectBrCriticalEdges(F);
bool MadeChange = true;
while (MadeChange) {
@@ -485,160 +551,6 @@ BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
return DestBB;
}
-// Return the unique indirectbr predecessor of a block. This may return null
-// even if such a predecessor exists, if it's not useful for splitting.
-// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
-// predecessors of BB.
-static BasicBlock *
-findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
- // If the block doesn't have any PHIs, we don't care about it, since there's
- // no point in splitting it.
- PHINode *PN = dyn_cast<PHINode>(BB->begin());
- if (!PN)
- return nullptr;
-
- // Verify we have exactly one IBR predecessor.
- // Conservatively bail out if one of the other predecessors is not a "regular"
- // terminator (that is, not a switch or a br).
- BasicBlock *IBB = nullptr;
- for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
- BasicBlock *PredBB = PN->getIncomingBlock(Pred);
- TerminatorInst *PredTerm = PredBB->getTerminator();
- switch (PredTerm->getOpcode()) {
- case Instruction::IndirectBr:
- if (IBB)
- return nullptr;
- IBB = PredBB;
- break;
- case Instruction::Br:
- case Instruction::Switch:
- OtherPreds.push_back(PredBB);
- continue;
- default:
- return nullptr;
- }
- }
-
- return IBB;
-}
-
-// Split critical edges where the source of the edge is an indirectbr
-// instruction. This isn't always possible, but we can handle some easy cases.
-// This is useful because MI is unable to split such critical edges,
-// which means it will not be able to sink instructions along those edges.
-// This is especially painful for indirect branches with many successors, where
-// we end up having to prepare all outgoing values in the origin block.
-//
-// Our normal algorithm for splitting critical edges requires us to update
-// the outgoing edges of the edge origin block, but for an indirectbr this
-// is hard, since it would require finding and updating the block addresses
-// the indirect branch uses. But if a block only has a single indirectbr
-// predecessor, with the others being regular branches, we can do it in a
-// different way.
-// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr.
-// We can split D into D0 and D1, where D0 contains only the PHIs from D,
-// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
-// create the following structure:
-// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
-bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
- // Check whether the function has any indirectbrs, and collect which blocks
- // they may jump to. Since most functions don't have indirect branches,
- // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
- SmallSetVector<BasicBlock *, 16> Targets;
- for (auto &BB : F) {
- auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
- if (!IBI)
- continue;
-
- for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
- Targets.insert(IBI->getSuccessor(Succ));
- }
-
- if (Targets.empty())
- return false;
-
- bool Changed = false;
- for (BasicBlock *Target : Targets) {
- SmallVector<BasicBlock *, 16> OtherPreds;
- BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
- // If we did not found an indirectbr, or the indirectbr is the only
- // incoming edge, this isn't the kind of edge we're looking for.
- if (!IBRPred || OtherPreds.empty())
- continue;
-
- // Don't even think about ehpads/landingpads.
- Instruction *FirstNonPHI = Target->getFirstNonPHI();
- if (FirstNonPHI->isEHPad() || Target->isLandingPad())
- continue;
-
- BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
- // It's possible Target was its own successor through an indirectbr.
- // In this case, the indirectbr now comes from BodyBlock.
- if (IBRPred == Target)
- IBRPred = BodyBlock;
-
- // At this point Target only has PHIs, and BodyBlock has the rest of the
- // block's body. Create a copy of Target that will be used by the "direct"
- // preds.
- ValueToValueMapTy VMap;
- BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
-
- for (BasicBlock *Pred : OtherPreds) {
- // If the target is a loop to itself, then the terminator of the split
- // block needs to be updated.
- if (Pred == Target)
- BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
- else
- Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
- }
-
- // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
- // they are clones, so the number of PHIs are the same.
- // (a) Remove the edge coming from IBRPred from the "Direct" PHI
- // (b) Leave that as the only edge in the "Indirect" PHI.
- // (c) Merge the two in the body block.
- BasicBlock::iterator Indirect = Target->begin(),
- End = Target->getFirstNonPHI()->getIterator();
- BasicBlock::iterator Direct = DirectSucc->begin();
- BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
-
- assert(&*End == Target->getTerminator() &&
- "Block was expected to only contain PHIs");
-
- while (Indirect != End) {
- PHINode *DirPHI = cast<PHINode>(Direct);
- PHINode *IndPHI = cast<PHINode>(Indirect);
-
- // Now, clean up - the direct block shouldn't get the indirect value,
- // and vice versa.
- DirPHI->removeIncomingValue(IBRPred);
- Direct++;
-
- // Advance the pointer here, to avoid invalidation issues when the old
- // PHI is erased.
- Indirect++;
-
- PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
- NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
- IBRPred);
-
- // Create a PHI in the body block, to merge the direct and indirect
- // predecessors.
- PHINode *MergePHI =
- PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
- MergePHI->addIncoming(NewIndPHI, Target);
- MergePHI->addIncoming(DirPHI, DirectSucc);
-
- IndPHI->replaceAllUsesWith(MergePHI);
- IndPHI->eraseFromParent();
- }
-
- Changed = true;
- }
-
- return Changed;
-}
-
/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
@@ -827,7 +739,6 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
return true;
}
-
/// Eliminate a basic block that has only phi's and an unconditional branch in
/// it.
void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
@@ -948,6 +859,21 @@ static bool
simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
const SmallVectorImpl<GCRelocateInst *> &Targets) {
bool MadeChange = false;
+ // We must ensure that the relocation of the derived pointer is defined
+ // after the relocation of the base pointer. If we find a relocation of the
+ // same base that is defined earlier, we move the relocation of the base
+ // right before that relocation. We only consider relocations in the same
+ // basic block as the relocation of the base; relocations from other basic
+ // blocks will be skipped by this optimization, and we do not care about them.
+ for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
+ &*R != RelocatedBase; ++R)
+ if (auto RI = dyn_cast<GCRelocateInst>(R))
+ if (RI->getStatepoint() == RelocatedBase->getStatepoint())
+ if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
+ RelocatedBase->moveBefore(RI);
+ break;
+ }
+
for (GCRelocateInst *ToReplace : Targets) {
assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
"Not relocating a derived object of the original base object");
@@ -1125,6 +1051,7 @@ static bool SinkCast(CastInst *CI) {
// If we removed all uses, nuke the cast.
if (CI->use_empty()) {
+ salvageDebugInfo(*CI);
CI->eraseFromParent();
MadeChange = true;
}
@@ -1137,7 +1064,6 @@ static bool SinkCast(CastInst *CI) {
/// reduce the number of virtual registers that must be created and coalesced.
///
/// Return true if any changes are made.
-///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
const DataLayout &DL) {
// Sink only "cheap" (or nop) address-space casts. This is a weaker condition
@@ -1641,656 +1567,6 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
return true;
}
-// This class provides helper functions to expand a memcmp library call into an
-// inline expansion.
-class MemCmpExpansion {
- struct ResultBlock {
- BasicBlock *BB;
- PHINode *PhiSrc1;
- PHINode *PhiSrc2;
- ResultBlock();
- };
-
- CallInst *CI;
- ResultBlock ResBlock;
- unsigned MaxLoadSize;
- unsigned NumBlocks;
- unsigned NumBlocksNonOneByte;
- unsigned NumLoadsPerBlock;
- std::vector<BasicBlock *> LoadCmpBlocks;
- BasicBlock *EndBlock;
- PHINode *PhiRes;
- bool IsUsedForZeroCmp;
- const DataLayout &DL;
- IRBuilder<> Builder;
-
- unsigned calculateNumBlocks(unsigned Size);
- void createLoadCmpBlocks();
- void createResultBlock();
- void setupResultBlockPHINodes();
- void setupEndBlockPHINodes();
- void emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
- unsigned GEPIndex);
- Value *getCompareLoadPairs(unsigned Index, unsigned Size,
- unsigned &NumBytesProcessed);
- void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size,
- unsigned &NumBytesProcessed);
- void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex);
- void emitMemCmpResultBlock();
- Value *getMemCmpExpansionZeroCase(unsigned Size);
- Value *getMemCmpEqZeroOneBlock(unsigned Size);
- Value *getMemCmpOneBlock(unsigned Size);
- unsigned getLoadSize(unsigned Size);
- unsigned getNumLoads(unsigned Size);
-
-public:
- MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize,
- unsigned NumLoadsPerBlock, const DataLayout &DL);
- Value *getMemCmpExpansion(uint64_t Size);
-};
-
-MemCmpExpansion::ResultBlock::ResultBlock()
- : BB(nullptr), PhiSrc1(nullptr), PhiSrc2(nullptr) {}
-
-// Initialize the basic block structure required for expansion of memcmp call
-// with given maximum load size and memcmp size parameter.
-// This structure includes:
-// 1. A list of load compare blocks - LoadCmpBlocks.
-// 2. An EndBlock, split from original instruction point, which is the block to
-// return from.
-// 3. ResultBlock, block to branch to for early exit when a
-// LoadCmpBlock finds a difference.
-MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
- unsigned MaxLoadSize, unsigned LoadsPerBlock,
- const DataLayout &TheDataLayout)
- : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock),
- DL(TheDataLayout), Builder(CI) {
-
- // A memcmp with zero-comparison with only one block of load and compare does
- // not need to set up any extra blocks. This case could be handled in the DAG,
- // but since we have all of the machinery to flexibly expand any memcpy here,
- // we choose to handle this case too to avoid fragmented lowering.
- IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
- NumBlocks = calculateNumBlocks(Size);
- if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) {
- BasicBlock *StartBlock = CI->getParent();
- EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
- setupEndBlockPHINodes();
- createResultBlock();
-
- // If return value of memcmp is not used in a zero equality, we need to
- // calculate which source was larger. The calculation requires the
- // two loaded source values of each load compare block.
- // These will be saved in the phi nodes created by setupResultBlockPHINodes.
- if (!IsUsedForZeroCmp)
- setupResultBlockPHINodes();
-
- // Create the number of required load compare basic blocks.
- createLoadCmpBlocks();
-
- // Update the terminator added by splitBasicBlock to branch to the first
- // LoadCmpBlock.
- StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
- }
-
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-}
-
-void MemCmpExpansion::createLoadCmpBlocks() {
- for (unsigned i = 0; i < NumBlocks; i++) {
- BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
- EndBlock->getParent(), EndBlock);
- LoadCmpBlocks.push_back(BB);
- }
-}
-
-void MemCmpExpansion::createResultBlock() {
- ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
- EndBlock->getParent(), EndBlock);
-}
-
-// This function creates the IR instructions for loading and comparing 1 byte.
-// It loads 1 byte from each source of the memcmp parameters with the given
-// GEPIndex. It then subtracts the two loaded values and adds this result to the
-// final phi node for selecting the memcmp result.
-void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index,
- unsigned GEPIndex) {
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
- Builder.SetInsertPoint(LoadCmpBlocks[Index]);
- Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
- // Get the base address using the GEPIndex.
- if (GEPIndex != 0) {
- Source1 = Builder.CreateGEP(LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, GEPIndex));
- Source2 = Builder.CreateGEP(LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, GEPIndex));
- }
-
- Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
- Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
- LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
- LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
- Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
-
- PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]);
-
- if (Index < (LoadCmpBlocks.size() - 1)) {
- // Early exit branch if difference found to EndBlock. Otherwise, continue to
- // next LoadCmpBlock,
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
- ConstantInt::get(Diff->getType(), 0));
- BranchInst *CmpBr =
- BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp);
- Builder.Insert(CmpBr);
- } else {
- // The last block has an unconditional branch to EndBlock.
- BranchInst *CmpBr = BranchInst::Create(EndBlock);
- Builder.Insert(CmpBr);
- }
-}
-
-unsigned MemCmpExpansion::getNumLoads(unsigned Size) {
- return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize);
-}
-
-unsigned MemCmpExpansion::getLoadSize(unsigned Size) {
- return MinAlign(PowerOf2Floor(Size), MaxLoadSize);
-}
-
-/// Generate an equality comparison for one or more pairs of loaded values.
-/// This is used in the case where the memcmp() call is compared equal or not
-/// equal to zero.
-Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
- unsigned &NumBytesProcessed) {
- std::vector<Value *> XorList, OrList;
- Value *Diff;
-
- unsigned RemainingBytes = Size - NumBytesProcessed;
- unsigned NumLoadsRemaining = getNumLoads(RemainingBytes);
- unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock);
-
- // For a single-block expansion, start inserting before the memcmp call.
- if (LoadCmpBlocks.empty())
- Builder.SetInsertPoint(CI);
- else
- Builder.SetInsertPoint(LoadCmpBlocks[Index]);
-
- Value *Cmp = nullptr;
- for (unsigned i = 0; i < NumLoads; ++i) {
- unsigned LoadSize = getLoadSize(RemainingBytes);
- unsigned GEPIndex = NumBytesProcessed / LoadSize;
- NumBytesProcessed += LoadSize;
- RemainingBytes -= LoadSize;
-
- Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
- Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
- assert(LoadSize <= MaxLoadSize && "Unexpected load type");
-
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
- // Get the base address using the GEPIndex.
- if (GEPIndex != 0) {
- Source1 = Builder.CreateGEP(LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, GEPIndex));
- Source2 = Builder.CreateGEP(LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, GEPIndex));
- }
-
- // Get a constant or load a value for each source address.
- Value *LoadSrc1 = nullptr;
- if (auto *Source1C = dyn_cast<Constant>(Source1))
- LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
- if (!LoadSrc1)
- LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-
- Value *LoadSrc2 = nullptr;
- if (auto *Source2C = dyn_cast<Constant>(Source2))
- LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
- if (!LoadSrc2)
- LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
- if (NumLoads != 1) {
- if (LoadSizeType != MaxLoadType) {
- LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
- LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
- }
- // If we have multiple loads per block, we need to generate a composite
- // comparison using xor+or.
- Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
- Diff = Builder.CreateZExt(Diff, MaxLoadType);
- XorList.push_back(Diff);
- } else {
- // If there's only one load per block, we just compare the loaded values.
- Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
- }
- }
-
- auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
- std::vector<Value *> OutList;
- for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
- Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
- OutList.push_back(Or);
- }
- if (InList.size() % 2 != 0)
- OutList.push_back(InList.back());
- return OutList;
- };
-
- if (!Cmp) {
- // Pairwise OR the XOR results.
- OrList = pairWiseOr(XorList);
-
- // Pairwise OR the OR results until one result left.
- while (OrList.size() != 1) {
- OrList = pairWiseOr(OrList);
- }
- Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
- }
-
- return Cmp;
-}
-
-void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
- unsigned Index, unsigned Size, unsigned &NumBytesProcessed) {
- Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed);
-
- BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
- ? EndBlock
- : LoadCmpBlocks[Index + 1];
- // Early exit branch if difference found to ResultBlock. Otherwise,
- // continue to next LoadCmpBlock or EndBlock.
- BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
- Builder.Insert(CmpBr);
-
- // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
- // since early exit to ResultBlock was not taken (no difference was found in
- // any of the bytes).
- if (Index == LoadCmpBlocks.size() - 1) {
- Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
- PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
- }
-}
-
-// This function creates the IR intructions for loading and comparing using the
-// given LoadSize. It loads the number of bytes specified by LoadSize from each
-// source of the memcmp parameters. It then does a subtract to see if there was
-// a difference in the loaded values. If a difference is found, it branches
-// with an early exit to the ResultBlock for calculating which source was
-// larger. Otherwise, it falls through to the either the next LoadCmpBlock or
-// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
-// a special case through emitLoadCompareByteBlock. The special handling can
-// simply subtract the loaded values and add it to the result phi node.
-void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
- unsigned GEPIndex) {
- if (LoadSize == 1) {
- MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex);
- return;
- }
-
- Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
- Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
- assert(LoadSize <= MaxLoadSize && "Unexpected load type");
-
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
- Builder.SetInsertPoint(LoadCmpBlocks[Index]);
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
- // Get the base address using the GEPIndex.
- if (GEPIndex != 0) {
- Source1 = Builder.CreateGEP(LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, GEPIndex));
- Source2 = Builder.CreateGEP(LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, GEPIndex));
- }
-
- // Load LoadSizeType from the base address.
- Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
- Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
- if (DL.isLittleEndian()) {
- Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
- Intrinsic::bswap, LoadSizeType);
- LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
- LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
- }
-
- if (LoadSizeType != MaxLoadType) {
- LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
- LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
- }
-
- // Add the loaded values to the phi nodes for calculating memcmp result only
- // if result is not used in a zero equality.
- if (!IsUsedForZeroCmp) {
- ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]);
- ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]);
- }
-
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);
- BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
- ? EndBlock
- : LoadCmpBlocks[Index + 1];
- // Early exit branch if difference found to ResultBlock. Otherwise, continue
- // to next LoadCmpBlock or EndBlock.
- BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
- Builder.Insert(CmpBr);
-
- // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
- // since early exit to ResultBlock was not taken (no difference was found in
- // any of the bytes).
- if (Index == LoadCmpBlocks.size() - 1) {
- Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
- PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
- }
-}
-
-// This function populates the ResultBlock with a sequence to calculate the
-// memcmp result. It compares the two loaded source values and returns -1 if
-// src1 < src2 and 1 if src1 > src2.
-void MemCmpExpansion::emitMemCmpResultBlock() {
- // Special case: if memcmp result is used in a zero equality, result does not
- // need to be calculated and can simply return 1.
- if (IsUsedForZeroCmp) {
- BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
- Builder.SetInsertPoint(ResBlock.BB, InsertPt);
- Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
- PhiRes->addIncoming(Res, ResBlock.BB);
- BranchInst *NewBr = BranchInst::Create(EndBlock);
- Builder.Insert(NewBr);
- return;
- }
- BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
- Builder.SetInsertPoint(ResBlock.BB, InsertPt);
-
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
- ResBlock.PhiSrc2);
-
- Value *Res =
- Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
- ConstantInt::get(Builder.getInt32Ty(), 1));
-
- BranchInst *NewBr = BranchInst::Create(EndBlock);
- Builder.Insert(NewBr);
- PhiRes->addIncoming(Res, ResBlock.BB);
-}
-
-unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) {
- unsigned NumBlocks = 0;
- bool HaveOneByteLoad = false;
- unsigned RemainingSize = Size;
- unsigned LoadSize = MaxLoadSize;
- while (RemainingSize) {
- if (LoadSize == 1)
- HaveOneByteLoad = true;
- NumBlocks += RemainingSize / LoadSize;
- RemainingSize = RemainingSize % LoadSize;
- LoadSize = LoadSize / 2;
- }
- NumBlocksNonOneByte = HaveOneByteLoad ? (NumBlocks - 1) : NumBlocks;
-
- if (IsUsedForZeroCmp)
- NumBlocks = NumBlocks / NumLoadsPerBlock +
- (NumBlocks % NumLoadsPerBlock != 0 ? 1 : 0);
-
- return NumBlocks;
-}
-
-void MemCmpExpansion::setupResultBlockPHINodes() {
- Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
- Builder.SetInsertPoint(ResBlock.BB);
- ResBlock.PhiSrc1 =
- Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1");
- ResBlock.PhiSrc2 =
- Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2");
-}
-
-void MemCmpExpansion::setupEndBlockPHINodes() {
- Builder.SetInsertPoint(&EndBlock->front());
- PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
-}
-
-Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) {
- unsigned NumBytesProcessed = 0;
- // This loop populates each of the LoadCmpBlocks with the IR sequence to
- // handle multiple loads per block.
- for (unsigned i = 0; i < NumBlocks; ++i)
- emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed);
-
- emitMemCmpResultBlock();
- return PhiRes;
-}
-
-/// A memcmp expansion that compares equality with 0 and only has one block of
-/// load and compare can bypass the compare, branch, and phi IR that is required
-/// in the general case.
-Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) {
- unsigned NumBytesProcessed = 0;
- Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed);
- return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
-}
-
-/// A memcmp expansion that only has one block of load and compare can bypass
-/// the compare, branch, and phi IR that is required in the general case.
-Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) {
- assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
-
- Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
- // Load LoadSizeType from the base address.
- Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
- Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
- if (DL.isLittleEndian() && Size != 1) {
- Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
- Intrinsic::bswap, LoadSizeType);
- LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
- LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
- }
-
- // TODO: Instead of comparing ULT, just subtract and return the difference?
- Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
- Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
- Type *I32 = Builder.getInt32Ty();
- Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1),
- ConstantInt::get(I32, 1));
- return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0));
-}
-
-// This function expands the memcmp call into an inline expansion and returns
-// the memcmp result.
-Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
- if (IsUsedForZeroCmp)
- return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) :
- getMemCmpExpansionZeroCase(Size);
-
- // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
- if (NumBlocks == 1 && NumLoadsPerBlock == 1)
- return getMemCmpOneBlock(Size);
-
- // This loop calls emitLoadCompareBlock for comparing Size bytes of the two
- // memcmp sources. It starts with loading using the maximum load size set by
- // the target. It processes any remaining bytes using a load size which is the
- // next smallest power of 2.
- unsigned LoadSize = MaxLoadSize;
- unsigned NumBytesToBeProcessed = Size;
- unsigned Index = 0;
- while (NumBytesToBeProcessed) {
- // Calculate how many blocks we can create with the current load size.
- unsigned NumBlocks = NumBytesToBeProcessed / LoadSize;
- unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize;
- NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize;
-
- // For each NumBlocks, populate the instruction sequence for loading and
- // comparing LoadSize bytes.
- while (NumBlocks--) {
- emitLoadCompareBlock(Index, LoadSize, GEPIndex);
- Index++;
- GEPIndex++;
- }
- // Get the next LoadSize to use.
- LoadSize = LoadSize / 2;
- }
-
- emitMemCmpResultBlock();
- return PhiRes;
-}
-
-// This function checks to see if an expansion of memcmp can be generated.
-// It checks for constant compare size that is less than the max inline size.
-// If an expansion cannot occur, returns false to leave as a library call.
-// Otherwise, the library call is replaced with a new IR instruction sequence.
-/// We want to transform:
-/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
-/// To:
-/// loadbb:
-/// %0 = bitcast i32* %buffer2 to i8*
-/// %1 = bitcast i32* %buffer1 to i8*
-/// %2 = bitcast i8* %1 to i64*
-/// %3 = bitcast i8* %0 to i64*
-/// %4 = load i64, i64* %2
-/// %5 = load i64, i64* %3
-/// %6 = call i64 @llvm.bswap.i64(i64 %4)
-/// %7 = call i64 @llvm.bswap.i64(i64 %5)
-/// %8 = sub i64 %6, %7
-/// %9 = icmp ne i64 %8, 0
-/// br i1 %9, label %res_block, label %loadbb1
-/// res_block: ; preds = %loadbb2,
-/// %loadbb1, %loadbb
-/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
-/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
-/// %10 = icmp ult i64 %phi.src1, %phi.src2
-/// %11 = select i1 %10, i32 -1, i32 1
-/// br label %endblock
-/// loadbb1: ; preds = %loadbb
-/// %12 = bitcast i32* %buffer2 to i8*
-/// %13 = bitcast i32* %buffer1 to i8*
-/// %14 = bitcast i8* %13 to i32*
-/// %15 = bitcast i8* %12 to i32*
-/// %16 = getelementptr i32, i32* %14, i32 2
-/// %17 = getelementptr i32, i32* %15, i32 2
-/// %18 = load i32, i32* %16
-/// %19 = load i32, i32* %17
-/// %20 = call i32 @llvm.bswap.i32(i32 %18)
-/// %21 = call i32 @llvm.bswap.i32(i32 %19)
-/// %22 = zext i32 %20 to i64
-/// %23 = zext i32 %21 to i64
-/// %24 = sub i64 %22, %23
-/// %25 = icmp ne i64 %24, 0
-/// br i1 %25, label %res_block, label %loadbb2
-/// loadbb2: ; preds = %loadbb1
-/// %26 = bitcast i32* %buffer2 to i8*
-/// %27 = bitcast i32* %buffer1 to i8*
-/// %28 = bitcast i8* %27 to i16*
-/// %29 = bitcast i8* %26 to i16*
-/// %30 = getelementptr i16, i16* %28, i16 6
-/// %31 = getelementptr i16, i16* %29, i16 6
-/// %32 = load i16, i16* %30
-/// %33 = load i16, i16* %31
-/// %34 = call i16 @llvm.bswap.i16(i16 %32)
-/// %35 = call i16 @llvm.bswap.i16(i16 %33)
-/// %36 = zext i16 %34 to i64
-/// %37 = zext i16 %35 to i64
-/// %38 = sub i64 %36, %37
-/// %39 = icmp ne i64 %38, 0
-/// br i1 %39, label %res_block, label %loadbb3
-/// loadbb3: ; preds = %loadbb2
-/// %40 = bitcast i32* %buffer2 to i8*
-/// %41 = bitcast i32* %buffer1 to i8*
-/// %42 = getelementptr i8, i8* %41, i8 14
-/// %43 = getelementptr i8, i8* %40, i8 14
-/// %44 = load i8, i8* %42
-/// %45 = load i8, i8* %43
-/// %46 = zext i8 %44 to i32
-/// %47 = zext i8 %45 to i32
-/// %48 = sub i32 %46, %47
-/// br label %endblock
-/// endblock: ; preds = %res_block,
-/// %loadbb3
-/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
-/// ret i32 %phi.res
-static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
- const TargetLowering *TLI, const DataLayout *DL) {
- NumMemCmpCalls++;
-
- // TTI call to check if target would like to expand memcmp. Also, get the
- // MaxLoadSize.
- unsigned MaxLoadSize;
- if (!TTI->expandMemCmp(CI, MaxLoadSize))
- return false;
-
- // Early exit from expansion if -Oz.
- if (CI->getFunction()->optForMinSize())
- return false;
-
- // Early exit from expansion if size is not a constant.
- ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!SizeCast) {
- NumMemCmpNotConstant++;
- return false;
- }
-
- // Early exit from expansion if size greater than max bytes to load.
- uint64_t SizeVal = SizeCast->getZExtValue();
- unsigned NumLoads = 0;
- unsigned RemainingSize = SizeVal;
- unsigned LoadSize = MaxLoadSize;
- while (RemainingSize) {
- NumLoads += RemainingSize / LoadSize;
- RemainingSize = RemainingSize % LoadSize;
- LoadSize = LoadSize / 2;
- }
-
- if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) {
- NumMemCmpGreaterThanMax++;
- return false;
- }
-
- NumMemCmpInlined++;
-
- // MemCmpHelper object creates and sets up basic blocks required for
- // expanding memcmp with size SizeVal.
- unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock;
- MemCmpExpansion MemCmpHelper(CI, SizeVal, MaxLoadSize, NumLoadsPerBlock, *DL);
-
- Value *Res = MemCmpHelper.getMemCmpExpansion(SizeVal);
-
- // Replace call with result of expansion and erase call.
- CI->replaceAllUsesWith(Res);
- CI->eraseFromParent();
-
- return true;
-}
-
bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
BasicBlock *BB = CI->getParent();
@@ -2443,12 +1719,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
return true;
}
- LibFunc Func;
- if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) &&
- Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) {
- ModifiedDT = true;
- return true;
- }
return false;
}
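
The MemCmpExpansion machinery removed above now lives in lib/CodeGen/ExpandMemCmp.cpp; its zero-equality path still reduces the per-load XOR differences with a pairwise OR tree, keeping the dependency height logarithmic rather than a linear OR chain. A standalone sketch of that reduction over plain integers, with illustrative names:

    #include <cassert>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // Mirrors pairWiseOr() from the (moved) expansion: each round ORs
    // adjacent elements and carries an odd trailing element over unchanged.
    static uint64_t reduceByPairwiseOr(std::vector<uint64_t> Diffs) {
      assert(!Diffs.empty() && "need at least one XOR difference");
      while (Diffs.size() > 1) {
        std::vector<uint64_t> Next;
        for (size_t I = 0; I + 1 < Diffs.size(); I += 2)
          Next.push_back(Diffs[I] | Diffs[I + 1]);
        if (Diffs.size() % 2 != 0)
          Next.push_back(Diffs.back());
        Diffs = std::move(Next);
      }
      // Nonzero iff some pair of loaded values differed.
      return Diffs.front();
    }
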
@@ -2599,19 +1869,125 @@ namespace {
/// This is an extended version of TargetLowering::AddrMode
/// which holds actual Value*'s for register values.
struct ExtAddrMode : public TargetLowering::AddrMode {
- Value *BaseReg;
- Value *ScaledReg;
- ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
+ Value *BaseReg = nullptr;
+ Value *ScaledReg = nullptr;
+ Value *OriginalValue = nullptr;
+
+ enum FieldName {
+ NoField = 0x00,
+ BaseRegField = 0x01,
+ BaseGVField = 0x02,
+ BaseOffsField = 0x04,
+ ScaledRegField = 0x08,
+ ScaleField = 0x10,
+ MultipleFields = 0xff
+ };
+
+ ExtAddrMode() = default;
+
void print(raw_ostream &OS) const;
void dump() const;
- bool operator==(const ExtAddrMode& O) const {
- return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
- (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
- (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
+ FieldName compare(const ExtAddrMode &other) {
+ // First check that the types are the same on each field, as differing
+ // types are something we can't cope with later on.
+ if (BaseReg && other.BaseReg &&
+ BaseReg->getType() != other.BaseReg->getType())
+ return MultipleFields;
+ if (BaseGV && other.BaseGV &&
+ BaseGV->getType() != other.BaseGV->getType())
+ return MultipleFields;
+ if (ScaledReg && other.ScaledReg &&
+ ScaledReg->getType() != other.ScaledReg->getType())
+ return MultipleFields;
+
+ // Check each field to see if it differs.
+ unsigned Result = NoField;
+ if (BaseReg != other.BaseReg)
+ Result |= BaseRegField;
+ if (BaseGV != other.BaseGV)
+ Result |= BaseGVField;
+ if (BaseOffs != other.BaseOffs)
+ Result |= BaseOffsField;
+ if (ScaledReg != other.ScaledReg)
+ Result |= ScaledRegField;
+ // Don't count 0 as being a different scale, because that actually means
+ // unscaled (which will already be counted by having no ScaledReg).
+ if (Scale && other.Scale && Scale != other.Scale)
+ Result |= ScaleField;
+
+ if (countPopulation(Result) > 1)
+ return MultipleFields;
+ else
+ return static_cast<FieldName>(Result);
+ }
+
+ // An AddrMode is trivial if it involves no calculation, i.e. it is just a
+ // base with no offset.
+ bool isTrivial() {
+ // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
+ // trivial if at most one of these terms is nonzero, except that BaseGV and
+ // BaseReg both being zero actually means a null pointer value, which we
+ // consider to be 'non-zero' here.
+ return !BaseOffs && !Scale && !(BaseGV && BaseReg);
+ }
+
+ Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
+ switch (Field) {
+ default:
+ return nullptr;
+ case BaseRegField:
+ return BaseReg;
+ case BaseGVField:
+ return BaseGV;
+ case ScaledRegField:
+ return ScaledReg;
+ case BaseOffsField:
+ return ConstantInt::get(IntPtrTy, BaseOffs);
+ }
+ }
+
+ void SetCombinedField(FieldName Field, Value *V,
+ const SmallVectorImpl<ExtAddrMode> &AddrModes) {
+ switch (Field) {
+ default:
+ llvm_unreachable("Unhandled fields are expected to be rejected earlier");
+ break;
+ case ExtAddrMode::BaseRegField:
+ BaseReg = V;
+ break;
+ case ExtAddrMode::BaseGVField:
+ // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
+ // in the BaseReg field.
+ assert(BaseReg == nullptr);
+ BaseReg = V;
+ BaseGV = nullptr;
+ break;
+ case ExtAddrMode::ScaledRegField:
+ ScaledReg = V;
+ // If we have a mix of scaled and unscaled addrmodes then we want scale
+ // to be the scale and not zero.
+ if (!Scale)
+ for (const ExtAddrMode &AM : AddrModes)
+ if (AM.Scale) {
+ Scale = AM.Scale;
+ break;
+ }
+ break;
+ case ExtAddrMode::BaseOffsField:
+ // The offset is no longer a constant, so it goes in ScaledReg with a
+ // scale of 1.
+ assert(ScaledReg == nullptr);
+ ScaledReg = V;
+ Scale = 1;
+ BaseOffs = 0;
+ break;
+ }
}
};
+} // end anonymous namespace
+
#ifndef NDEBUG
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
AM.print(OS);
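
The FieldName encoding lets compare() report which single field differs with one population count, collapsing anything more complicated to MultipleFields, which the sinking code treats as too different to combine. A portable sketch of that collapse rule (the enum mirrors the patch; std::bitset stands in for llvm::countPopulation):

    #include <bitset>

    // Mirrors ExtAddrMode::FieldName from the patch above.
    enum FieldName : unsigned {
      NoField = 0x00,
      BaseRegField = 0x01,
      BaseGVField = 0x02,
      BaseOffsField = 0x04,
      ScaledRegField = 0x08,
      ScaleField = 0x10,
      MultipleFields = 0xff,
    };

    // Zero or one differing-field bit is reported exactly; two or more
    // collapse to MultipleFields.
    static FieldName collapse(unsigned DifferingBits) {
      if (std::bitset<32>(DifferingBits).count() > 1)
        return MultipleFields;
      return static_cast<FieldName>(DifferingBits);
    }
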
@@ -2619,6 +1995,7 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
}
#endif
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ExtAddrMode::print(raw_ostream &OS) const {
bool NeedPlus = false;
OS << "[";
@@ -2650,18 +2027,18 @@ void ExtAddrMode::print(raw_ostream &OS) const {
OS << ']';
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
print(dbgs());
dbgs() << '\n';
}
#endif
+namespace {
+
/// \brief This class provides transaction based operation on the IR.
/// Every change made through this class is recorded in the internal state and
/// can be undone (rollback) until commit is called.
class TypePromotionTransaction {
-
/// \brief This represents the common interface of the individual transaction.
/// Each class implements the logic for doing one specific modification on
/// the IR via the TypePromotionTransaction.
@@ -2675,7 +2052,7 @@ class TypePromotionTransaction {
/// The constructor performs the related action on the IR.
TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
- virtual ~TypePromotionAction() {}
+ virtual ~TypePromotionAction() = default;
/// \brief Undo the modification done by this action.
/// When this method is called, the IR must be in the same state as it was
@@ -2702,6 +2079,7 @@ class TypePromotionTransaction {
Instruction *PrevInst;
BasicBlock *BB;
} Point;
+
/// Remember whether or not the instruction had a previous instruction.
bool HasPrevInstruction;
@@ -2756,6 +2134,7 @@ class TypePromotionTransaction {
class OperandSetter : public TypePromotionAction {
/// Original operand of the instruction.
Value *Origin;
+
/// Index of the modified instruction.
unsigned Idx;
@@ -2813,6 +2192,7 @@ class TypePromotionTransaction {
/// \brief Build a truncate instruction.
class TruncBuilder : public TypePromotionAction {
Value *Val;
+
public:
/// \brief Build a truncate instruction of \p Opnd producing a \p Ty
/// result.
@@ -2837,6 +2217,7 @@ class TypePromotionTransaction {
/// \brief Build a sign extension instruction.
class SExtBuilder : public TypePromotionAction {
Value *Val;
+
public:
/// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
/// result.
@@ -2862,6 +2243,7 @@ class TypePromotionTransaction {
/// \brief Build a zero extension instruction.
class ZExtBuilder : public TypePromotionAction {
Value *Val;
+
public:
/// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
/// result.
@@ -2912,15 +2294,18 @@ class TypePromotionTransaction {
struct InstructionAndIdx {
/// The instruction that uses the replaced instruction.
Instruction *Inst;
+
/// The operand index at which Inst uses the replaced instruction.
unsigned Idx;
+
InstructionAndIdx(Instruction *Inst, unsigned Idx)
: Inst(Inst), Idx(Idx) {}
};
/// Keep track of the original uses (pair Instruction, Index).
SmallVector<InstructionAndIdx, 4> OriginalUses;
- typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
+
+ using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
public:
/// \brief Replace all the use of \p Inst by \p New.
@@ -2951,11 +2336,14 @@ class TypePromotionTransaction {
class InstructionRemover : public TypePromotionAction {
/// Original position of the instruction.
InsertionHandler Inserter;
+
/// Helper structure to hide all the links to the instruction. In other
/// words, this helps to pretend the instruction was removed.
OperandsHider Hider;
+
/// Keep track of the uses replaced, if any.
- UsesReplacer *Replacer;
+ UsesReplacer *Replacer = nullptr;
+
/// Keep track of instructions removed.
SetOfInstrs &RemovedInsts;
@@ -2967,7 +2355,7 @@ class TypePromotionTransaction {
InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
Value *New = nullptr)
: TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
- Replacer(nullptr), RemovedInsts(RemovedInsts) {
+ RemovedInsts(RemovedInsts) {
if (New)
Replacer = new UsesReplacer(Inst, New);
DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
@@ -2996,15 +2384,17 @@ public:
/// Restoration point.
/// The restoration point is a pointer to an action instead of an iterator
/// because the iterator may be invalidated but not the pointer.
- typedef const TypePromotionAction *ConstRestorationPt;
+ using ConstRestorationPt = const TypePromotionAction *;
TypePromotionTransaction(SetOfInstrs &RemovedInsts)
: RemovedInsts(RemovedInsts) {}
/// Commit every change made in that transaction.
void commit();
+
/// Undo all the changes made after the given point.
void rollback(ConstRestorationPt Point);
+
/// Get the current restoration point.
ConstRestorationPt getRestorationPoint() const;
@@ -3012,18 +2402,25 @@ public:
/// @{
/// Same as Instruction::setOperand.
void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
+
/// Same as Instruction::eraseFromParent.
void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
+
/// Same as Value::replaceAllUsesWith.
void replaceAllUsesWith(Instruction *Inst, Value *New);
+
/// Same as Value::mutateType.
void mutateType(Instruction *Inst, Type *NewTy);
+
/// Same as IRBuilder::createTrunc.
Value *createTrunc(Instruction *Opnd, Type *Ty);
+
/// Same as IRBuilder::createSExt.
Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+
/// Same as IRBuilder::createZExt.
Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
+
/// Same as Instruction::moveBefore.
void moveBefore(Instruction *Inst, Instruction *Before);
/// @}
@@ -3031,30 +2428,36 @@ public:
private:
/// The ordered list of actions made so far.
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
- typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
+
+ using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
+
SetOfInstrs &RemovedInsts;
};
+} // end anonymous namespace
+
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
Value *NewVal) {
- Actions.push_back(
- make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
+ Actions.push_back(llvm::make_unique<TypePromotionTransaction::OperandSetter>(
+ Inst, Idx, NewVal));
}
void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
Value *NewVal) {
Actions.push_back(
- make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
- RemovedInsts, NewVal));
+ llvm::make_unique<TypePromotionTransaction::InstructionRemover>(
+ Inst, RemovedInsts, NewVal));
}
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
Value *New) {
- Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
+ Actions.push_back(
+ llvm::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
}
void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
- Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
+ Actions.push_back(
+ llvm::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
}
Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
@@ -3084,7 +2487,8 @@ Value *TypePromotionTransaction::createZExt(Instruction *Inst,
void TypePromotionTransaction::moveBefore(Instruction *Inst,
Instruction *Before) {
Actions.push_back(
- make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
+ llvm::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
+ Inst, Before));
}
TypePromotionTransaction::ConstRestorationPt
@@ -3107,6 +2511,8 @@ void TypePromotionTransaction::rollback(
}
}
+namespace {
+
/// \brief A helper class for matching addressing modes.
///
/// This encapsulates the logic for matching the target-legal addressing modes.
@@ -3128,8 +2534,10 @@ class AddressingModeMatcher {
/// The instructions inserted by other CodeGenPrepare optimizations.
const SetOfInstrs &InsertedInsts;
+
/// A map from the instructions to their type before promotion.
InstrToOrigTy &PromotedInsts;
+
/// The ongoing transaction where every action should be registered.
TypePromotionTransaction &TPT;
@@ -3151,8 +2559,8 @@ class AddressingModeMatcher {
PromotedInsts(PromotedInsts), TPT(TPT) {
IgnoreProfitability = false;
}
-public:
+public:
/// Find the maximal addressing mode that a load/store of V can fold,
/// give an access type of AccessTy. This returns a list of involved
/// instructions in AddrModeInsts.
@@ -3177,6 +2585,7 @@ public:
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
}
+
private:
bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
bool matchAddr(Value *V, unsigned Depth);
@@ -3190,6 +2599,520 @@ private:
Value *PromotedOperand) const;
};
+/// \brief Keep track of simplification of Phi nodes.
+/// Accept the set of all phi nodes and erase phi node from this set
+/// if it is simplified.
+class SimplificationTracker {
+ DenseMap<Value *, Value *> Storage;
+ const SimplifyQuery &SQ;
+ SmallPtrSetImpl<PHINode *> &AllPhiNodes;
+ SmallPtrSetImpl<SelectInst *> &AllSelectNodes;
+
+public:
+ SimplificationTracker(const SimplifyQuery &sq,
+ SmallPtrSetImpl<PHINode *> &APN,
+ SmallPtrSetImpl<SelectInst *> &ASN)
+ : SQ(sq), AllPhiNodes(APN), AllSelectNodes(ASN) {}
+
+ Value *Get(Value *V) {
+ do {
+ auto SV = Storage.find(V);
+ if (SV == Storage.end())
+ return V;
+ V = SV->second;
+ } while (true);
+ }
+
+ Value *Simplify(Value *Val) {
+ SmallVector<Value *, 32> WorkList;
+ SmallPtrSet<Value *, 32> Visited;
+ WorkList.push_back(Val);
+ while (!WorkList.empty()) {
+ auto P = WorkList.pop_back_val();
+ if (!Visited.insert(P).second)
+ continue;
+ if (auto *PI = dyn_cast<Instruction>(P))
+ if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
+ for (auto *U : PI->users())
+ WorkList.push_back(cast<Value>(U));
+ Put(PI, V);
+ PI->replaceAllUsesWith(V);
+ if (auto *PHI = dyn_cast<PHINode>(PI))
+ AllPhiNodes.erase(PHI);
+ if (auto *Select = dyn_cast<SelectInst>(PI))
+ AllSelectNodes.erase(Select);
+ PI->eraseFromParent();
+ }
+ }
+ return Get(Val);
+ }
+
+ void Put(Value *From, Value *To) {
+ Storage.insert({ From, To });
+ }
+};
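+
+// A minimal usage sketch (names are hypothetical, not from this patch):
+// ST.Put(P1, V); // P1 was simplified to V
+// ST.Put(V, W); // V was itself simplified to W later
+// ST.Get(P1); // follows the chain P1 -> V -> W and returns W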
+
+/// \brief A helper class for combining addressing modes.
+class AddressingModeCombiner {
+ using ValueInBB = std::pair<Value *, BasicBlock *>;
+ using FoldAddrToValueMapping = DenseMap<ValueInBB, Value *>;
+ using PHIPair = std::pair<PHINode *, PHINode *>;
+
+private:
+ /// The addressing modes we've collected.
+ SmallVector<ExtAddrMode, 16> AddrModes;
+
+ /// The field in which the AddrModes differ, when we have more than one.
+ ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
+
+ /// Are the AddrModes that we have all just equal to their original values?
+ bool AllAddrModesTrivial = true;
+
+ /// Common type of the differing fields across all addressing modes.
+ Type *CommonType;
+
+ /// SimplifyQuery for simplifyInstruction utility.
+ const SimplifyQuery &SQ;
+
+ /// Original Address.
+ ValueInBB Original;
+
+public:
+ AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue)
+ : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
+
+ /// \brief Get the combined AddrMode
+ const ExtAddrMode &getAddrMode() const {
+ return AddrModes[0];
+ }
+
+ /// \brief Add a new AddrMode if it's compatible with the AddrModes we already
+ /// have.
+ /// \return True iff we succeeded in doing so.
+ bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
+ // Take note of whether we have any non-trivial AddrModes: we need to
+ // detect the case where all AddrModes are trivial, as then we would
+ // introduce a phi or select which just duplicates what's already there.
+ AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
+
+ // If this is the first addrmode then everything is fine.
+ if (AddrModes.empty()) {
+ AddrModes.emplace_back(NewAddrMode);
+ return true;
+ }
+
+ // Figure out how different this is from the other address modes, which we
+ // can do just by comparing against the first one given that we only care
+ // about the cumulative difference.
+ ExtAddrMode::FieldName ThisDifferentField =
+ AddrModes[0].compare(NewAddrMode);
+ if (DifferentField == ExtAddrMode::NoField)
+ DifferentField = ThisDifferentField;
+ else if (DifferentField != ThisDifferentField)
+ DifferentField = ExtAddrMode::MultipleFields;
+
+ // If NewAddrMode differs in only one dimension, and that dimension isn't
+ // the amount that ScaledReg is scaled by, then we can handle it by
+ // inserting a phi/select later on. Even if NewAddrMode is the same we
+ // still need to collect it: its original value is different, and later we
+ // will need all the original values as anchors when finding the common Phi
+ // node.
+ if (DifferentField != ExtAddrMode::MultipleFields &&
+ DifferentField != ExtAddrMode::ScaleField) {
+ AddrModes.emplace_back(NewAddrMode);
+ return true;
+ }
+
+ // We couldn't combine NewAddrMode with the rest, so return failure.
+ AddrModes.clear();
+ return false;
+ }
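+
+ // To illustrate with hypothetical modes: [%a + 4] and [%b + 4] differ only
+ // in BaseReg, so both are collected and can later be merged through a
+ // phi/select of %a and %b, whereas [%a + 4] and [%a + 2*%x] differ in more
+ // than one field and cause the whole collection to be abandoned.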
+
+ /// \brief Combine the addressing modes we've collected into a single
+ /// addressing mode.
+ /// \return True iff we successfully combined them or we only had one so
+ /// didn't need to combine them anyway.
+ bool combineAddrModes() {
+ // If we have no AddrModes then they can't be combined.
+ if (AddrModes.size() == 0)
+ return false;
+
+ // A single AddrMode can trivially be combined.
+ if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
+ return true;
+
+ // If the AddrModes we collected are all just equal to the value they are
+ // derived from then combining them wouldn't do anything useful.
+ if (AllAddrModesTrivial)
+ return false;
+
+ if (!addrModeCombiningAllowed())
+ return false;
+
+ // Build a map from <original value, basic block where we saw it> to the
+ // value of the differing field.
+ // Bail out if there is no common type.
+ FoldAddrToValueMapping Map;
+ if (!initializeMap(Map))
+ return false;
+
+ Value *CommonValue = findCommon(Map);
+ if (CommonValue)
+ AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
+ return CommonValue != nullptr;
+ }
+
+private:
+ /// \brief Initialize Map with anchor values. For each address seen in some
+ /// BB we record the value of the differing field in that address.
+ /// If the address is not an instruction then the basic block is set to null.
+ /// At the same time we find a common type for the differing fields, which
+ /// we will use to create new Phi/Select nodes; it is kept in CommonType.
+ /// Return false if no common type is found.
+ bool initializeMap(FoldAddrToValueMapping &Map) {
+ // Keep track of keys where the value is null. We will need to replace them
+ // with a constant null once we know the common type.
+ SmallVector<ValueInBB, 2> NullValue;
+ Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
+ for (auto &AM : AddrModes) {
+ BasicBlock *BB = nullptr;
+ if (Instruction *I = dyn_cast<Instruction>(AM.OriginalValue))
+ BB = I->getParent();
+
+ Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
+ if (DV) {
+ auto *Type = DV->getType();
+ if (CommonType && CommonType != Type)
+ return false;
+ CommonType = Type;
+ Map[{ AM.OriginalValue, BB }] = DV;
+ } else {
+ NullValue.push_back({ AM.OriginalValue, BB });
+ }
+ }
+ assert(CommonType && "At least one non-null value must exist!");
+ for (auto VIBB : NullValue)
+ Map[VIBB] = Constant::getNullValue(CommonType);
+ return true;
+ }
+
+ /// \brief We have a mapping from <value A, basic block where A was seen>
+ /// to a value B, where B was a field in the addressing mode represented by
+ /// A. We also have an original value C representing an address in some
+ /// basic block. Traversing from C through phis and selects, we ended up
+ /// with the A's in the map. This utility function tries to find a value V
+ /// which is a field in addressing mode C such that traversing from V
+ /// through phi nodes and selects ends up in the corresponding values B in
+ /// the map.
+ /// The utility creates new Phi/Select nodes if needed.
+ // The simple example looks as follows:
+ // BB1:
+ // p1 = b1 + 40
+ // br cond BB2, BB3
+ // BB2:
+ // p2 = b2 + 40
+ // br BB3
+ // BB3:
+ // p = phi [p1, BB1], [p2, BB2]
+ // v = load p
+ // Map is
+ // <p1, BB1> -> b1
+ // <p2, BB2> -> b2
+ // Request is
+ // <p, BB3> -> ?
+ // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3
+ Value *findCommon(FoldAddrToValueMapping &Map) {
+ // Tracks newly created Phi nodes.
+ SmallPtrSet<PHINode *, 32> NewPhiNodes;
+ // Tracks newly created Select nodes.
+ SmallPtrSet<SelectInst *, 32> NewSelectNodes;
+ // Tracks the simplification of newly created Phi nodes. We use this
+ // mapping because newly created Phi nodes are added to the map as they are
+ // created, and simplification of Phi nodes is recursive, so a Phi node may
+ // be simplified after we added it to the map.
+ // Using this mapping we can always find the current value for a map entry.
+ SimplificationTracker ST(SQ, NewPhiNodes, NewSelectNodes);
+
+ // First step, DFS to create PHI nodes for all intermediate blocks.
+ // Also fill traverse order for the second step.
+ SmallVector<ValueInBB, 32> TraverseOrder;
+ InsertPlaceholders(Map, TraverseOrder, NewPhiNodes, NewSelectNodes);
+
+ // Second step: fill new nodes with merged values and simplify if possible.
+ FillPlaceholders(Map, TraverseOrder, ST);
+
+ if (!AddrSinkNewSelects && NewSelectNodes.size() > 0) {
+ DestroyNodes(NewPhiNodes);
+ DestroyNodes(NewSelectNodes);
+ return nullptr;
+ }
+
+ // Now we'd like to match new Phi nodes to existing ones.
+ unsigned PhiNotMatchedCount = 0;
+ if (!MatchPhiSet(NewPhiNodes, ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
+ DestroyNodes(NewPhiNodes);
+ DestroyNodes(NewSelectNodes);
+ return nullptr;
+ }
+
+ auto *Result = ST.Get(Map.find(Original)->second);
+ if (Result) {
+ NumMemoryInstsPhiCreated += NewPhiNodes.size() + PhiNotMatchedCount;
+ NumMemoryInstsSelectCreated += NewSelectNodes.size();
+ }
+ return Result;
+ }
+
+ /// \brief Destroy nodes from a set.
+ template <typename T> void DestroyNodes(SmallPtrSetImpl<T *> &Instructions) {
+ // For safe erasing, replace the node with a dummy value first.
+ auto Dummy = UndefValue::get(CommonType);
+ for (auto I : Instructions) {
+ I->replaceAllUsesWith(Dummy);
+ I->eraseFromParent();
+ }
+ }
+
+ /// \brief Try to match PHI node to Candidate.
+ /// Matcher tracks the matched Phi nodes.
+ bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
+ DenseSet<PHIPair> &Matcher,
+ SmallPtrSetImpl<PHINode *> &PhiNodesToMatch) {
+ SmallVector<PHIPair, 8> WorkList;
+ Matcher.insert({ PHI, Candidate });
+ WorkList.push_back({ PHI, Candidate });
+ SmallSet<PHIPair, 8> Visited;
+ while (!WorkList.empty()) {
+ auto Item = WorkList.pop_back_val();
+ if (!Visited.insert(Item).second)
+ continue;
+ // We iterate over all incoming values of Phi to compare them.
+ // If the values are different, both of them are Phis, the first one is a
+ // Phi we added (subject to match), and both are in the same basic block,
+ // then we can match our pair if the incoming values match. So we state
+ // that these values match and add the pair to the work list to verify it.
+ for (auto B : Item.first->blocks()) {
+ Value *FirstValue = Item.first->getIncomingValueForBlock(B);
+ Value *SecondValue = Item.second->getIncomingValueForBlock(B);
+ if (FirstValue == SecondValue)
+ continue;
+
+ PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
+ PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
+
+ // If one of them is not a Phi, or
+ // the first one is not a Phi node from the set we'd like to match, or
+ // the Phi nodes are in different basic blocks, then
+ // we will not be able to match.
+ if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
+ FirstPhi->getParent() != SecondPhi->getParent())
+ return false;
+
+ // If we already matched them then continue.
+ if (Matcher.count({ FirstPhi, SecondPhi }))
+ continue;
+ // The values are different and do not yet match, so we need them to
+ // match.
+ Matcher.insert({ FirstPhi, SecondPhi });
+ // But we must verify that.
+ WorkList.push_back({ FirstPhi, SecondPhi });
+ }
+ }
+ return true;
+ }
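+
+ // Sketch of the matching idea on hypothetical IR: %sunk_phi matches %p
+ // because their incoming values agree block by block:
+ // %p = phi [%b1, BB1], [%b2, BB2]
+ // %sunk_phi = phi [%b1, BB1], [%b2, BB2]
+ // If the incoming values were instead themselves phis, the pair would be
+ // queued on the work list and verified in the same way.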
+
+ /// \brief For the given set of PHI nodes try to find their equivalents.
+ /// Returns false if matching fails and creation of new Phi nodes is disabled.
+ bool MatchPhiSet(SmallPtrSetImpl<PHINode *> &PhiNodesToMatch,
+ SimplificationTracker &ST, bool AllowNewPhiNodes,
+ unsigned &PhiNotMatchedCount) {
+ DenseSet<PHIPair> Matched;
+ SmallPtrSet<PHINode *, 8> WillNotMatch;
+ while (PhiNodesToMatch.size()) {
+ PHINode *PHI = *PhiNodesToMatch.begin();
+
+ // Seed WillNotMatch with PHI itself; if no Phi node in the basic block
+ // matches it, then none of the nodes collected here will ever match.
+ WillNotMatch.clear();
+ WillNotMatch.insert(PHI);
+
+ // Traverse all Phis until we find an equivalent or fail to do so.
+ bool IsMatched = false;
+ for (auto &P : PHI->getParent()->phis()) {
+ if (&P == PHI)
+ continue;
+ if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
+ break;
+ // If it does not match, collect all Phi nodes from the matcher;
+ // if we end up with no match, then all these Phi nodes will not match
+ // later.
+ for (auto M : Matched)
+ WillNotMatch.insert(M.first);
+ Matched.clear();
+ }
+ if (IsMatched) {
+ // Replace all matched values and erase them.
+ for (auto MV : Matched) {
+ MV.first->replaceAllUsesWith(MV.second);
+ PhiNodesToMatch.erase(MV.first);
+ ST.Put(MV.first, MV.second);
+ MV.first->eraseFromParent();
+ }
+ Matched.clear();
+ continue;
+ }
+ // If we are not allowed to create new nodes then bail out.
+ if (!AllowNewPhiNodes)
+ return false;
+ // Just remove all seen values in matcher. They will not match anything.
+ PhiNotMatchedCount += WillNotMatch.size();
+ for (auto *P : WillNotMatch)
+ PhiNodesToMatch.erase(P);
+ }
+ return true;
+ }
+
+ /// \brief Fill each placeholder with values from its predecessors and
+ /// simplify it.
+ void FillPlaceholders(FoldAddrToValueMapping &Map,
+ SmallVectorImpl<ValueInBB> &TraverseOrder,
+ SimplificationTracker &ST) {
+ while (!TraverseOrder.empty()) {
+ auto Current = TraverseOrder.pop_back_val();
+ assert(Map.find(Current) != Map.end() && "No node to fill!!!");
+ Value *CurrentValue = Current.first;
+ BasicBlock *CurrentBlock = Current.second;
+ Value *V = Map[Current];
+
+ if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
+ // CurrentValue must also be a Select.
+ auto *CurrentSelect = cast<SelectInst>(CurrentValue);
+ auto *TrueValue = CurrentSelect->getTrueValue();
+ ValueInBB TrueItem = { TrueValue, isa<Instruction>(TrueValue)
+ ? CurrentBlock
+ : nullptr };
+ assert(Map.find(TrueItem) != Map.end() && "No True Value!");
+ Select->setTrueValue(ST.Get(Map[TrueItem]));
+ auto *FalseValue = CurrentSelect->getFalseValue();
+ ValueInBB FalseItem = { FalseValue, isa<Instruction>(FalseValue)
+ ? CurrentBlock
+ : nullptr };
+ assert(Map.find(FalseItem) != Map.end() && "No False Value!");
+ Select->setFalseValue(ST.Get(Map[FalseItem]));
+ } else {
+ // Must be a Phi node then.
+ PHINode *PHI = cast<PHINode>(V);
+ // Fill the Phi node with values from predecessors.
+ bool IsDefinedInThisBB =
+ cast<Instruction>(CurrentValue)->getParent() == CurrentBlock;
+ auto *CurrentPhi = dyn_cast<PHINode>(CurrentValue);
+ for (auto B : predecessors(CurrentBlock)) {
+ Value *PV = IsDefinedInThisBB
+ ? CurrentPhi->getIncomingValueForBlock(B)
+ : CurrentValue;
+ ValueInBB item = { PV, isa<Instruction>(PV) ? B : nullptr };
+ assert(Map.find(item) != Map.end() && "No predecessor Value!");
+ PHI->addIncoming(ST.Get(Map[item]), B);
+ }
+ }
+ // Simplify if possible.
+ Map[Current] = ST.Simplify(V);
+ }
+ }
+
+ /// Starting from the original value, recursively iterates over
+ /// predecessors up to the known ending values represented in the map. For
+ /// each traversed block inserts a placeholder Phi or Select.
+ /// Reports all newly created Phi/Select nodes by adding them to the sets.
+ /// Also records the order in which the values have been traversed.
+ void InsertPlaceholders(FoldAddrToValueMapping &Map,
+ SmallVectorImpl<ValueInBB> &TraverseOrder,
+ SmallPtrSetImpl<PHINode *> &NewPhiNodes,
+ SmallPtrSetImpl<SelectInst *> &NewSelectNodes) {
+ SmallVector<ValueInBB, 32> Worklist;
+ assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) &&
+ "Address must be a Phi or Select node");
+ auto *Dummy = UndefValue::get(CommonType);
+ Worklist.push_back(Original);
+ while (!Worklist.empty()) {
+ auto Current = Worklist.pop_back_val();
+ // If the value is not an instruction it is a global, a constant or a
+ // parameter, and we can say that it is observable in any block.
+ // Set the block to null to denote that.
+ // Note that this is also how the anchors were built.
+ if (!isa<Instruction>(Current.first))
+ Current.second = nullptr;
+ // If it is already visited or is an ending value then skip it.
+ if (Map.find(Current) != Map.end())
+ continue;
+ TraverseOrder.push_back(Current);
+
+ Value *CurrentValue = Current.first;
+ BasicBlock *CurrentBlock = Current.second;
+ // CurrentValue must be a Phi node or select. All others must be covered
+ // by anchors.
+ Instruction *CurrentI = cast<Instruction>(CurrentValue);
+ bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock;
+
+ unsigned PredCount =
+ std::distance(pred_begin(CurrentBlock), pred_end(CurrentBlock));
+ // If CurrentValue is not defined in this basic block we are interested
+ // in values in predecessors.
+ if (!IsDefinedInThisBB) {
+ assert(PredCount && "Unreachable block?!");
+ PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
+ &CurrentBlock->front());
+ Map[Current] = PHI;
+ NewPhiNodes.insert(PHI);
+ // Add all predecessors in work list.
+ for (auto B : predecessors(CurrentBlock))
+ Worklist.push_back({ CurrentValue, B });
+ continue;
+ }
+ // Value is defined in this basic block.
+ if (SelectInst *OrigSelect = dyn_cast<SelectInst>(CurrentI)) {
+ // Is it OK to get metadata from OrigSelect?!
+ // Create a Select placeholder with dummy value.
+ SelectInst *Select =
+ SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy,
+ OrigSelect->getName(), OrigSelect, OrigSelect);
+ Map[Current] = Select;
+ NewSelectNodes.insert(Select);
+ // We are interested in the true and false values in this basic block.
+ Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock });
+ Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock });
+ } else {
+ // It must be a Phi node then.
+ auto *CurrentPhi = cast<PHINode>(CurrentI);
+ // Create new Phi node for merge of bases.
+ assert(PredCount && "Unreachable block?!");
+ PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
+ &CurrentBlock->front());
+ Map[Current] = PHI;
+ NewPhiNodes.insert(PHI);
+
+ // Add all predecessors in work list.
+ for (auto B : predecessors(CurrentBlock))
+ Worklist.push_back({ CurrentPhi->getIncomingValueForBlock(B), B });
+ }
+ }
+ }
+
+ bool addrModeCombiningAllowed() {
+ if (DisableComplexAddrModes)
+ return false;
+ switch (DifferentField) {
+ default:
+ return false;
+ case ExtAddrMode::BaseRegField:
+ return AddrSinkCombineBaseReg;
+ case ExtAddrMode::BaseGVField:
+ return AddrSinkCombineBaseGV;
+ case ExtAddrMode::BaseOffsField:
+ return AddrSinkCombineBaseOffs;
+ case ExtAddrMode::ScaledRegField:
+ return AddrSinkCombineScaledReg;
+ }
+ }
+};
+} // end anonymous namespace
+
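+// End-to-end illustration on hypothetical IR of what the combiner enables:
+// %addr = select i1 %c, i32* %p1, i32* %p2
+// %v = load i32, i32* %addr
+// If the modes matched for %p1 and %p2 differ only in the base register, the
+// select is rewritten to choose between the two bases and the rest of the
+// addressing mode is shared, instead of giving up on folding entirely.
+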
/// Try adding ScaleReg*Scale to the current addressing mode.
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
@@ -3294,6 +3217,8 @@ static bool isPromotedInstructionLegal(const TargetLowering &TLI,
ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
}
+namespace {
+
/// \brief Helper class to perform type promotion.
class TypePromotionHelper {
/// \brief Utility function to check whether or not a sign or zero extension
@@ -3370,12 +3295,13 @@ class TypePromotionHelper {
public:
/// Type for the utility function that promotes the operand of Ext.
- typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInstsCost,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs,
- const TargetLowering &TLI);
+ using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI);
+
/// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
@@ -3390,6 +3316,8 @@ public:
const InstrToOrigTy &PromotedInsts);
};
+} // end anonymous namespace
+
bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
Type *ConsideredExtType,
const InstrToOrigTy &PromotedInsts,
@@ -3488,7 +3416,7 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
}
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
- llvm::Instruction *SExt, TypePromotionTransaction &TPT,
+ Instruction *SExt, TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
@@ -3552,9 +3480,8 @@ Value *TypePromotionHelper::promoteOperandForOther(
// Create the truncate now.
Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
- ITrunc->removeFromParent();
// Insert it just after the definition.
- ITrunc->insertAfter(ExtOpnd);
+ ITrunc->moveAfter(ExtOpnd);
if (Truncs)
Truncs->push_back(ITrunc);
}
@@ -3752,7 +3679,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::Shl: {
// Can only handle X*C and X << C.
ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
- if (!RHS)
+ if (!RHS || RHS->getBitWidth() > 64)
return false;
int64_t Scale = RHS->getSExtValue();
if (Opcode == Instruction::Shl)
@@ -4234,8 +4161,6 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
return true;
}
-} // end anonymous namespace
-
/// Return true if the specified values are defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
@@ -4273,13 +4198,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
SmallPtrSet<Value*, 16> Visited;
worklist.push_back(Addr);
- // Use a worklist to iteratively look through PHI nodes, and ensure that
- // the addressing mode obtained from the non-PHI roots of the graph
- // are equivalent.
- bool AddrModeFound = false;
- bool PhiSeen = false;
+ // Use a worklist to iteratively look through PHI and select nodes, and
+ // ensure that the addressing mode obtained from the non-PHI/select roots of
+ // the graph are compatible.
+ bool PhiOrSelectSeen = false;
SmallVector<Instruction*, 16> AddrModeInsts;
- ExtAddrMode AddrMode;
+ const SimplifyQuery SQ(*DL, TLInfo);
+ AddressingModeCombiner AddrModes(SQ, { Addr, MemoryInst->getParent() });
TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
@@ -4303,7 +4228,14 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (PHINode *P = dyn_cast<PHINode>(V)) {
for (Value *IncValue : P->incoming_values())
worklist.push_back(IncValue);
- PhiSeen = true;
+ PhiOrSelectSeen = true;
+ continue;
+ }
+ // Similar for select.
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ worklist.push_back(SI->getFalseValue());
+ worklist.push_back(SI->getTrueValue());
+ PhiOrSelectSeen = true;
continue;
}
@@ -4314,30 +4246,29 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
InsertedInsts, PromotedInsts, TPT);
+ NewAddrMode.OriginalValue = V;
- if (!AddrModeFound) {
- AddrModeFound = true;
- AddrMode = NewAddrMode;
- continue;
- }
- if (NewAddrMode == AddrMode)
- continue;
-
- AddrModeFound = false;
- break;
+ if (!AddrModes.addNewAddrMode(NewAddrMode))
+ break;
}
- // If the addressing mode couldn't be determined, or if multiple different
- // ones were determined, bail out now.
- if (!AddrModeFound) {
+ // Try to combine the AddrModes we've collected. If we couldn't collect any,
+ // or we have multiple but either couldn't combine them or combining them
+ // wouldn't do anything useful, bail out now.
+ if (!AddrModes.combineAddrModes()) {
TPT.rollback(LastKnownGood);
return false;
}
TPT.commit();
+ // Get the combined AddrMode (or the only AddrMode, if we only had one).
+ ExtAddrMode AddrMode = AddrModes.getAddrMode();
+
// If all the instructions matched are already in this BB, don't do anything.
- // If we saw Phi node then it is not local definitely.
- if (!PhiSeen && none_of(AddrModeInsts, [&](Value *V) {
+ // If we saw a Phi node then it is definitely not local, and if we saw a select
+ // then we want to push the address calculation past it even if it's already
+ // in this BB.
+ if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
return IsNonLocalValue(V, MemoryInst->getParent());
})) {
DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
@@ -4351,9 +4282,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Now that we determined the addressing expression we want to use and know
// that we have to sink it into this block. Check to see if we have already
- // done this for some other load/store instr in this block. If so, reuse the
- // computation.
- Value *&SunkAddr = SunkAddrs[Addr];
+ // done this for some other load/store instr in this block. If so, reuse
+ // the computation. Before attempting reuse, check if the address is valid
+ // as it may have been erased.
+
+ WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
+
+ Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
if (SunkAddr) {
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
@@ -4578,6 +4513,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
}
MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
+ // Store the newly computed address into the cache. In the case we reused a
+ // value, this should be idempotent.
+ SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
// If we have no uses, recursively delete the value and all dead instructions
// using it.
@@ -4909,8 +4847,7 @@ bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
assert(LI && ExtFedByLoad && "Expect a valid load and extension");
TPT.commit();
// Move the extend into the same block as the load
- ExtFedByLoad->removeFromParent();
- ExtFedByLoad->insertAfter(LI);
+ ExtFedByLoad->moveAfter(LI);
// CGP does not check if the zext would be speculatively executed when moved
// to the same basic block as the load. Preserving its original location
// would pessimize the debugging experience, as well as negatively impact
@@ -5127,10 +5064,7 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
// b2:
// x = phi x1', x2'
// y = and x, 0xff
-//
-
bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
-
if (!Load->isSimple() ||
!(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
return false;
@@ -5169,7 +5103,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
}
switch (I->getOpcode()) {
- case llvm::Instruction::And: {
+ case Instruction::And: {
auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
if (!AndC)
return false;
@@ -5183,7 +5117,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
break;
}
- case llvm::Instruction::Shl: {
+ case Instruction::Shl: {
auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
if (!ShlC)
return false;
@@ -5192,7 +5126,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
break;
}
- case llvm::Instruction::Trunc: {
+ case Instruction::Trunc: {
EVT TruncVT = TLI->getValueType(*DL, I->getType());
unsigned TruncBitWidth = TruncVT.getSizeInBits();
DemandBits.setLowBits(TruncBitWidth);
@@ -5596,6 +5530,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
namespace {
+
/// \brief Helper class to promote a scalar operation to a vector one.
/// This class is used to move downward extractelement transition.
/// E.g.,
@@ -5623,12 +5558,15 @@ class VectorPromoteHelper {
/// The transition being moved downwards.
Instruction *Transition;
+
/// The sequence of instructions to be promoted.
SmallVector<Instruction *, 4> InstsToBePromoted;
+
/// Cost of combining a store and an extract.
unsigned StoreExtractCombineCost;
+
/// Instruction that will be combined with the transition.
- Instruction *CombineInst;
+ Instruction *CombineInst = nullptr;
/// \brief The instruction that represents the current end of the transition.
/// Since we are faking the promotion until we reach the end of the chain
@@ -5734,7 +5672,7 @@ class VectorPromoteHelper {
/// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
/// used at the index of the extract.
Value *getConstantVector(Constant *Val, bool UseSplat) const {
- unsigned ExtractIdx = UINT_MAX;
+ unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
if (!UseSplat) {
// If we cannot determine where the constant must be, we have to
// use a splat constant.
@@ -5788,7 +5726,7 @@ public:
const TargetTransformInfo &TTI, Instruction *Transition,
unsigned CombineCost)
: DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
- StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
+ StoreExtractCombineCost(CombineCost) {
assert(Transition && "Do not know how to promote null");
}
@@ -5863,7 +5801,8 @@ public:
return true;
}
};
-} // End of anonymous namespace.
+
+} // end anonymous namespace
void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
// At this point, we know that all the operands of ToBePromoted but Def
@@ -5902,8 +5841,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
"this?");
ToBePromoted->setOperand(U.getOperandNo(), NewVal);
}
- Transition->removeFromParent();
- Transition->insertAfter(ToBePromoted);
+ Transition->moveAfter(ToBePromoted);
Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
}
@@ -5911,7 +5849,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
/// Try to push the extractelement towards the stores when the target
/// has this feature and this is profitable.
bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
- unsigned CombineCost = UINT_MAX;
+ unsigned CombineCost = std::numeric_limits<unsigned>::max();
if (DisableStoreExtract || !TLI ||
(!StressStoreExtract &&
!TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
@@ -6073,6 +6011,170 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
return true;
}
+// Return true if the GEP has two operands, the first operand is of a sequential
+// type, and the second operand is a constant.
+static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
+ gep_type_iterator I = gep_type_begin(*GEP);
+ return GEP->getNumOperands() == 2 &&
+ I.isSequential() &&
+ isa<ConstantInt>(GEP->getOperand(1));
+}
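+
+// For example, a gep like the following (hypothetical IR) satisfies this:
+// %g = getelementptr i8, i8* %p, i64 4
+// while geps with a variable index or with multiple indices do not.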
+
+// Try unmerging GEPs to reduce liveness interference (register pressure) across
+// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
+// reducing liveness interference across those edges benefits global register
+// allocation. Currently handles only certain cases.
+//
+// For example, unmerge %GEPI and %UGEPI as below.
+//
+// ---------- BEFORE ----------
+// SrcBlock:
+// ...
+// %GEPIOp = ...
+// ...
+// %GEPI = gep %GEPIOp, Idx
+// ...
+// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
+// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
+// (* %GEPIOp is alive on the indirectbr edges only because it's used by
+// %UGEPI)
+//
+// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
+// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
+// ...
+//
+// DstBi:
+// ...
+// %UGEPI = gep %GEPIOp, UIdx
+// ...
+// ---------------------------
+//
+// ---------- AFTER ----------
+// SrcBlock:
+// ... (same as above)
+// (* %GEPI is still alive on the indirectbr edges)
+// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
+// unmerging)
+// ...
+//
+// DstBi:
+// ...
+// %UGEPI = gep %GEPI, (UIdx-Idx)
+// ...
+// ---------------------------
+//
+// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
+// no longer alive on them.
+//
+// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
+// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
+// not to disable further simplifications and optimizations as a result of GEP
+// merging.
+//
+// Note this unmerging may increase the length of the data flow critical path
+// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
+// between the register pressure and the length of data-flow critical
+// path. Restricting this to the uncommon IndirectBr case would minimize the
+// impact of potentially longer critical path, if any, and the impact on compile
+// time.
+static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
+ const TargetTransformInfo *TTI) {
+ BasicBlock *SrcBlock = GEPI->getParent();
+ // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
+ // (non-IndirectBr) cases exit early here.
+ if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
+ return false;
+ // Check that GEPI is a simple gep with a single constant index.
+ if (!GEPSequentialConstIndexed(GEPI))
+ return false;
+ ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
+ // Check that GEPI is a cheap one.
+ if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType())
+ > TargetTransformInfo::TCC_Basic)
+ return false;
+ Value *GEPIOp = GEPI->getOperand(0);
+ // Check that GEPIOp is an instruction that's also defined in SrcBlock.
+ if (!isa<Instruction>(GEPIOp))
+ return false;
+ auto *GEPIOpI = cast<Instruction>(GEPIOp);
+ if (GEPIOpI->getParent() != SrcBlock)
+ return false;
+ // Check that GEPI is used outside the block, meaning it's alive on the
+ // IndirectBr edge(s).
+ if (find_if(GEPI->users(), [&](User *Usr) {
+ if (auto *I = dyn_cast<Instruction>(Usr)) {
+ if (I->getParent() != SrcBlock) {
+ return true;
+ }
+ }
+ return false;
+ }) == GEPI->users().end())
+ return false;
+ // The second elements of the GEP chains to be unmerged.
+ std::vector<GetElementPtrInst *> UGEPIs;
+ // Check each user of GEPIOp to see whether unmerging would make GEPIOp no
+ // longer alive on the IndirectBr edges.
+ for (User *Usr : GEPIOp->users()) {
+ if (Usr == GEPI) continue;
+ // Check if Usr is an Instruction. If not, give up.
+ if (!isa<Instruction>(Usr))
+ return false;
+ auto *UI = cast<Instruction>(Usr);
+ // If Usr is in the same block as GEPIOp, that's fine; skip it.
+ if (UI->getParent() == SrcBlock)
+ continue;
+ // Check if Usr is a GEP. If not, give up.
+ if (!isa<GetElementPtrInst>(Usr))
+ return false;
+ auto *UGEPI = cast<GetElementPtrInst>(Usr);
+ // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
+ // the pointer operand to it. If so, record it in the vector. If not, give
+ // up.
+ if (!GEPSequentialConstIndexed(UGEPI))
+ return false;
+ if (UGEPI->getOperand(0) != GEPIOp)
+ return false;
+ if (GEPIIdx->getType() !=
+ cast<ConstantInt>(UGEPI->getOperand(1))->getType())
+ return false;
+ ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+ if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType())
+ > TargetTransformInfo::TCC_Basic)
+ return false;
+ UGEPIs.push_back(UGEPI);
+ }
+ if (UGEPIs.size() == 0)
+ return false;
+ // Check the materializing cost of (Uidx-Idx).
+ for (GetElementPtrInst *UGEPI : UGEPIs) {
+ ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+ APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
+ unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType());
+ if (ImmCost > TargetTransformInfo::TCC_Basic)
+ return false;
+ }
+ // Now unmerge between GEPI and UGEPIs.
+ for (GetElementPtrInst *UGEPI : UGEPIs) {
+ UGEPI->setOperand(0, GEPI);
+ ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+ Constant *NewUGEPIIdx =
+ ConstantInt::get(GEPIIdx->getType(),
+ UGEPIIdx->getValue() - GEPIIdx->getValue());
+ UGEPI->setOperand(1, NewUGEPIIdx);
+ // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
+ // inbounds to avoid UB.
+ if (!GEPI->isInBounds()) {
+ UGEPI->setIsInBounds(false);
+ }
+ }
+ // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
+ // alive on IndirectBr edges).
+ assert(find_if(GEPIOp->users(), [&](User *Usr) {
+ return cast<Instruction>(Usr)->getParent() != SrcBlock;
+ }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock");
+ return true;
+}
+
bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
@@ -6186,6 +6288,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
optimizeInst(NC, ModifiedDT);
return true;
}
+ if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
+ return true;
+ }
return false;
}
@@ -6266,7 +6371,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
Instruction *Insn = &*BI++;
DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
// Leave dbg.values that refer to an alloca alone. These
- // instrinsics describe the address of a variable (= the alloca)
+ // intrinsics describe the address of a variable (= the alloca)
// being taken. They should not be moved next to the alloca
// (and to the beginning of the scope), but rather stay close to
// where said address is used.
@@ -6298,7 +6403,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
/// \brief Scale down both weights to fit into uint32_t.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
- uint32_t Scale = (NewMax / UINT32_MAX) + 1;
+ uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
NewTrue = NewTrue / Scale;
NewFalse = NewFalse / Scale;
}
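+// For instance (illustrative numbers): NewTrue = 2^33 and NewFalse = 2^10
+// give Scale = (2^33 / (2^32 - 1)) + 1 = 3, so both weights are divided by 3
+// and then fit into uint32_t while roughly preserving their ratio.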
diff --git a/lib/CodeGen/CountingFunctionInserter.cpp b/lib/CodeGen/CountingFunctionInserter.cpp
deleted file mode 100644
index 7f7350f5fb5c..000000000000
--- a/lib/CodeGen/CountingFunctionInserter.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-//===- CountingFunctionInserter.cpp - Insert mcount-like function calls ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Insert calls to counter functions, such as mcount, intended to be called
-// once per function, at the beginning of each function.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
-using namespace llvm;
-
-namespace {
- struct CountingFunctionInserter : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- CountingFunctionInserter() : FunctionPass(ID) {
- initializeCountingFunctionInserterPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-
- bool runOnFunction(Function &F) override {
- std::string CountingFunctionName =
- F.getFnAttribute("counting-function").getValueAsString();
- if (CountingFunctionName.empty())
- return false;
-
- Type *VoidTy = Type::getVoidTy(F.getContext());
- Constant *CountingFn =
- F.getParent()->getOrInsertFunction(CountingFunctionName,
- VoidTy);
- CallInst::Create(CountingFn, "", &*F.begin()->getFirstInsertionPt());
- return true;
- }
- };
-
- char CountingFunctionInserter::ID = 0;
-}
-
-INITIALIZE_PASS(CountingFunctionInserter, "cfinserter",
- "Inserts calls to mcount-like functions", false, false)
-
-//===----------------------------------------------------------------------===//
-//
-// CountingFunctionInserter - Give any unnamed non-void instructions "tmp" names.
-//
-FunctionPass *llvm::createCountingFunctionInserterPass() {
- return new CountingFunctionInserter();
-}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index a3cf2846d2f5..98e22b24d37a 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -1,4 +1,4 @@
-//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===//
+//===- CriticalAntiDepBreaker.cpp - Anti-dep breaker ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,14 +14,29 @@
//===----------------------------------------------------------------------===//
#include "CriticalAntiDepBreaker.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <map>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -35,8 +50,7 @@ CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi,
Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0),
DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {}
-CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
-}
+CriticalAntiDepBreaker::~CriticalAntiDepBreaker() = default;
void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
const unsigned BBSize = BB->size();
@@ -156,11 +170,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
// FIXME: The issue with predicated instruction is more complex. We are being
// conservative here because the kill markers cannot be trusted after
// if-conversion:
- // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
// ...
- // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
- // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
- // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
+ // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
+ // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
//
// The first R6 kill is not really a kill since it's killed by a predicated
// instruction which may not be executed. The second R6 def may or may not
@@ -333,8 +347,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
bool
CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
RegRefIter RegRefEnd,
- unsigned NewReg)
-{
+ unsigned NewReg) {
for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
MachineOperand *RefOper = I->second;
@@ -381,8 +394,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin,
unsigned AntiDepReg,
unsigned LastNewReg,
const TargetRegisterClass *RC,
- SmallVectorImpl<unsigned> &Forbid)
-{
+ SmallVectorImpl<unsigned> &Forbid) {
ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
for (unsigned i = 0; i != Order.size(); ++i) {
unsigned NewReg = Order[i];
@@ -423,7 +435,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin,
}
unsigned CriticalAntiDepBreaker::
-BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+BreakAntiDependencies(const std::vector<SUnit> &SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex,
@@ -436,7 +448,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// This is used for updating debug information.
//
// FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
- DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
+ DenseMap<MachineInstr *, const SUnit *> MISUnitMap;
// Find the node at the bottom of the critical path.
const SUnit *Max = nullptr;
@@ -454,7 +466,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
DEBUG(dbgs() << "Available regs:");
for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
if (KillIndices[Reg] == ~0u)
- DEBUG(dbgs() << " " << TRI->getName(Reg));
+ DEBUG(dbgs() << " " << printReg(Reg, TRI));
}
DEBUG(dbgs() << '\n');
}
@@ -634,9 +646,9 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
LastNewReg[AntiDepReg],
RC, ForbidRegs)) {
DEBUG(dbgs() << "Breaking anti-dependence edge on "
- << TRI->getName(AntiDepReg)
- << " with " << RegRefs.count(AntiDepReg) << " references"
- << " using " << TRI->getName(NewReg) << "!\n");
+ << printReg(AntiDepReg, TRI) << " with "
+ << RegRefs.count(AntiDepReg) << " references"
+ << " using " << printReg(NewReg, TRI) << "!\n");
// Update the references to the old register to refer to the new
// register.
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 678779fa1a26..09c4423a2f05 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -1,4 +1,4 @@
-//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//===- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,16 +18,21 @@
#include "AntiDepBreaker.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Compiler.h"
+#include <map>
+#include <vector>
namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineOperand;
+class MachineRegisterInfo;
class RegisterClassInfo;
class TargetInstrInfo;
+class TargetRegisterClass;
class TargetRegisterInfo;
-class MachineFunction;
class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
MachineFunction& MF;
@@ -46,12 +51,13 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
/// corresponding value is null. If the register is live but used in
/// multiple register classes, the corresponding value is -1 casted to a
/// pointer.
- std::vector<const TargetRegisterClass*> Classes;
+ std::vector<const TargetRegisterClass *> Classes;
/// Map registers to all their references within a live range.
std::multimap<unsigned, MachineOperand *> RegRefs;
- typedef std::multimap<unsigned, MachineOperand *>::const_iterator
- RegRefIter;
+
+ using RegRefIter =
+ std::multimap<unsigned, MachineOperand *>::const_iterator;
/// The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
@@ -66,7 +72,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
BitVector KeepRegs;
public:
- CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
+ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI);
~CriticalAntiDepBreaker() override;
/// Initialize anti-dep breaking for a new basic block.
@@ -74,7 +80,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
/// Identifiy anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
- unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex,
@@ -101,6 +107,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
const TargetRegisterClass *RC,
SmallVectorImpl<unsigned> &Forbid);
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 853b9afa1026..848db444270d 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -29,13 +29,13 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -336,6 +336,38 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
VLIWScheduler->finishBlock();
}
+bool VLIWPacketizerList::alias(const MachineMemOperand &Op1,
+ const MachineMemOperand &Op2,
+ bool UseTBAA) const {
+ if (!Op1.getValue() || !Op2.getValue())
+ return true;
+
+ int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset());
+ int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset;
+ int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset;
+
+ AliasResult AAResult =
+ AA->alias(MemoryLocation(Op1.getValue(), Overlapa,
+ UseTBAA ? Op1.getAAInfo() : AAMDNodes()),
+ MemoryLocation(Op2.getValue(), Overlapb,
+ UseTBAA ? Op2.getAAInfo() : AAMDNodes()));
+
+ return AAResult != NoAlias;
+}
+
+bool VLIWPacketizerList::alias(const MachineInstr &MI1,
+ const MachineInstr &MI2,
+ bool UseTBAA) const {
+ if (MI1.memoperands_empty() || MI2.memoperands_empty())
+ return true;
+
+ for (const MachineMemOperand *Op1 : MI1.memoperands())
+ for (const MachineMemOperand *Op2 : MI2.memoperands())
+ if (alias(*Op1, *Op2, UseTBAA))
+ return true;
+ return false;
+}
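
A note on the arithmetic in the first overload above: the two lengths handed to AA->alias() model each access as running from the smaller of the two offsets to the end of that access, anchored at its own base value. A minimal host-side sketch of just that computation, using a hypothetical struct and values rather than the pass's API:

  #include <algorithm>
  #include <cstdint>
  #include <utility>

  struct MemAccess { int64_t Offset, Size; };

  // Returns the lengths corresponding to Overlapa/Overlapb above.
  static std::pair<int64_t, int64_t> overlapLengths(MemAccess A, MemAccess B) {
    int64_t MinOffset = std::min(A.Offset, B.Offset);
    return {A.Size + A.Offset - MinOffset, B.Size + B.Offset - MinOffset};
  }

  // Example: A = {4, 4} and B = {8, 4} give MinOffset = 4 and lengths {4, 8},
  // so B's query spans [4, 12) measured from the smaller offset.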
+
// Add a DAG mutation object to the ordered list.
void VLIWPacketizerList::addMutation(
std::unique_ptr<ScheduleDAGMutation> Mutation) {
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 91d18e2bcaa6..e6a54bb300f2 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -15,11 +15,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -95,7 +94,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
}
bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
bool AnyChanges = false;
diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp
index ab9a0592e017..7d7eb57352a2 100644
--- a/lib/CodeGen/DetectDeadLanes.cpp
+++ b/lib/CodeGen/DetectDeadLanes.cpp
@@ -17,12 +17,12 @@
/// when subregisters are involved.
///
/// Example:
-/// %vreg0 = some definition
-/// %vreg1 = IMPLICIT_DEF
-/// %vreg2 = REG_SEQUENCE %vreg0, sub0, %vreg1, sub1
-/// %vreg3 = EXTRACT_SUBREG %vreg2, sub1
-/// = use %vreg3
-/// The %vreg0 definition is dead and %vreg3 contains an undefined value.
+/// %0 = some definition
+/// %1 = IMPLICIT_DEF
+/// %2 = REG_SEQUENCE %0, sub0, %1, sub1
+/// %3 = EXTRACT_SUBREG %2, sub1
+/// = use %3
+/// The %0 definition is dead and %3 contains an undefined value.
//
//===----------------------------------------------------------------------===//
@@ -34,14 +34,13 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -526,7 +525,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
const VRegInfo &Info = VRegInfos[RegIdx];
- dbgs() << PrintReg(Reg, nullptr)
+ dbgs() << printReg(Reg, nullptr)
<< " Used: " << PrintLaneMask(Info.UsedLanes)
<< " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
}
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 2f833260bca2..39d80c0bf9bd 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -1,4 +1,4 @@
-//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===//
+//===- DwarfEHPrepare - Prepare exception handling for code generation ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,20 +13,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Local.h"
+#include <cstddef>
+
using namespace llvm;
#define DEBUG_TYPE "dwarfehprepare"
@@ -34,12 +43,13 @@ using namespace llvm;
STATISTIC(NumResumesLowered, "Number of resume calls lowered");
namespace {
+
class DwarfEHPrepare : public FunctionPass {
// RewindFunction - _Unwind_Resume or the target equivalent.
- Constant *RewindFunction;
+ Constant *RewindFunction = nullptr;
- DominatorTree *DT;
- const TargetLowering *TLI;
+ DominatorTree *DT = nullptr;
+ const TargetLowering *TLI = nullptr;
bool InsertUnwindResumeCalls(Function &Fn);
Value *GetExceptionObject(ResumeInst *RI);
@@ -51,9 +61,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- DwarfEHPrepare()
- : FunctionPass(ID), RewindFunction(nullptr), DT(nullptr), TLI(nullptr) {
- }
+ DwarfEHPrepare() : FunctionPass(ID) {}
bool runOnFunction(Function &Fn) override;
@@ -68,9 +76,11 @@ namespace {
return "Exception handling preparation";
}
};
+
} // end anonymous namespace
char DwarfEHPrepare::ID = 0;
+
INITIALIZE_PASS_BEGIN(DwarfEHPrepare, DEBUG_TYPE,
"Prepare DWARF exceptions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
@@ -162,7 +172,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
BasicBlock *BB = RI->getParent();
new UnreachableInst(Ctx, RI);
RI->eraseFromParent();
- SimplifyCFG(BB, TTI, 1);
+ simplifyCFG(BB, TTI);
}
}
Resumes.resize(ResumesLeft);
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 402afe75b141..6294ff450113 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -30,12 +30,12 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -185,7 +185,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
// Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
// get right.
if (!MBB->livein_empty()) {
- DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
+ DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
return false;
}
@@ -199,7 +199,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
continue;
if (++InstrCount > BlockInstrLimit && !Stress) {
- DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
+ DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
<< BlockInstrLimit << " instructions.\n");
return false;
}
@@ -246,7 +246,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
if (!DefMI || DefMI->getParent() != Head)
continue;
if (InsertAfter.insert(DefMI).second)
- DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI);
+ DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " << *DefMI);
if (DefMI->isTerminator()) {
DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
return false;
@@ -317,7 +317,7 @@ bool SSAIfConv::findInsertionPoint() {
dbgs() << "Would clobber";
for (SparseSet<unsigned>::const_iterator
i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
- dbgs() << ' ' << PrintRegUnit(*i, TRI);
+ dbgs() << ' ' << printRegUnit(*i, TRI);
dbgs() << " live before " << *I;
});
continue;
@@ -361,10 +361,10 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
Succ1->succ_begin()[0] != Tail)
return false;
- DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber()
- << " -> BB#" << Succ0->getNumber()
- << "/BB#" << Succ1->getNumber()
- << " -> BB#" << Tail->getNumber() << '\n');
+ DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> "
+ << printMBBReference(*Succ0) << "/"
+ << printMBBReference(*Succ1) << " -> "
+ << printMBBReference(*Tail) << '\n');
// Live-in physregs are tricky to get right when speculating code.
if (!Tail->livein_empty()) {
@@ -372,9 +372,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
return false;
}
} else {
- DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber()
- << " -> BB#" << Succ0->getNumber()
- << " -> BB#" << Tail->getNumber() << '\n');
+ DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
+ << printMBBReference(*Succ0) << " -> "
+ << printMBBReference(*Tail) << '\n');
}
// This is a triangle or a diamond.
@@ -563,8 +563,8 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
assert(Head->succ_empty() && "Additional head successors?");
if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
// Splice Tail onto the end of Head.
- DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber()
- << " into head BB#" << Head->getNumber() << '\n');
+ DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) << " into head "
+ << printMBBReference(*Head) << '\n');
Head->splice(Head->end(), Tail,
Tail->begin(), Tail->end());
Head->transferSuccessorsAndUpdatePHIs(Tail);
@@ -785,7 +785,7 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
<< "********** Function: " << MF.getName() << '\n');
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
// Only run if conversion if the target wants it.
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index b3a25544be39..54c53eb16312 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -80,13 +80,15 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
O << "digraph {\n";
for (const auto &MBB : *MF) {
unsigned BB = MBB.getNumber();
- O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
- << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
- << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
+ O << "\t\"" << printMBBReference(MBB) << "\" [ shape=box ]\n"
+ << '\t' << G.getBundle(BB, false) << " -> \"" << printMBBReference(MBB)
+ << "\"\n"
+ << "\t\"" << printMBBReference(MBB) << "\" -> " << G.getBundle(BB, true)
+ << '\n';
for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
SE = MBB.succ_end(); SI != SE; ++SI)
- O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
- << "\" [ color=lightgray ]\n";
+ O << "\t\"" << printMBBReference(MBB) << "\" -> \""
+ << printMBBReference(**SI) << "\" [ color=lightgray ]\n";
}
O << "}\n";
return O;
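
The recurring substitution in this and the neighboring hunks replaces hand-rolled "BB#" << getNumber() streaming with printMBBReference, which centralizes how blocks are named in debug and DOT output; around this point in LLVM's history it prints references in the "%bb.N" style. A hypothetical stand-in, purely to show the shape of the output, not LLVM's implementation:

  #include <string>

  // Illustrative only: formats a block number the way the new helper renders it.
  static std::string mbbRef(int BlockNumber) {
    return "%bb." + std::to_string(BlockNumber);
  }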
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index e272d25047e6..61ec3f4be1dc 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -15,11 +15,11 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -200,7 +200,7 @@ void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
LiveRegs[rx].Def = -1;
}
}
- DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n");
+ DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
return;
}
@@ -246,7 +246,7 @@ void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
}
}
DEBUG(
- dbgs() << "BB#" << MBB->getNumber()
+ dbgs() << printMBBReference(*MBB)
<< (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n"));
}
@@ -394,7 +394,7 @@ void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency,
continue;
for (int rx : regIndices(MO.getReg())) {
// This instruction explicitly defines rx.
- DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
+ DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << CurInstr
<< '\t' << *MI);
if (breakDependency) {
@@ -617,7 +617,7 @@ bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) {
}
bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) {
- if (skipFunction(*mf.getFunction()))
+ if (skipFunction(mf.getFunction()))
return false;
MF = &mf;
TII = MF->getSubtarget().getInstrInfo();
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index 324ea171293d..ec586a2caea3 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -17,9 +17,9 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "expand-isel-pseudos"
diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp
new file mode 100644
index 000000000000..09c808463a41
--- /dev/null
+++ b/lib/CodeGen/ExpandMemCmp.cpp
@@ -0,0 +1,825 @@
+//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to expand memcmp() calls into optimally-sized loads and
+// compares for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "expandmemcmp"
+
+STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
+STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
+STATISTIC(NumMemCmpGreaterThanMax,
+ "Number of memcmp calls with size greater than max size");
+STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
+
+static cl::opt<unsigned> MemCmpNumLoadsPerBlock(
+ "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
+ cl::desc("The number of loads per basic block for inline expansion of "
+ "memcmp that is only being compared against zero."));
+
+namespace {
+
+// This class provides helper functions to expand a memcmp library call into an
+// inline expansion.
+class MemCmpExpansion {
+ struct ResultBlock {
+ BasicBlock *BB = nullptr;
+ PHINode *PhiSrc1 = nullptr;
+ PHINode *PhiSrc2 = nullptr;
+
+ ResultBlock() = default;
+ };
+
+ CallInst *const CI;
+ ResultBlock ResBlock;
+ const uint64_t Size;
+ unsigned MaxLoadSize;
+ uint64_t NumLoadsNonOneByte;
+ const uint64_t NumLoadsPerBlock;
+ std::vector<BasicBlock *> LoadCmpBlocks;
+ BasicBlock *EndBlock;
+ PHINode *PhiRes;
+ const bool IsUsedForZeroCmp;
+ const DataLayout &DL;
+ IRBuilder<> Builder;
+ // Represents the decomposition in blocks of the expansion. For example,
+ // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
+ // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
+ // TODO(courbet): Involve the target more in this computation. On X86, 7
+ // bytes can be done more efficiently with two overlapping 4-byte loads than
+ // covering the interval with [{4, 0}, {2, 4}, {1, 6}].
+ struct LoadEntry {
+ LoadEntry(unsigned LoadSize, uint64_t Offset)
+ : LoadSize(LoadSize), Offset(Offset) {
+ assert(Offset % LoadSize == 0 && "invalid load entry");
+ }
+
+ uint64_t getGEPIndex() const { return Offset / LoadSize; }
+
+ // The size of the load for this block, in bytes.
+ const unsigned LoadSize;
+ // The offset of this load WRT the base pointer, in bytes.
+ const uint64_t Offset;
+ };
+ SmallVector<LoadEntry, 8> LoadSequence;
+
+ void createLoadCmpBlocks();
+ void createResultBlock();
+ void setupResultBlockPHINodes();
+ void setupEndBlockPHINodes();
+ Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
+ void emitLoadCompareBlock(unsigned BlockIndex);
+ void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
+ unsigned &LoadIndex);
+ void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex);
+ void emitMemCmpResultBlock();
+ Value *getMemCmpExpansionZeroCase();
+ Value *getMemCmpEqZeroOneBlock();
+ Value *getMemCmpOneBlock();
+
+ public:
+ MemCmpExpansion(CallInst *CI, uint64_t Size,
+ const TargetTransformInfo::MemCmpExpansionOptions &Options,
+ unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
+ unsigned NumLoadsPerBlock, const DataLayout &DL);
+
+ unsigned getNumBlocks();
+ uint64_t getNumLoads() const { return LoadSequence.size(); }
+
+ Value *getMemCmpExpansion();
+};
+
+// Initialize the basic block structure required for expansion of a memcmp
+// call with the given maximum load size and memcmp size parameter.
+// This structure includes:
+// 1. A list of load compare blocks - LoadCmpBlocks.
+// 2. An EndBlock, split from original instruction point, which is the block to
+// return from.
+// 3. ResultBlock, block to branch to for early exit when a
+// LoadCmpBlock finds a difference.
+MemCmpExpansion::MemCmpExpansion(
+ CallInst *const CI, uint64_t Size,
+ const TargetTransformInfo::MemCmpExpansionOptions &Options,
+ const unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
+ const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout)
+ : CI(CI),
+ Size(Size),
+ MaxLoadSize(0),
+ NumLoadsNonOneByte(0),
+ NumLoadsPerBlock(NumLoadsPerBlock),
+ IsUsedForZeroCmp(IsUsedForZeroCmp),
+ DL(TheDataLayout),
+ Builder(CI) {
+ assert(Size > 0 && "zero blocks");
+ // Scale the max size down if the target can load more bytes than we need.
+ size_t LoadSizeIndex = 0;
+ while (LoadSizeIndex < Options.LoadSizes.size() &&
+ Options.LoadSizes[LoadSizeIndex] > Size) {
+ ++LoadSizeIndex;
+ }
+ this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex];
+ // Compute the decomposition.
+ uint64_t CurSize = Size;
+ uint64_t Offset = 0;
+ while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) {
+ const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex];
+ assert(LoadSize > 0 && "zero load size");
+ const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
+ if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
+ // Do not expand if the total number of loads is larger than what the
+ // target allows. Note that it's important that we exit before completing
+ // the expansion to avoid using a ton of memory to store the expansion for
+ // large sizes.
+ LoadSequence.clear();
+ return;
+ }
+ if (NumLoadsForThisSize > 0) {
+ for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
+ LoadSequence.push_back({LoadSize, Offset});
+ Offset += LoadSize;
+ }
+ if (LoadSize > 1) {
+ ++NumLoadsNonOneByte;
+ }
+ CurSize = CurSize % LoadSize;
+ }
+ ++LoadSizeIndex;
+ }
+ assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+}
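
The constructor's loop above is a greedy decomposition: skip load sizes wider than the whole compare, then take as many loads of each size as fit and carry the remainder to the next smaller size. A standalone sketch under the assumption of a descending LoadSizes list, with the MaxNumLoads bail-out omitted for brevity (the driver below is hypothetical):

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  struct Load { unsigned Size; uint64_t Offset; };

  static std::vector<Load> decompose(uint64_t Size,
                                     const std::vector<unsigned> &LoadSizes) {
    std::vector<Load> Seq;
    size_t I = 0;
    while (I < LoadSizes.size() && LoadSizes[I] > Size)
      ++I; // Skip load sizes wider than the whole comparison.
    uint64_t CurSize = Size, Offset = 0;
    for (; CurSize && I < LoadSizes.size(); ++I) {
      const unsigned LS = LoadSizes[I];
      for (uint64_t N = CurSize / LS; N != 0; --N) {
        Seq.push_back({LS, Offset}); // One load of size LS at this offset.
        Offset += LS;
      }
      CurSize %= LS; // The remainder falls to the next smaller size.
    }
    return Seq;
  }

  // For Size = 33 and LoadSizes = {16, 8, 4, 2, 1} this yields
  // {16, 0}, {16, 16}, {1, 32}, matching the comment on LoadSequence.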
+
+unsigned MemCmpExpansion::getNumBlocks() {
+ if (IsUsedForZeroCmp)
+ return getNumLoads() / NumLoadsPerBlock +
+ (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0);
+ return getNumLoads();
+}
+
+void MemCmpExpansion::createLoadCmpBlocks() {
+ for (unsigned i = 0; i < getNumBlocks(); i++) {
+ BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
+ EndBlock->getParent(), EndBlock);
+ LoadCmpBlocks.push_back(BB);
+ }
+}
+
+void MemCmpExpansion::createResultBlock() {
+ ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
+ EndBlock->getParent(), EndBlock);
+}
+
+// This function creates the IR instructions for loading and comparing 1 byte.
+// It loads 1 byte from each source of the memcmp parameters with the given
+// GEPIndex. It then subtracts the two loaded values and adds this result to the
+// final phi node for selecting the memcmp result.
+void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
+ unsigned GEPIndex) {
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+ Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Get the base address using the GEPIndex.
+ if (GEPIndex != 0) {
+ Source1 = Builder.CreateGEP(LoadSizeType, Source1,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ Source2 = Builder.CreateGEP(LoadSizeType, Source2,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ }
+
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
+ Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
+
+ PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);
+
+ if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
+ // Branch early to EndBlock if a difference is found; otherwise, continue
+ // to the next LoadCmpBlock.
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
+ ConstantInt::get(Diff->getType(), 0));
+ BranchInst *CmpBr =
+ BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
+ Builder.Insert(CmpBr);
+ } else {
+ // The last block has an unconditional branch to EndBlock.
+ BranchInst *CmpBr = BranchInst::Create(EndBlock);
+ Builder.Insert(CmpBr);
+ }
+}
+
+/// Generate an equality comparison for one or more pairs of loaded values.
+/// This is used in the case where the memcmp() call is compared equal or not
+/// equal to zero.
+Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
+ unsigned &LoadIndex) {
+ assert(LoadIndex < getNumLoads() &&
+ "getCompareLoadPairs() called with no remaining loads");
+ std::vector<Value *> XorList, OrList;
+ Value *Diff;
+
+ const unsigned NumLoads =
+ std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock);
+
+ // For a single-block expansion, start inserting before the memcmp call.
+ if (LoadCmpBlocks.empty())
+ Builder.SetInsertPoint(CI);
+ else
+ Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+
+ Value *Cmp = nullptr;
+ // If we have multiple loads per block, we need to generate a composite
+ // comparison using xor+or. The type for the combinations is the largest load
+ // type.
+ IntegerType *const MaxLoadType =
+ NumLoads == 1 ? nullptr
+ : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
+ const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
+
+ IntegerType *LoadSizeType =
+ IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
+
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Get the base address using a GEP.
+ if (CurLoadEntry.Offset != 0) {
+ Source1 = Builder.CreateGEP(
+ LoadSizeType, Source1,
+ ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
+ Source2 = Builder.CreateGEP(
+ LoadSizeType, Source2,
+ ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
+ }
+
+ // Get a constant or load a value for each source address.
+ Value *LoadSrc1 = nullptr;
+ if (auto *Source1C = dyn_cast<Constant>(Source1))
+ LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
+ if (!LoadSrc1)
+ LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+
+ Value *LoadSrc2 = nullptr;
+ if (auto *Source2C = dyn_cast<Constant>(Source2))
+ LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
+ if (!LoadSrc2)
+ LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ if (NumLoads != 1) {
+ if (LoadSizeType != MaxLoadType) {
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
+ }
+ // If we have multiple loads per block, we need to generate a composite
+ // comparison using xor+or.
+ Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
+ Diff = Builder.CreateZExt(Diff, MaxLoadType);
+ XorList.push_back(Diff);
+ } else {
+ // If there's only one load per block, we just compare the loaded values.
+ Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+ }
+ }
+
+ auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
+ std::vector<Value *> OutList;
+ for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
+ Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
+ OutList.push_back(Or);
+ }
+ if (InList.size() % 2 != 0)
+ OutList.push_back(InList.back());
+ return OutList;
+ };
+
+ if (!Cmp) {
+ // Pairwise OR the XOR results.
+ OrList = pairWiseOr(XorList);
+
+ // Pairwise OR the OR results until one result left.
+ while (OrList.size() != 1) {
+ OrList = pairWiseOr(OrList);
+ }
+ Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
+ }
+
+ return Cmp;
+}
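
The xor+or combining in getCompareLoadPairs can be pictured on plain integers: each pair XORs to a nonzero word iff it differs, and the XOR results are ORed pairwise, level by level, until a single word remains that is zero exactly when every pair matched. A minimal sketch with hypothetical inputs, assuming at least one pair:

  #include <cstddef>
  #include <cstdint>
  #include <utility>
  #include <vector>

  static uint64_t anyDifference(
      const std::vector<std::pair<uint64_t, uint64_t>> &Pairs) {
    std::vector<uint64_t> List;
    for (const auto &P : Pairs)
      List.push_back(P.first ^ P.second); // Nonzero iff this pair differs.
    while (List.size() > 1) {
      std::vector<uint64_t> Next;
      for (size_t I = 0; I + 1 < List.size(); I += 2)
        Next.push_back(List[I] | List[I + 1]);
      if (List.size() % 2 != 0)
        Next.push_back(List.back()); // The odd element is carried over as-is.
      List = std::move(Next);
    }
    return List[0]; // Compare against 0 for the final equality bit.
  }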
+
+void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
+ unsigned &LoadIndex) {
+ Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
+
+ BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
+ ? EndBlock
+ : LoadCmpBlocks[BlockIndex + 1];
+ // Branch early to ResultBlock if a difference is found; otherwise,
+ // continue to the next LoadCmpBlock or EndBlock.
+ BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
+ Builder.Insert(CmpBr);
+
+ // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0
+ // since early exit to ResultBlock was not taken (no difference was found in
+ // any of the bytes).
+ if (BlockIndex == LoadCmpBlocks.size() - 1) {
+ Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
+ PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
+ }
+}
+
+// This function creates the IR instructions for loading and comparing using the
+// given LoadSize. It loads the number of bytes specified by LoadSize from each
+// source of the memcmp parameters. It then does a subtract to see if there was
+// a difference in the loaded values. If a difference is found, it branches
+// with an early exit to the ResultBlock for calculating which source was
+// larger. Otherwise, it falls through to either the next LoadCmpBlock or
+// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
+// a special case through emitLoadCompareByteBlock. The special handling can
+// simply subtract the loaded values and add it to the result phi node.
+void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
+ // There is one load per block in this case, BlockIndex == LoadIndex.
+ const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
+
+ if (CurLoadEntry.LoadSize == 1) {
+ MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex,
+ CurLoadEntry.getGEPIndex());
+ return;
+ }
+
+ Type *LoadSizeType =
+ IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
+ Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
+
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Get the base address using a GEP.
+ if (CurLoadEntry.Offset != 0) {
+ Source1 = Builder.CreateGEP(
+ LoadSizeType, Source1,
+ ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
+ Source2 = Builder.CreateGEP(
+ LoadSizeType, Source2,
+ ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
+ }
+
+ // Load LoadSizeType from the base address.
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ if (DL.isLittleEndian()) {
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+ Intrinsic::bswap, LoadSizeType);
+ LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
+ LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
+ }
+
+ if (LoadSizeType != MaxLoadType) {
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
+ }
+
+ // Add the loaded values to the phi nodes for calculating memcmp result only
+ // if result is not used in a zero equality.
+ if (!IsUsedForZeroCmp) {
+ ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]);
+ ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]);
+ }
+
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);
+ BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
+ ? EndBlock
+ : LoadCmpBlocks[BlockIndex + 1];
+ // Branch early to ResultBlock if a difference is found; otherwise, continue
+ // to the next LoadCmpBlock or EndBlock.
+ BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
+ Builder.Insert(CmpBr);
+
+ // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0
+ // since early exit to ResultBlock was not taken (no difference was found in
+ // any of the bytes).
+ if (BlockIndex == LoadCmpBlocks.size() - 1) {
+ Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
+ PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
+ }
+}
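
The bswap calls above exist because memcmp orders buffers by their lowest-addressed byte first, while a multi-byte little-endian load puts that byte in the least significant position; byte-swapping restores lexicographic order before the unsigned compare. A host-side sketch of the same idea, assuming a little-endian host and a compiler providing __builtin_bswap16 (the helper and buffers are hypothetical):

  #include <cstdint>
  #include <cstring>

  // Returns true iff memcmp(A, B, 2) < 0, using one 16-bit load per side.
  static bool lexLess2(const uint8_t *A, const uint8_t *B) {
    uint16_t VA, VB;
    std::memcpy(&VA, A, 2);
    std::memcpy(&VB, B, 2);
    VA = __builtin_bswap16(VA); // Little-endian host: restore address order.
    VB = __builtin_bswap16(VB);
    return VA < VB;
  }

  // For A = {0x01, 0xFF} and B = {0x02, 0x00}, the raw loads compare as
  // 0xFF01 > 0x0002, the wrong answer; after bswap, 0x01FF < 0x0200 agrees
  // with memcmp.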
+
+// This function populates the ResultBlock with a sequence to calculate the
+// memcmp result. It compares the two loaded source values and returns -1 if
+// src1 < src2 and 1 if src1 > src2.
+void MemCmpExpansion::emitMemCmpResultBlock() {
+ // Special case: if the memcmp result is only used in a zero-equality
+ // comparison, the exact value need not be calculated; returning 1 is enough.
+ if (IsUsedForZeroCmp) {
+ BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
+ Builder.SetInsertPoint(ResBlock.BB, InsertPt);
+ Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
+ PhiRes->addIncoming(Res, ResBlock.BB);
+ BranchInst *NewBr = BranchInst::Create(EndBlock);
+ Builder.Insert(NewBr);
+ return;
+ }
+ BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
+ Builder.SetInsertPoint(ResBlock.BB, InsertPt);
+
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
+ ResBlock.PhiSrc2);
+
+ Value *Res =
+ Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
+ ConstantInt::get(Builder.getInt32Ty(), 1));
+
+ BranchInst *NewBr = BranchInst::Create(EndBlock);
+ Builder.Insert(NewBr);
+ PhiRes->addIncoming(Res, ResBlock.BB);
+}
+
+void MemCmpExpansion::setupResultBlockPHINodes() {
+ Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ Builder.SetInsertPoint(ResBlock.BB);
+ // Note: this assumes one load per block.
+ ResBlock.PhiSrc1 =
+ Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
+ ResBlock.PhiSrc2 =
+ Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
+}
+
+void MemCmpExpansion::setupEndBlockPHINodes() {
+ Builder.SetInsertPoint(&EndBlock->front());
+ PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
+}
+
+Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
+ unsigned LoadIndex = 0;
+ // This loop populates each of the LoadCmpBlocks with the IR sequence to
+ // handle multiple loads per block.
+ for (unsigned I = 0; I < getNumBlocks(); ++I) {
+ emitLoadCompareBlockMultipleLoads(I, LoadIndex);
+ }
+
+ emitMemCmpResultBlock();
+ return PhiRes;
+}
+
+/// A memcmp expansion that compares equality with 0 and only has one block of
+/// load and compare can bypass the compare, branch, and phi IR that is required
+/// in the general case.
+Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
+ unsigned LoadIndex = 0;
+ Value *Cmp = getCompareLoadPairs(0, LoadIndex);
+ assert(LoadIndex == getNumLoads() && "some entries were not consumed");
+ return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
+}
+
+/// A memcmp expansion that only has one block of load and compare can bypass
+/// the compare, branch, and phi IR that is required in the general case.
+Value *MemCmpExpansion::getMemCmpOneBlock() {
+ assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
+
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Load LoadSizeType from the base address.
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ if (DL.isLittleEndian() && Size != 1) {
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+ Intrinsic::bswap, LoadSizeType);
+ LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
+ LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
+ }
+
+ if (Size < 4) {
+ // The i8 and i16 cases don't need compares. We zext the loaded values and
+ // subtract them to get the suitable negative, zero, or positive i32 result.
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty());
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty());
+ return Builder.CreateSub(LoadSrc1, LoadSrc2);
+ }
+
+ // The result of memcmp is negative, zero, or positive, so produce that by
+ // subtracting 2 extended compare bits: sub (ugt, ult).
+ // If a target prefers to use selects to get -1/0/1, they should be able
+ // to transform this later. The inverse transform (going from selects to math)
+ // may not be possible in the DAG because the selects got converted into
+ // branches before we got there.
+ Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2);
+ Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
+ Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
+ Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
+ return Builder.CreateSub(ZextUGT, ZextULT);
+}
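
The "sub (ugt, ult)" trick above produces the memcmp sign without a branch or select: each comparison zero-extends to 0 or 1, and their difference is therefore exactly -1, 0, or 1. The same computation as a trivial host-side sketch (hypothetical helper name):

  #include <cstdint>

  // Mirrors the zext(ugt) - zext(ult) sequence emitted above.
  static int compareSign(uint64_t A, uint64_t B) {
    int UGT = A > B; // 0 or 1
    int ULT = A < B; // 0 or 1
    return UGT - ULT; // 1 if A > B, -1 if A < B, 0 if equal
  }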
+
+// This function expands the memcmp call into an inline expansion and returns
+// the memcmp result.
+Value *MemCmpExpansion::getMemCmpExpansion() {
+ // A memcmp that is compared only against zero and needs just one block of
+ // load and compare does not need to set up any extra blocks. This case could
+ // be handled in the DAG, but since we have all of the machinery to flexibly
+ // expand any memcmp here, we choose to handle this case too to avoid
+ // fragmented lowering.
+ if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) {
+ BasicBlock *StartBlock = CI->getParent();
+ EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
+ setupEndBlockPHINodes();
+ createResultBlock();
+
+ // If return value of memcmp is not used in a zero equality, we need to
+ // calculate which source was larger. The calculation requires the
+ // two loaded source values of each load compare block.
+ // These will be saved in the phi nodes created by setupResultBlockPHINodes.
+ if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
+
+ // Create the number of required load compare basic blocks.
+ createLoadCmpBlocks();
+
+ // Update the terminator added by splitBasicBlock to branch to the first
+ // LoadCmpBlock.
+ StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
+ }
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ if (IsUsedForZeroCmp)
+ return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
+ : getMemCmpExpansionZeroCase();
+
+ // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
+ if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock();
+
+ for (unsigned I = 0; I < getNumBlocks(); ++I) {
+ emitLoadCompareBlock(I);
+ }
+
+ emitMemCmpResultBlock();
+ return PhiRes;
+}
+
+// This function checks to see if an expansion of memcmp can be generated.
+// It checks for a constant compare size that is less than the max inline size.
+// If an expansion cannot occur, it returns false to leave the call as a
+// library call.
+// Otherwise, the library call is replaced with a new IR instruction sequence.
+/// We want to transform:
+/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
+/// To:
+/// loadbb:
+/// %0 = bitcast i32* %buffer2 to i8*
+/// %1 = bitcast i32* %buffer1 to i8*
+/// %2 = bitcast i8* %1 to i64*
+/// %3 = bitcast i8* %0 to i64*
+/// %4 = load i64, i64* %2
+/// %5 = load i64, i64* %3
+/// %6 = call i64 @llvm.bswap.i64(i64 %4)
+/// %7 = call i64 @llvm.bswap.i64(i64 %5)
+/// %8 = sub i64 %6, %7
+/// %9 = icmp ne i64 %8, 0
+/// br i1 %9, label %res_block, label %loadbb1
+/// res_block: ; preds = %loadbb2,
+/// %loadbb1, %loadbb
+/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
+/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
+/// %10 = icmp ult i64 %phi.src1, %phi.src2
+/// %11 = select i1 %10, i32 -1, i32 1
+/// br label %endblock
+/// loadbb1: ; preds = %loadbb
+/// %12 = bitcast i32* %buffer2 to i8*
+/// %13 = bitcast i32* %buffer1 to i8*
+/// %14 = bitcast i8* %13 to i32*
+/// %15 = bitcast i8* %12 to i32*
+/// %16 = getelementptr i32, i32* %14, i32 2
+/// %17 = getelementptr i32, i32* %15, i32 2
+/// %18 = load i32, i32* %16
+/// %19 = load i32, i32* %17
+/// %20 = call i32 @llvm.bswap.i32(i32 %18)
+/// %21 = call i32 @llvm.bswap.i32(i32 %19)
+/// %22 = zext i32 %20 to i64
+/// %23 = zext i32 %21 to i64
+/// %24 = sub i64 %22, %23
+/// %25 = icmp ne i64 %24, 0
+/// br i1 %25, label %res_block, label %loadbb2
+/// loadbb2: ; preds = %loadbb1
+/// %26 = bitcast i32* %buffer2 to i8*
+/// %27 = bitcast i32* %buffer1 to i8*
+/// %28 = bitcast i8* %27 to i16*
+/// %29 = bitcast i8* %26 to i16*
+/// %30 = getelementptr i16, i16* %28, i16 6
+/// %31 = getelementptr i16, i16* %29, i16 6
+/// %32 = load i16, i16* %30
+/// %33 = load i16, i16* %31
+/// %34 = call i16 @llvm.bswap.i16(i16 %32)
+/// %35 = call i16 @llvm.bswap.i16(i16 %33)
+/// %36 = zext i16 %34 to i64
+/// %37 = zext i16 %35 to i64
+/// %38 = sub i64 %36, %37
+/// %39 = icmp ne i64 %38, 0
+/// br i1 %39, label %res_block, label %loadbb3
+/// loadbb3: ; preds = %loadbb2
+/// %40 = bitcast i32* %buffer2 to i8*
+/// %41 = bitcast i32* %buffer1 to i8*
+/// %42 = getelementptr i8, i8* %41, i8 14
+/// %43 = getelementptr i8, i8* %40, i8 14
+/// %44 = load i8, i8* %42
+/// %45 = load i8, i8* %43
+/// %46 = zext i8 %44 to i32
+/// %47 = zext i8 %45 to i32
+/// %48 = sub i32 %46, %47
+/// br label %endblock
+/// endblock: ; preds = %res_block,
+/// %loadbb3
+/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
+/// ret i32 %phi.res
+static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
+ const TargetLowering *TLI, const DataLayout *DL) {
+ NumMemCmpCalls++;
+
+ // Early exit from expansion if -Oz.
+ if (CI->getFunction()->optForMinSize())
+ return false;
+
+ // Early exit from expansion if size is not a constant.
+ ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!SizeCast) {
+ NumMemCmpNotConstant++;
+ return false;
+ }
+ const uint64_t SizeVal = SizeCast->getZExtValue();
+
+ if (SizeVal == 0) {
+ return false;
+ }
+
+ // TTI call to check if target would like to expand memcmp. Also, get the
+ // available load sizes.
+ const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+ const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp);
+ if (!Options) return false;
+
+ const unsigned MaxNumLoads =
+ TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
+
+ MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads,
+ IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL);
+
+ // Don't expand if this will require more loads than desired by the target.
+ if (Expansion.getNumLoads() == 0) {
+ NumMemCmpGreaterThanMax++;
+ return false;
+ }
+
+ NumMemCmpInlined++;
+
+ Value *Res = Expansion.getMemCmpExpansion();
+
+ // Replace call with result of expansion and erase call.
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+
+ return true;
+}
+
+class ExpandMemCmpPass : public FunctionPass {
+public:
+ static char ID;
+
+ ExpandMemCmpPass() : FunctionPass(ID) {
+ initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F)) return false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC) {
+ return false;
+ }
+ const TargetLowering* TL =
+ TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
+
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const TargetTransformInfo *TTI =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto PA = runImpl(F, TLI, TTI, TL);
+ return !PA.areAllPreserved();
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI,
+ const TargetLowering* TL);
+ // Returns true if a change was made.
+ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, const TargetLowering* TL,
+ const DataLayout& DL);
+};
+
+bool ExpandMemCmpPass::runOnBlock(
+ BasicBlock &BB, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, const TargetLowering* TL,
+ const DataLayout& DL) {
+ for (Instruction& I : BB) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI) {
+ continue;
+ }
+ LibFunc Func;
+ if (TLI->getLibFunc(ImmutableCallSite(CI), Func) &&
+ Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TL, &DL)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+PreservedAnalyses ExpandMemCmpPass::runImpl(
+ Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
+ const TargetLowering* TL) {
+ const DataLayout& DL = F.getParent()->getDataLayout();
+ bool MadeChanges = false;
+ for (auto BBIt = F.begin(); BBIt != F.end();) {
+ if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) {
+ MadeChanges = true;
+ // If changes were made, restart the function from the beginning, since
+ // the structure of the function was changed.
+ BBIt = F.begin();
+ } else {
+ ++BBIt;
+ }
+ }
+ return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+} // namespace
+
+char ExpandMemCmpPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
+ "Expand memcmp() to load/stores", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
+ "Expand memcmp() to load/stores", false, false)
+
+FunctionPass *llvm::createExpandMemCmpPass() {
+ return new ExpandMemCmpPass();
+}
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 4ce86f27a7dd..6ef97d6dd5ec 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -17,11 +17,11 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -104,8 +104,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
if (DstSubReg == InsReg) {
// No need to insert an identity copy instruction.
// Watch out for case like this:
- // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
- // We must leave %RAX live.
+ // %rax = SUBREG_TO_REG 0, killed %eax, 3
+ // We must leave %rax live.
if (DstReg != InsReg) {
MI->setDesc(TII->get(TargetOpcode::KILL));
MI->RemoveOperand(3); // SubIdx
diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp
index 70dca3b74b2f..abf487a4f198 100644
--- a/lib/CodeGen/ExpandReductions.cpp
+++ b/lib/CodeGen/ExpandReductions.cpp
@@ -95,7 +95,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
// and it can't be handled by generating this shuffle sequence.
// TODO: Implement scalarization of ordered reductions here for targets
// without native support.
- if (!II->getFastMathFlags().unsafeAlgebra())
+ if (!II->getFastMathFlags().isFast())
continue;
Vec = II->getArgOperand(1);
break;
diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp
index 0759bf6713e0..4ddf9f92836c 100644
--- a/lib/CodeGen/FEntryInserter.cpp
+++ b/lib/CodeGen/FEntryInserter.cpp
@@ -15,11 +15,11 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -36,15 +36,13 @@ struct FEntryInserter : public MachineFunctionPass {
bool FEntryInserter::runOnMachineFunction(MachineFunction &MF) {
const std::string FEntryName =
- MF.getFunction()->getFnAttribute("fentry-call").getValueAsString();
+ MF.getFunction().getFnAttribute("fentry-call").getValueAsString();
if (FEntryName != "true")
return false;
auto &FirstMBB = *MF.begin();
- auto &FirstMI = *FirstMBB.begin();
-
auto *TII = MF.getSubtarget().getInstrInfo();
- BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+ BuildMI(FirstMBB, FirstMBB.begin(), DebugLoc(),
TII->get(TargetOpcode::FENTRY_CALL));
return true;
}
diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp
index 35246545ca91..4361d8b248c8 100644
--- a/lib/CodeGen/GCRootLowering.cpp
+++ b/lib/CodeGen/GCRootLowering.cpp
@@ -18,17 +18,16 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -329,10 +328,10 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
// Quick exit for functions that do not use GC.
- if (!MF.getFunction()->hasGC())
+ if (!MF.getFunction().hasGC())
return false;
- FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(MF.getFunction());
MMI = &getAnalysis<MachineModuleInfo>();
TII = MF.getSubtarget().getInstrInfo();
diff --git a/lib/CodeGen/GlobalISel/CMakeLists.txt b/lib/CodeGen/GlobalISel/CMakeLists.txt
index eba7ea8132e3..2db90f8888cb 100644
--- a/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -1,34 +1,18 @@
-# List of all GlobalISel files.
-set(GLOBAL_ISEL_FILES
- CallLowering.cpp
- IRTranslator.cpp
- InstructionSelect.cpp
- InstructionSelector.cpp
- MachineIRBuilder.cpp
- LegalizerHelper.cpp
- Legalizer.cpp
- LegalizerInfo.cpp
- Localizer.cpp
- RegBankSelect.cpp
- RegisterBank.cpp
- RegisterBankInfo.cpp
- Utils.cpp
- )
-
-# Add GlobalISel files to the dependencies if the user wants to build it.
-if(LLVM_BUILD_GLOBAL_ISEL)
- set(GLOBAL_ISEL_BUILD_FILES ${GLOBAL_ISEL_FILES})
-else()
- set(GLOBAL_ISEL_BUILD_FILES"")
- set(LLVM_OPTIONAL_SOURCES LLVMGlobalISel ${GLOBAL_ISEL_FILES})
-endif()
-
-# In LLVMBuild.txt files, it is not possible to mark a dependency to a
-# library as optional. So instead, generate an empty library if we did
-# not ask for it.
add_llvm_library(LLVMGlobalISel
- ${GLOBAL_ISEL_BUILD_FILES}
+ CallLowering.cpp
GlobalISel.cpp
+ IRTranslator.cpp
+ InstructionSelect.cpp
+ InstructionSelector.cpp
+ LegalizerHelper.cpp
+ Legalizer.cpp
+ LegalizerInfo.cpp
+ Localizer.cpp
+ MachineIRBuilder.cpp
+ RegBankSelect.cpp
+ RegisterBank.cpp
+ RegisterBankInfo.cpp
+ Utils.cpp
DEPENDS
intrinsics_gen
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index be0c5c2bb70e..114c068749eb 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -16,10 +16,10 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/Target/TargetLowering.h"
using namespace llvm;
@@ -108,7 +108,7 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
ArrayRef<ArgInfo> Args,
ValueHandler &Handler) const {
MachineFunction &MF = MIRBuilder.getMF();
- const Function &F = *MF.getFunction();
+ const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
SmallVector<CCValAssign, 16> ArgLocs;
@@ -160,10 +160,11 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
// FIXME: bitconverting between vector types may or may not be a
// nop in big-endian situations.
return ValReg;
- case CCValAssign::AExt:
+ case CCValAssign::AExt: {
assert(!VA.getLocVT().isVector() && "unexpected vector extend");
- // Otherwise, it's a nop.
- return ValReg;
+ auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg);
+ return MIB->getOperand(0).getReg();
+ }
case CCValAssign::SExt: {
unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildSExt(NewReg, ValReg);
diff --git a/lib/CodeGen/GlobalISel/GlobalISel.cpp b/lib/CodeGen/GlobalISel/GlobalISel.cpp
index 29d1209bb02a..00c6a9d63158 100644
--- a/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -16,13 +16,6 @@
using namespace llvm;
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-
-void llvm::initializeGlobalISel(PassRegistry &Registry) {
-}
-
-#else
-
void llvm::initializeGlobalISel(PassRegistry &Registry) {
initializeIRTranslatorPass(Registry);
initializeLegalizerPass(Registry);
@@ -30,4 +23,3 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) {
initializeRegBankSelectPass(Registry);
initializeInstructionSelectPass(Registry);
}
-#endif // LLVM_BUILD_GLOBAL_ISEL
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index ed1bd995e60b..433f99b0113b 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -15,7 +15,7 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/LowLevelType.h"
@@ -26,7 +26,11 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -54,12 +58,8 @@
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -124,8 +124,8 @@ unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
bool Success = translate(*CV, VReg);
if (!Success) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
- MF->getFunction()->getSubprogram(),
- &MF->getFunction()->getEntryBlock());
+ MF->getFunction().getSubprogram(),
+ &MF->getFunction().getEntryBlock());
R << "unable to translate constant: " << ore::NV("Type", Val.getType());
reportTranslationError(*MF, *TPC, *ORE, R);
return VReg;
@@ -238,6 +238,8 @@ bool IRTranslator::translateCompare(const User &U,
bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
const ReturnInst &RI = cast<ReturnInst>(U);
const Value *Ret = RI.getReturnValue();
+ if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
+ Ret = nullptr;
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
@@ -337,6 +339,9 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
: MachineMemOperand::MONone;
Flags |= MachineMemOperand::MOLoad;
+ if (DL->getTypeStoreSize(LI.getType()) == 0)
+ return true;
+
unsigned Res = getOrCreateVReg(LI);
unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
@@ -355,6 +360,9 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
: MachineMemOperand::MONone;
Flags |= MachineMemOperand::MOStore;
+ if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
+ return true;
+
unsigned Val = getOrCreateVReg(*SI.getValueOperand());
unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
@@ -583,7 +591,7 @@ void IRTranslator::getStackGuard(unsigned DstReg,
MIB.addDef(DstReg);
auto &TLI = *MF->getSubtarget().getTargetLowering();
- Value *Global = TLI.getSDagStackGuard(*MF->getFunction()->getParent());
+ Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
if (!Global)
return;
@@ -593,7 +601,7 @@ void IRTranslator::getStackGuard(unsigned DstReg,
MachineMemOperand::MODereferenceable;
*MemRefs =
MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
- DL->getPointerABIAlignment());
+ DL->getPointerABIAlignment(0));
MIB.setMemRefs(MemRefs, MemRefs + 1);
}
@@ -682,23 +690,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
- MIRBuilder.buildIndirectDbgValue(0, DI.getOffset(), DI.getVariable(),
- DI.getExpression());
+ MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
} else if (const auto *CI = dyn_cast<Constant>(V)) {
- MIRBuilder.buildConstDbgValue(*CI, DI.getOffset(), DI.getVariable(),
- DI.getExpression());
+ MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
} else {
unsigned Reg = getOrCreateVReg(*V);
// FIXME: This does not handle register-indirect values at offset 0. The
// direct/indirect thing shouldn't really be handled by something as
// implicit as reg+noreg vs reg+imm in the first place, but it seems
// pretty baked in right now.
- if (DI.getOffset() != 0)
- MIRBuilder.buildIndirectDbgValue(Reg, DI.getOffset(), DI.getVariable(),
- DI.getExpression());
- else
- MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(),
- DI.getExpression());
+ MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
}
return true;
}
@@ -850,14 +851,10 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
TargetLowering::IntrinsicInfo Info;
// TODO: Add a GlobalISel version of getTgtMemIntrinsic.
- if (TLI.getTgtMemIntrinsic(Info, CI, ID)) {
- MachineMemOperand::Flags Flags =
- Info.vol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
- Flags |=
- Info.readMem ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
- uint64_t Size = Info.memVT.getSizeInBits() >> 3;
+ if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
+ uint64_t Size = Info.memVT.getStoreSize();
MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
- Flags, Size, Info.align));
+ Info.flags, Size, Info.align));
}
return true;
@@ -928,7 +925,7 @@ bool IRTranslator::translateLandingPad(const User &U,
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother.
auto &TLI = *MF->getSubtarget().getTargetLowering();
- const Constant *PersonalityFn = MF->getFunction()->getPersonalityFn();
+ const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
return true;
@@ -1105,7 +1102,7 @@ bool IRTranslator::translateShuffleVector(const User &U,
bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
const PHINode &PI = cast<PHINode>(U);
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::PHI);
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
MIB.addDef(getOrCreateVReg(PI));
PendingPHIs.emplace_back(&PI, MIB.getInstr());
@@ -1239,7 +1236,7 @@ void IRTranslator::finalizeFunction() {
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MF = &CurMF;
- const Function &F = *MF->getFunction();
+ const Function &F = MF->getFunction();
if (F.empty())
return false;
CLI = MF->getSubtarget().getCallLowering();
@@ -1252,6 +1249,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
assert(PendingPHIs.empty() && "stale PHIs");
+ if (!DL->isLittleEndian()) {
+ // Currently we don't properly handle big endian code.
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ F.getSubprogram(), &F.getEntryBlock());
+ R << "unable to translate in big endian mode";
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ }
+
// Release the per-function state when we return, whether we succeeded or not.
auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });
@@ -1276,12 +1281,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Lower the actual args into this basic block.
SmallVector<unsigned, 8> VRegArgs;
- for (const Argument &Arg: F.args())
+ for (const Argument &Arg: F.args()) {
+ if (DL->getTypeStoreSize(Arg.getType()) == 0)
+ continue; // Don't handle zero sized types.
VRegArgs.push_back(getOrCreateVReg(Arg));
+ }
if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
- MF->getFunction()->getSubprogram(),
- &MF->getFunction()->getEntryBlock());
+ F.getSubprogram(), &F.getEntryBlock());
R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
reportTranslationError(*MF, *TPC, *ORE, R);
return false;
@@ -1298,14 +1305,18 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
if (translate(Inst))
continue;
- std::string InstStrStorage;
- raw_string_ostream InstStr(InstStrStorage);
- InstStr << Inst;
-
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
Inst.getDebugLoc(), &BB);
- R << "unable to translate instruction: " << ore::NV("Opcode", &Inst)
- << ": '" << InstStr.str() << "'";
+ R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
+
+ if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << Inst;
+
+ R << ": '" << InstStr.str() << "'";
+ }
+
reportTranslationError(*MF, *TPC, *ORE, R);
return false;
}
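
[Editor's note] The last IRTranslator hunk above defers the cost of printing the offending instruction until the remark consumer has opted in via allowExtraAnalysis(). Below is a minimal standalone sketch of that gating pattern; it is an illustration only, with hypothetical names and no LLVM dependencies:

    // Sketch: only build the expensive diagnostic string when someone will
    // actually read it. All names here are hypothetical.
    #include <functional>
    #include <iostream>
    #include <sstream>
    #include <string>

    struct RemarkEmitter {
      bool ExtraAnalysisRequested = false;
      bool allowExtraAnalysis(const std::string & /*PassName*/) const {
        return ExtraAnalysisRequested;
      }
      void emit(const std::string &Msg) { std::cerr << Msg << '\n'; }
    };

    void reportFailure(RemarkEmitter &ORE, int Opcode,
                       const std::function<std::string()> &ExpensiveDetail) {
      std::ostringstream Msg;
      Msg << "unable to translate instruction: opcode " << Opcode;
      // Only pay for the detailed dump when extra analysis was requested.
      if (ORE.allowExtraAnalysis("gisel-irtranslator"))
        Msg << ": '" << ExpensiveDetail() << "'";
      ORE.emit(Msg.str());
    }

    int main() {
      RemarkEmitter ORE;
      ORE.ExtraAnalysisRequested = true;
      reportFailure(ORE, 42, [] { return std::string("G_FOO %0, %1"); });
    }

The remark emitted on every failure stays cheap this way; the full instruction dump is produced only under an analysis-remarks opt-in.
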
diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index a16e14fe2db6..422cc2219aa8 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -19,18 +19,29 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
#define DEBUG_TYPE "instruction-select"
using namespace llvm;
+#ifdef LLVM_GISEL_COV_PREFIX
+static cl::opt<std::string>
+ CoveragePrefix("gisel-coverage-prefix", cl::init(LLVM_GISEL_COV_PREFIX),
+ cl::desc("Record GlobalISel rule coverage files of this "
+ "prefix if instrumentation was generated"));
+#else
+static const std::string CoveragePrefix = "";
+#endif
+
char InstructionSelect::ID = 0;
INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
@@ -66,6 +77,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
+ CodeGenCoverage CoverageInfo;
assert(ISel && "Cannot work without InstructionSelector");
// An optimization remark emitter. Used to report failures.
@@ -127,7 +139,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- if (!ISel->select(MI)) {
+ if (!ISel->select(MI, CoverageInfo)) {
// FIXME: It would be nice to dump all inserted instructions. It's
// not obvious how, esp. considering select() can insert after MI.
reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI);
@@ -177,7 +189,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
if (MF.size() != NumBlocks) {
MachineOptimizationRemarkMissed R("gisel-select", "GISelFailure",
- MF.getFunction()->getSubprogram(),
+ MF.getFunction().getSubprogram(),
/*MBB=*/nullptr);
R << "inserting blocks is not supported yet";
reportGISelFailure(MF, TPC, MORE, R);
@@ -187,6 +199,13 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
auto &TLI = *MF.getSubtarget().getTargetLowering();
TLI.finalizeLowering(MF);
+ CoverageInfo.emit(CoveragePrefix,
+ MF.getSubtarget()
+ .getTargetLowering()
+ ->getTargetMachine()
+ .getTarget()
+ .getBackendName());
+
// FIXME: Should we accurately track changes?
return true;
}
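
[Editor's note] The InstructionSelect changes wire rule-coverage emission to a prefix that exists as a real cl::opt only when LLVM_GISEL_COV_PREFIX was defined at configure time; otherwise the prefix is a constant empty string and emission degrades to a no-op. A sketch of the same configure-time-default pattern, using a hypothetical MYTOOL_COV_PREFIX macro and no LLVM dependencies:

    #include <iostream>
    #include <string>

    #ifdef MYTOOL_COV_PREFIX
    // Build configured coverage in: expose a runtime-overridable default.
    static std::string CoveragePrefix = MYTOOL_COV_PREFIX;
    #else
    // Feature compiled out: constant empty prefix disables emission.
    static const std::string CoveragePrefix = "";
    #endif

    void emitCoverage(const std::string &Backend) {
      if (CoveragePrefix.empty())
        return; // instrumentation was not configured; nothing to write
      std::cout << "writing coverage to " << CoveragePrefix << Backend << '\n';
    }

    int main() { emitCoverage("hypothetical-backend"); }
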
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index bf427225d6a9..88669bd68c00 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -6,8 +6,10 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
/// \file
/// This file implements the InstructionSelector class.
+//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
@@ -16,14 +18,11 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#define DEBUG_TYPE "instructionselector"
@@ -31,7 +30,7 @@
using namespace llvm;
InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers)
- : Renderers(MaxRenderers, nullptr), MIs() {}
+ : Renderers(MaxRenderers), MIs() {}
InstructionSelector::InstructionSelector() = default;
@@ -100,7 +99,30 @@ bool InstructionSelector::isOperandImmEqual(
return false;
}
-bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI) const {
+bool InstructionSelector::isBaseWithConstantOffset(
+ const MachineOperand &Root, const MachineRegisterInfo &MRI) const {
+ if (!Root.isReg())
+ return false;
+
+ MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
+ if (RootI->getOpcode() != TargetOpcode::G_GEP)
+ return false;
+
+ MachineOperand &RHS = RootI->getOperand(2);
+ MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg());
+ if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT)
+ return false;
+
+ return true;
+}
+
+bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
+ MachineInstr &IntoMI) const {
+ // Immediate neighbours are already folded.
+ if (MI.getParent() == IntoMI.getParent() &&
+ std::next(MI.getIterator()) == IntoMI.getIterator())
+ return true;
+
return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
MI.implicit_operands().begin() == MI.implicit_operands().end();
}
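
[Editor's note] isObviouslySafeToFold() now takes the destination instruction and adds a fast path: if MI is the immediate predecessor of IntoMI in the same block, nothing can execute between them, so the fold is safe regardless of loads, stores, or side effects. A toy model of the adjacency test (plain C++, not LLVM code):

    #include <cassert>
    #include <list>
    #include <string>

    struct Instr { std::string Name; };
    using Block = std::list<Instr>;

    // Mirrors: MI.getParent() == IntoMI.getParent() &&
    //          std::next(MI.getIterator()) == IntoMI.getIterator()
    bool isImmediatePredecessor(const Block &BB, Block::const_iterator MI,
                                Block::const_iterator IntoMI) {
      return MI != BB.end() && std::next(MI) == IntoMI;
    }

    int main() {
      Block BB{{"def"}, {"use"}};
      auto Def = BB.begin();
      auto Use = std::next(Def);
      assert(isImmediatePredecessor(BB, Def, Use));   // adjacent: safe fold
      assert(!isImmediatePredecessor(BB, Use, Def));  // wrong order: no
    }
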
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index b699156c568b..f09b0d9f11e7 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -14,14 +14,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
+#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <iterator>
@@ -50,79 +53,18 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
void Legalizer::init(MachineFunction &MF) {
}
-bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI,
- const TargetInstrInfo &TII,
- MachineIRBuilder &MIRBuilder) {
- if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+static bool isArtifact(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
return false;
-
- unsigned NumDefs = MI.getNumOperands() - 1;
- unsigned SrcReg = MI.getOperand(NumDefs).getReg();
- MachineInstr &MergeI = *MRI.def_instr_begin(SrcReg);
- if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES)
- return false;
-
- const unsigned NumMergeRegs = MergeI.getNumOperands() - 1;
-
- if (NumMergeRegs < NumDefs) {
- if (NumDefs % NumMergeRegs != 0)
- return false;
-
- MIRBuilder.setInstr(MI);
- // Transform to UNMERGEs, for example
- // %1 = G_MERGE_VALUES %4, %5
- // %9, %10, %11, %12 = G_UNMERGE_VALUES %1
- // to
- // %9, %10 = G_UNMERGE_VALUES %4
- // %11, %12 = G_UNMERGE_VALUES %5
-
- const unsigned NewNumDefs = NumDefs / NumMergeRegs;
- for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
- SmallVector<unsigned, 2> DstRegs;
- for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
- ++j, ++DefIdx)
- DstRegs.push_back(MI.getOperand(DefIdx).getReg());
-
- MIRBuilder.buildUnmerge(DstRegs, MergeI.getOperand(Idx + 1).getReg());
- }
-
- } else if (NumMergeRegs > NumDefs) {
- if (NumMergeRegs % NumDefs != 0)
- return false;
-
- MIRBuilder.setInstr(MI);
- // Transform to MERGEs
- // %6 = G_MERGE_VALUES %17, %18, %19, %20
- // %7, %8 = G_UNMERGE_VALUES %6
- // to
- // %7 = G_MERGE_VALUES %17, %18
- // %8 = G_MERGE_VALUES %19, %20
-
- const unsigned NumRegs = NumMergeRegs / NumDefs;
- for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
- SmallVector<unsigned, 2> Regs;
- for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; ++j, ++Idx)
- Regs.push_back(MergeI.getOperand(Idx).getReg());
-
- MIRBuilder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs);
- }
-
- } else {
- // FIXME: is a COPY appropriate if the types mismatch? We know both
- // registers are allocatable by now.
- if (MRI.getType(MI.getOperand(0).getReg()) !=
- MRI.getType(MergeI.getOperand(1).getReg()))
- return false;
-
- for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
- MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
- MergeI.getOperand(Idx + 1).getReg());
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return true;
}
-
- MI.eraseFromParent();
- if (MRI.use_empty(MergeI.getOperand(0).getReg()))
- MergeI.eraseFromParent();
- return true;
}
bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
@@ -136,79 +78,108 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
LegalizerHelper Helper(MF);
- // FIXME: an instruction may need more than one pass before it is legal. For
- // example on most architectures <3 x i3> is doubly-illegal. It would
- // typically proceed along a path like: <3 x i3> -> <3 x i8> -> <8 x i8>. We
- // probably want a worklist of instructions rather than naive iterate until
- // convergence for performance reasons.
- bool Changed = false;
- MachineBasicBlock::iterator NextMI;
- for (auto &MBB : MF) {
- for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
- // Get the next Instruction before we try to legalize, because there's a
- // good chance MI will be deleted.
- NextMI = std::next(MI);
+ const size_t NumBlocks = MF.size();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Populate Insts
+ GISelWorkList<256> InstList;
+ GISelWorkList<128> ArtifactList;
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ // Perform legalization bottom up so we can DCE as we legalize.
+ // Traverse BB in RPOT and within each basic block, add insts top down,
+ // so when we pop_back_val in the legalization process, we traverse bottom-up.
+ for (auto *MBB : RPOT) {
+ if (MBB->empty())
+ continue;
+ for (MachineInstr &MI : *MBB) {
// Only legalize pre-isel generic instructions: others don't have types
// and are assumed to be legal.
- if (!isPreISelGenericOpcode(MI->getOpcode()))
+ if (!isPreISelGenericOpcode(MI.getOpcode()))
continue;
- unsigned NumNewInsns = 0;
- SmallVector<MachineInstr *, 4> WorkList;
- Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
- // Only legalize pre-isel generic instructions.
- // Legalization process could generate Target specific pseudo
- // instructions with generic types. Don't record them
- if (isPreISelGenericOpcode(MI->getOpcode())) {
- ++NumNewInsns;
- WorkList.push_back(MI);
- }
- });
- WorkList.push_back(&*MI);
-
- bool Changed = false;
- LegalizerHelper::LegalizeResult Res;
- unsigned Idx = 0;
- do {
- Res = Helper.legalizeInstrStep(*WorkList[Idx]);
- // Error out if we couldn't legalize this instruction. We may want to
- // fall back to DAG ISel instead in the future.
- if (Res == LegalizerHelper::UnableToLegalize) {
- Helper.MIRBuilder.stopRecordingInsertions();
- if (Res == LegalizerHelper::UnableToLegalize) {
- reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
- "unable to legalize instruction",
- *WorkList[Idx]);
- return false;
- }
- }
- Changed |= Res == LegalizerHelper::Legalized;
- ++Idx;
-
-#ifndef NDEBUG
- if (NumNewInsns) {
- DEBUG(dbgs() << ".. .. Emitted " << NumNewInsns << " insns\n");
- for (auto I = WorkList.end() - NumNewInsns, E = WorkList.end();
- I != E; ++I)
- DEBUG(dbgs() << ".. .. New MI: "; (*I)->print(dbgs()));
- NumNewInsns = 0;
- }
-#endif
- } while (Idx < WorkList.size());
-
- Helper.MIRBuilder.stopRecordingInsertions();
+ if (isArtifact(MI))
+ ArtifactList.insert(&MI);
+ else
+ InstList.insert(&MI);
}
}
-
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- for (auto &MBB : MF) {
- for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
- // Get the next Instruction before we try to legalize, because there's a
- // good chance MI will be deleted.
- NextMI = std::next(MI);
- Changed |= combineMerges(*MI, MRI, TII, Helper.MIRBuilder);
+ Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
+ // Only legalize pre-isel generic instructions.
+ // Legalization process could generate Target specific pseudo
+ // instructions with generic types. Don't record them
+ if (isPreISelGenericOpcode(MI->getOpcode())) {
+ if (isArtifact(*MI))
+ ArtifactList.insert(MI);
+ else
+ InstList.insert(MI);
}
+ DEBUG(dbgs() << ".. .. New MI: " << *MI;);
+ });
+ const LegalizerInfo &LInfo(Helper.getLegalizerInfo());
+ LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(),
+                                          LInfo);
+ auto RemoveDeadInstFromLists = [&InstList,
+ &ArtifactList](MachineInstr *DeadMI) {
+ InstList.remove(DeadMI);
+ ArtifactList.remove(DeadMI);
+ };
+ bool Changed = false;
+ do {
+ while (!InstList.empty()) {
+ MachineInstr &MI = *InstList.pop_back_val();
+ assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
+ if (isTriviallyDead(MI, MRI)) {
+ DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ continue;
+ }
+
+ // Do the legalization for this instruction.
+ auto Res = Helper.legalizeInstrStep(MI);
+ // Error out if we couldn't legalize this instruction. We may want to
+ // fall back to DAG ISel instead in the future.
+ if (Res == LegalizerHelper::UnableToLegalize) {
+ Helper.MIRBuilder.stopRecordingInsertions();
+ reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
+ "unable to legalize instruction", MI);
+ return false;
+ }
+ Changed |= Res == LegalizerHelper::Legalized;
+ }
+ while (!ArtifactList.empty()) {
+ MachineInstr &MI = *ArtifactList.pop_back_val();
+ assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
+ if (isTriviallyDead(MI, MRI)) {
+ DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+ RemoveDeadInstFromLists(&MI);
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ continue;
+ }
+ SmallVector<MachineInstr *, 4> DeadInstructions;
+ if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) {
+ for (auto *DeadMI : DeadInstructions) {
+ DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI);
+ RemoveDeadInstFromLists(DeadMI);
+ DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
+ }
+ Changed = true;
+ continue;
+ }
+ // If this was not an artifact (that could be combined away), this might
+ // need special handling. Add it to InstList, so when it's processed
+ // there, it has to be legal or specially handled.
+ else
+ InstList.insert(&MI);
+ }
+ } while (!InstList.empty());
+
+ // For now don't support if new blocks are inserted - we would need to fix the
+ // outerloop for that.
+ if (MF.size() != NumBlocks) {
+ MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure",
+ MF.getFunction().getSubprogram(),
+ /*MBB=*/nullptr);
+ R << "inserting blocks is not supported yet";
+ reportGISelFailure(MF, TPC, MORE, R);
+ return false;
}
return Changed;
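
[Editor's note] The rewritten Legalizer loop above replaces iterate-until-convergence with two worklists: ordinary generic instructions on InstList, and extension/truncation/merge "artifacts" on ArtifactList, both drained back-to-front so newly dead instructions can be erased as legalization proceeds. The toy driver below models only that control flow; the real pass operates on MachineInstr and a LegalizationArtifactCombiner:

    #include <iostream>
    #include <string>
    #include <vector>

    struct Inst { std::string Op; bool Artifact; };

    int main() {
      // Pretend RPOT visit order; popping from the back => bottom-up.
      std::vector<Inst> InstList, ArtifactList;
      for (const Inst &I : {Inst{"G_ADD", false}, Inst{"G_TRUNC", true},
                            Inst{"G_STORE", false}})
        (I.Artifact ? ArtifactList : InstList).push_back(I);

      do {
        while (!InstList.empty()) {
          Inst I = InstList.back();
          InstList.pop_back();
          std::cout << "legalize " << I.Op << '\n'; // may spawn artifacts
        }
        while (!ArtifactList.empty()) {
          Inst I = ArtifactList.back();
          ArtifactList.pop_back();
          std::cout << "try to combine away " << I.Op << '\n';
          // If it cannot be combined, the real pass re-inserts it into
          // InstList, where it must then be legal or specially handled.
        }
      } while (!InstList.empty());
    }
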
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 5258370e6680..87a658be4c29 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -17,12 +17,11 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <sstream>
#define DEBUG_TYPE "legalizer"
@@ -91,6 +90,15 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
case TargetOpcode::G_FADD:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
+ case TargetOpcode::G_FSUB:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
+ case TargetOpcode::G_FMUL:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
+ case TargetOpcode::G_FDIV:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
case TargetOpcode::G_FREM:
return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
case TargetOpcode::G_FPOW:
@@ -128,7 +136,7 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = LLTy.getSizeInBits();
- auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
MIRBuilder.setInstr(MI);
@@ -146,6 +154,9 @@ LegalizerHelper::libcall(MachineInstr &MI) {
break;
}
case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
case TargetOpcode::G_FPOW:
case TargetOpcode::G_FREM: {
Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
@@ -169,12 +180,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MIRBuilder.setInstr(MI);
+ int64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ int64_t NarrowSize = NarrowTy.getSizeInBits();
+
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
case TargetOpcode::G_IMPLICIT_DEF: {
- int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
- NarrowTy.getSizeInBits();
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp0 / NarrowSize;
SmallVector<unsigned, 2> DstRegs;
for (int i = 0; i < NumParts; ++i) {
@@ -187,9 +204,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return Legalized;
}
case TargetOpcode::G_ADD: {
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
// Expand in terms of carry-setting/consuming G_ADDE instructions.
- int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
- NarrowTy.getSizeInBits();
+ int NumParts = SizeOp0 / NarrowSize;
SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
@@ -217,9 +237,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (TypeIdx != 1)
return UnableToLegalize;
- int64_t NarrowSize = NarrowTy.getSizeInBits();
- int NumParts =
- MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize;
+ int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ // FIXME: add support for when SizeOp1 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp1 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp1 / NarrowSize;
SmallVector<unsigned, 2> SrcRegs, DstRegs;
SmallVector<uint64_t, 2> Indexes;
@@ -266,12 +289,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return Legalized;
}
case TargetOpcode::G_INSERT: {
- if (TypeIdx != 0)
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
- int64_t NarrowSize = NarrowTy.getSizeInBits();
- int NumParts =
- MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ int NumParts = SizeOp0 / NarrowSize;
SmallVector<unsigned, 2> SrcRegs, DstRegs;
SmallVector<uint64_t, 2> Indexes;
@@ -326,9 +349,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return Legalized;
}
case TargetOpcode::G_LOAD: {
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- int NumParts =
- MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp0 / NarrowSize;
LLT OffsetTy = LLT::scalar(
MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
@@ -353,9 +378,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return Legalized;
}
case TargetOpcode::G_STORE: {
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- int NumParts =
- MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp0 / NarrowSize;
LLT OffsetTy = LLT::scalar(
MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
@@ -377,11 +404,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return Legalized;
}
case TargetOpcode::G_CONSTANT: {
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- int NumParts =
- MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp0 / NarrowSize;
const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
- LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
SmallVector<unsigned, 2> DstRegs;
for (int i = 0; i < NumParts; ++i) {
@@ -396,6 +425,53 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_OR: {
+ // Legalize bitwise operation:
+ // A = BinOp<Ty> B, C
+ // into:
+ // B1, ..., BN = G_UNMERGE_VALUES B
+ // C1, ..., CN = G_UNMERGE_VALUES C
+ // A1 = BinOp<Ty/N> B1, C2
+ // ...
+ // AN = BinOp<Ty/N> BN, CN
+ // A = G_MERGE_VALUES A1, ..., AN
+
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp0 / NarrowSize;
+
+ // List the registers where the destination will be scattered.
+ SmallVector<unsigned, 2> DstRegs;
+ // List the registers where the first argument will be split.
+ SmallVector<unsigned, 2> SrcsReg1;
+ // List the registers where the second argument will be split.
+ SmallVector<unsigned, 2> SrcsReg2;
+ // Create all the temporary registers.
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
+ unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
+
+ DstRegs.push_back(DstReg);
+ SrcsReg1.push_back(SrcReg1);
+ SrcsReg2.push_back(SrcReg2);
+ }
+ // Explode the big arguments into smaller chunks.
+ MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
+ MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
+
+ // Do the operation on each small part.
+ for (int i = 0; i < NumParts; ++i)
+ MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]);
+
+ // Gather the destination registers into the final destination.
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
@@ -597,22 +673,58 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_FCMP: {
+ unsigned Op0Ext, Op1Ext, DstReg;
+ unsigned Cmp1 = MI.getOperand(2).getReg();
+ unsigned Cmp2 = MI.getOperand(3).getReg();
+ if (TypeIdx == 0) {
+ Op0Ext = Cmp1;
+ Op1Ext = Cmp2;
+ DstReg = MRI.createGenericVirtualRegister(WideTy);
+ } else {
+ Op0Ext = MRI.createGenericVirtualRegister(WideTy);
+ Op1Ext = MRI.createGenericVirtualRegister(WideTy);
+ DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op0Ext, Cmp1);
+ MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op1Ext, Cmp2);
+ }
+ MIRBuilder.buildFCmp(
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()),
+ DstReg, Op0Ext, Op1Ext);
+ if (TypeIdx == 0)
+ MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(),
+ DstReg);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_ICMP: {
- assert(TypeIdx == 1 && "unable to legalize predicate");
bool IsSigned = CmpInst::isSigned(
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()));
- unsigned Op0Ext = MRI.createGenericVirtualRegister(WideTy);
- unsigned Op1Ext = MRI.createGenericVirtualRegister(WideTy);
- if (IsSigned) {
- MIRBuilder.buildSExt(Op0Ext, MI.getOperand(2).getReg());
- MIRBuilder.buildSExt(Op1Ext, MI.getOperand(3).getReg());
+ unsigned Cmp1 = MI.getOperand(2).getReg();
+ unsigned Cmp2 = MI.getOperand(3).getReg();
+ unsigned Op0Ext, Op1Ext, DstReg;
+ if (TypeIdx == 0) {
+ Op0Ext = Cmp1;
+ Op1Ext = Cmp2;
+ DstReg = MRI.createGenericVirtualRegister(WideTy);
} else {
- MIRBuilder.buildZExt(Op0Ext, MI.getOperand(2).getReg());
- MIRBuilder.buildZExt(Op1Ext, MI.getOperand(3).getReg());
+ Op0Ext = MRI.createGenericVirtualRegister(WideTy);
+ Op1Ext = MRI.createGenericVirtualRegister(WideTy);
+ DstReg = MI.getOperand(0).getReg();
+ if (IsSigned) {
+ MIRBuilder.buildSExt(Op0Ext, Cmp1);
+ MIRBuilder.buildSExt(Op1Ext, Cmp2);
+ } else {
+ MIRBuilder.buildZExt(Op0Ext, Cmp1);
+ MIRBuilder.buildZExt(Op1Ext, Cmp2);
+ }
}
MIRBuilder.buildICmp(
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()),
- MI.getOperand(0).getReg(), Op0Ext, Op1Ext);
+ DstReg, Op0Ext, Op1Ext);
+ if (TypeIdx == 0)
+ MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(),
+ DstReg);
MI.eraseFromParent();
return Legalized;
}
@@ -623,6 +735,35 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MI.getOperand(2).setReg(OffsetExt);
return Legalized;
}
+ case TargetOpcode::G_PHI: {
+ assert(TypeIdx == 0 && "Expecting only Idx 0");
+ auto getExtendedReg = [&](unsigned Reg, MachineBasicBlock &MBB) {
+ auto FirstTermIt = MBB.getFirstTerminator();
+ MIRBuilder.setInsertPt(MBB, FirstTermIt);
+ MachineInstr *DefMI = MRI.getVRegDef(Reg);
+ MachineInstrBuilder MIB;
+ if (DefMI->getOpcode() == TargetOpcode::G_TRUNC)
+ MIB = MIRBuilder.buildAnyExtOrTrunc(WideTy,
+ DefMI->getOperand(1).getReg());
+ else
+ MIB = MIRBuilder.buildAnyExt(WideTy, Reg);
+ return MIB->getOperand(0).getReg();
+ };
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, WideTy);
+ for (auto OpIt = MI.operands_begin() + 1, OpE = MI.operands_end();
+ OpIt != OpE;) {
+ unsigned Reg = OpIt++->getReg();
+ MachineBasicBlock *OpMBB = OpIt++->getMBB();
+ MIB.addReg(getExtendedReg(Reg, *OpMBB));
+ MIB.addMBB(OpMBB);
+ }
+ auto *MBB = MI.getParent();
+ MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(),
+ MIB->getOperand(0).getReg());
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
@@ -683,7 +824,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
unsigned Res = MI.getOperand(0).getReg();
Type *ZeroTy;
- LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
switch (Ty.getSizeInBits()) {
case 16:
ZeroTy = Type::getHalfTy(Ctx);
@@ -726,6 +867,18 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
+ unsigned OldValRes = MI.getOperand(0).getReg();
+ unsigned SuccessRes = MI.getOperand(1).getReg();
+ unsigned Addr = MI.getOperand(2).getReg();
+ unsigned CmpVal = MI.getOperand(3).getReg();
+ unsigned NewVal = MI.getOperand(4).getReg();
+ MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
+ **MI.memoperands_begin());
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
@@ -741,7 +894,12 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_ADD: {
unsigned NarrowSize = NarrowTy.getSizeInBits();
unsigned DstReg = MI.getOperand(0).getReg();
- int NumParts = MRI.getType(DstReg).getSizeInBits() / NarrowSize;
+ unsigned Size = MRI.getType(DstReg).getSizeInBits();
+ int NumParts = Size / NarrowSize;
+ // FIXME: Don't know how to handle the situation where the small vectors
+ // aren't all the same size yet.
+ if (Size % NarrowSize != 0)
+ return UnableToLegalize;
MIRBuilder.setInstr(MI);
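
[Editor's note] A guard repeated through the narrowScalar hunks above is the divisibility check: a wide value is split into parts only when its size is an exact multiple of NarrowSize, and anything else currently returns UnableToLegalize (see the FIXMEs). A minimal standalone sketch of that check:

    #include <cstdint>
    #include <iostream>
    #include <optional>

    // Returns the part count, or nullopt where the legalizer would report
    // UnableToLegalize (non-exact multiple).
    std::optional<int> numParts(int64_t WideBits, int64_t NarrowBits) {
      if (NarrowBits <= 0 || WideBits % NarrowBits != 0)
        return std::nullopt;
      return static_cast<int>(WideBits / NarrowBits);
    }

    int main() {
      if (auto N = numParts(128, 32))
        std::cout << "split into " << *N << " parts\n"; // 4
      if (!numParts(65, 32))
        std::cout << "65 % 32 != 0 -> unable to legalize\n";
    }
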
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 76917aa9660d..9c27c59a0654 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -22,51 +22,136 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOpcodes.h"
#include <algorithm>
-#include <cassert>
-#include <tuple>
-#include <utility>
-
+#include <map>
using namespace llvm;
-LegalizerInfo::LegalizerInfo() {
- DefaultActions[TargetOpcode::G_IMPLICIT_DEF] = NarrowScalar;
-
- // FIXME: these two can be legalized to the fundamental load/store Jakob
- // proposed. Once loads & stores are supported.
- DefaultActions[TargetOpcode::G_ANYEXT] = Legal;
- DefaultActions[TargetOpcode::G_TRUNC] = Legal;
+LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
+ // Set defaults.
+ // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the
+ // fundamental load/store Jakob proposed, once loads & stores are supported.
+ setScalarAction(TargetOpcode::G_ANYEXT, 1, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_ZEXT, 1, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_SEXT, 1, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_TRUNC, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_TRUNC, 1, {{1, Legal}});
- DefaultActions[TargetOpcode::G_INTRINSIC] = Legal;
- DefaultActions[TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS] = Legal;
+ setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}});
+ setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}});
- DefaultActions[TargetOpcode::G_ADD] = NarrowScalar;
- DefaultActions[TargetOpcode::G_LOAD] = NarrowScalar;
- DefaultActions[TargetOpcode::G_STORE] = NarrowScalar;
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_OR, 0, widenToLargerTypesAndNarrowToLargest);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_LOAD, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_STORE, 0, narrowToSmallerAndUnsupportedIfTooSmall);
- DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar;
- DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar;
- DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar;
- DefaultActions[TargetOpcode::G_FNEG] = Lower;
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_BRCOND, 0, widenToLargerTypesUnsupportedOtherwise);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_INSERT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_EXTRACT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+ setLegalizeScalarToDifferentSizeStrategy(
+ TargetOpcode::G_EXTRACT, 1, narrowToSmallerAndUnsupportedIfTooSmall);
+ setScalarAction(TargetOpcode::G_FNEG, 0, {{1, Lower}});
}
void LegalizerInfo::computeTables() {
- for (unsigned Opcode = 0; Opcode <= LastOp - FirstOp; ++Opcode) {
- for (unsigned Idx = 0; Idx != Actions[Opcode].size(); ++Idx) {
- for (auto &Action : Actions[Opcode][Idx]) {
- LLT Ty = Action.first;
- if (!Ty.isVector())
- continue;
-
- auto &Entry = MaxLegalVectorElts[std::make_pair(Opcode + FirstOp,
- Ty.getElementType())];
- Entry = std::max(Entry, Ty.getNumElements());
+ assert(TablesInitialized == false);
+
+ for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) {
+ const unsigned Opcode = FirstOp + OpcodeIdx;
+ for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size();
+ ++TypeIdx) {
+ // 0. Collect information specified through the setAction API, i.e.
+ // for specific bit sizes.
+ // For scalar types:
+ SizeAndActionsVec ScalarSpecifiedActions;
+ // For pointer types:
+ std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions;
+ // For vector types:
+ std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions;
+ for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) {
+ const LLT Type = LLT2Action.first;
+ const LegalizeAction Action = LLT2Action.second;
+
+ auto SizeAction = std::make_pair(Type.getSizeInBits(), Action);
+ if (Type.isPointer())
+ AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back(
+ SizeAction);
+ else if (Type.isVector())
+ ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()]
+ .push_back(SizeAction);
+ else
+ ScalarSpecifiedActions.push_back(SizeAction);
+ }
+
+ // 1. Handle scalar types
+ {
+ // Decide how to handle bit sizes for which no explicit specification
+ // was given.
+ SizeChangeStrategy S = &unsupportedForDifferentSizes;
+ if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
+ ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+ S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
+ std::sort(ScalarSpecifiedActions.begin(), ScalarSpecifiedActions.end());
+ checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
+ setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
}
+
+ // 2. Handle pointer types
+ for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
+ std::sort(PointerSpecifiedActions.second.begin(),
+ PointerSpecifiedActions.second.end());
+ checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
+ // For pointer types, we assume that there isn't a meaningful way
+ // to change the number of bits used in the pointer.
+ setPointerAction(
+ Opcode, TypeIdx, PointerSpecifiedActions.first,
+ unsupportedForDifferentSizes(PointerSpecifiedActions.second));
+ }
+
+ // 3. Handle vector types
+ SizeAndActionsVec ElementSizesSeen;
+ for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
+ std::sort(VectorSpecifiedActions.second.begin(),
+ VectorSpecifiedActions.second.end());
+ const uint16_t ElementSize = VectorSpecifiedActions.first;
+ ElementSizesSeen.push_back({ElementSize, Legal});
+ checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
+ // For vector types, we assume that the best way to adapt the number
+ // of elements is to the next larger number of elements type for which
+ // the vector type is legal, unless there is no such type. In that case,
+ // legalize towards a vector type with a smaller number of elements.
+ SizeAndActionsVec NumElementsActions;
+ for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) {
+ assert(BitsizeAndAction.first % ElementSize == 0);
+ const uint16_t NumElements = BitsizeAndAction.first / ElementSize;
+ NumElementsActions.push_back({NumElements, BitsizeAndAction.second});
+ }
+ setVectorNumElementAction(
+ Opcode, TypeIdx, ElementSize,
+ moreToWiderTypesAndLessToWidest(NumElementsActions));
+ }
+ std::sort(ElementSizesSeen.begin(), ElementSizesSeen.end());
+ SizeChangeStrategy VectorElementSizeChangeStrategy =
+ &unsupportedForDifferentSizes;
+ if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
+ VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+ VectorElementSizeChangeStrategy =
+ VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx];
+ setScalarInVectorAction(
+ Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen));
}
}
@@ -82,62 +167,23 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const {
assert(TablesInitialized && "backend forgot to call computeTables");
// These *have* to be implemented for now, they're the fundamental basis of
// how everything else is transformed.
+ if (Aspect.Type.isScalar() || Aspect.Type.isPointer())
+ return findScalarLegalAction(Aspect);
+ assert(Aspect.Type.isVector());
+ return findVectorLegalAction(Aspect);
+}
- // FIXME: the long-term plan calls for expansion in terms of load/store (if
- // they're not legal).
- if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
- Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES)
- return std::make_pair(Legal, Aspect.Type);
-
- LLT Ty = Aspect.Type;
- LegalizeAction Action = findInActions(Aspect);
- // LegalizerHelper is not able to handle non-power-of-2 types right now, so do
- // not try to legalize them unless they are marked as Legal or Custom.
- // FIXME: This is a temporary hack until the general non-power-of-2
- // legalization works.
- if (!isPowerOf2_64(Ty.getSizeInBits()) &&
- !(Action == Legal || Action == Custom))
- return std::make_pair(Unsupported, LLT());
-
- if (Action != NotFound)
- return findLegalAction(Aspect, Action);
-
- unsigned Opcode = Aspect.Opcode;
- if (!Ty.isVector()) {
- auto DefaultAction = DefaultActions.find(Aspect.Opcode);
- if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal)
- return std::make_pair(Legal, Ty);
-
- if (DefaultAction != DefaultActions.end() && DefaultAction->second == Lower)
- return std::make_pair(Lower, Ty);
-
- if (DefaultAction == DefaultActions.end() ||
- DefaultAction->second != NarrowScalar)
- return std::make_pair(Unsupported, LLT());
- return findLegalAction(Aspect, NarrowScalar);
- }
-
- LLT EltTy = Ty.getElementType();
- int NumElts = Ty.getNumElements();
-
- auto ScalarAction = ScalarInVectorActions.find(std::make_pair(Opcode, EltTy));
- if (ScalarAction != ScalarInVectorActions.end() &&
- ScalarAction->second != Legal)
- return findLegalAction(Aspect, ScalarAction->second);
-
- // The element type is legal in principle, but the number of elements is
- // wrong.
- auto MaxLegalElts = MaxLegalVectorElts.lookup(std::make_pair(Opcode, EltTy));
- if (MaxLegalElts > NumElts)
- return findLegalAction(Aspect, MoreElements);
-
- if (MaxLegalElts == 0) {
- // Scalarize if there's no legal vector type, which is just a special case
- // of FewerElements.
- return std::make_pair(FewerElements, EltTy);
- }
-
- return findLegalAction(Aspect, FewerElements);
+/// Helper function to get LLT for the given type index.
+static LLT getTypeFromTypeIdx(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI, unsigned OpIdx,
+ unsigned TypeIdx) {
+ assert(TypeIdx < MI.getNumOperands() && "Unexpected TypeIdx");
+ // G_UNMERGE_VALUES has variable number of operands, but there is only
+ // one source type and one destination type as all destinations must be the
+ // same type. So, get the last operand if TypeIdx == 1.
+ if (MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && TypeIdx == 1)
+ return MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
+ return MRI.getType(MI.getOperand(OpIdx).getReg());
}
std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT>
@@ -145,19 +191,20 @@ LegalizerInfo::getAction(const MachineInstr &MI,
const MachineRegisterInfo &MRI) const {
SmallBitVector SeenTypes(8);
const MCOperandInfo *OpInfo = MI.getDesc().OpInfo;
+ // FIXME: probably we'll need to cache the results here somehow?
for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) {
if (!OpInfo[i].isGenericType())
continue;
- // We don't want to repeatedly check the same operand index, that
- // could get expensive.
+ // We must only record actions once for each TypeIdx; otherwise we'd
+ // try to legalize operands multiple times down the line.
unsigned TypeIdx = OpInfo[i].getGenericTypeIndex();
if (SeenTypes[TypeIdx])
continue;
SeenTypes.set(TypeIdx);
- LLT Ty = MRI.getType(MI.getOperand(i).getReg());
+ LLT Ty = getTypeFromTypeIdx(MI, MRI, i, TypeIdx);
auto Action = getAction({MI.getOpcode(), TypeIdx, Ty});
if (Action.first != Legal)
return std::make_tuple(Action.first, TypeIdx, Action.second);
@@ -170,38 +217,166 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI,
return std::get<0>(getAction(MI, MRI)) == Legal;
}
-Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect,
- LegalizeAction Action) const {
- switch(Action) {
- default:
- llvm_unreachable("Cannot find legal type");
+bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ return false;
+}
+
+LegalizerInfo::SizeAndActionsVec
+LegalizerInfo::increaseToLargerTypesAndDecreaseToLargest(
+ const SizeAndActionsVec &v, LegalizeAction IncreaseAction,
+ LegalizeAction DecreaseAction) {
+ SizeAndActionsVec result;
+ unsigned LargestSizeSoFar = 0;
+ if (v.size() >= 1 && v[0].first != 1)
+ result.push_back({1, IncreaseAction});
+ for (size_t i = 0; i < v.size(); ++i) {
+ result.push_back(v[i]);
+ LargestSizeSoFar = v[i].first;
+ if (i + 1 < v.size() && v[i + 1].first != v[i].first + 1) {
+ result.push_back({LargestSizeSoFar + 1, IncreaseAction});
+ LargestSizeSoFar = v[i].first + 1;
+ }
+ }
+ result.push_back({LargestSizeSoFar + 1, DecreaseAction});
+ return result;
+}
+
+LegalizerInfo::SizeAndActionsVec
+LegalizerInfo::decreaseToSmallerTypesAndIncreaseToSmallest(
+ const SizeAndActionsVec &v, LegalizeAction DecreaseAction,
+ LegalizeAction IncreaseAction) {
+ SizeAndActionsVec result;
+ if (v.size() == 0 || v[0].first != 1)
+ result.push_back({1, IncreaseAction});
+ for (size_t i = 0; i < v.size(); ++i) {
+ result.push_back(v[i]);
+ if (i + 1 == v.size() || v[i + 1].first != v[i].first + 1) {
+ result.push_back({v[i].first + 1, DecreaseAction});
+ }
+ }
+ return result;
+}
+
+LegalizerInfo::SizeAndAction
+LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {
+ assert(Size >= 1);
+ // Find the last element in Vec that has a bitsize equal to or smaller than
+ // the requested bit size.
+ // That is the element just before the first element that is bigger than Size.
+ auto VecIt = std::upper_bound(
+ Vec.begin(), Vec.end(), Size,
+ [](const uint32_t Size, const SizeAndAction lhs) -> bool {
+ return Size < lhs.first;
+ });
+ assert(VecIt != Vec.begin() && "Does Vec not start with size 1?");
+ --VecIt;
+ int VecIdx = VecIt - Vec.begin();
+
+ LegalizeAction Action = Vec[VecIdx].second;
+ switch (Action) {
case Legal:
case Lower:
case Libcall:
case Custom:
- return Aspect.Type;
+ return {Size, Action};
+ case FewerElements:
+ // FIXME: is this special case still needed and correct?
+ // Special case for scalarization:
+ if (Vec == SizeAndActionsVec({{1, FewerElements}}))
+ return {1, FewerElements};
+ LLVM_FALLTHROUGH;
case NarrowScalar: {
- return findLegalizableSize(
- Aspect, [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
- }
- case WidenScalar: {
- return findLegalizableSize(Aspect, [&](LLT Ty) -> LLT {
- return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize();
- });
- }
- case FewerElements: {
- return findLegalizableSize(
- Aspect, [&](LLT Ty) -> LLT { return Ty.halfElements(); });
+ // The following needs to be a loop, as for now, we do allow needing to
+ // go over "Unsupported" bit sizes before finding a legalizable bit size.
+ // e.g. (s8, WidenScalar), (s9, Unsupported), (s32, Legal). if Size==8,
+ // we need to iterate over s9, and then to s32 to return (s32, Legal).
+ // If we want to get rid of the below loop, we should have stronger asserts
+ // when building the SizeAndActionsVecs, probably not allowing
+ // "Unsupported" unless at the ends of the vector.
+ for (int i = VecIdx - 1; i >= 0; --i)
+ if (!needsLegalizingToDifferentSize(Vec[i].second) &&
+ Vec[i].second != Unsupported)
+ return {Vec[i].first, Action};
+ llvm_unreachable("");
}
+ case WidenScalar:
case MoreElements: {
- return findLegalizableSize(
- Aspect, [&](LLT Ty) -> LLT { return Ty.doubleElements(); });
+ // See above, the following needs to be a loop, at least for now.
+ for (std::size_t i = VecIdx + 1; i < Vec.size(); ++i)
+ if (!needsLegalizingToDifferentSize(Vec[i].second) &&
+ Vec[i].second != Unsupported)
+ return {Vec[i].first, Action};
+ llvm_unreachable("");
}
+ case Unsupported:
+ return {Size, Unsupported};
+ case NotFound:
+ llvm_unreachable("NotFound");
}
+ llvm_unreachable("Action has an unknown enum value");
}
-bool LegalizerInfo::legalizeCustom(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- return false;
+std::pair<LegalizerInfo::LegalizeAction, LLT>
+LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const {
+ assert(Aspect.Type.isScalar() || Aspect.Type.isPointer());
+ if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
+ return {NotFound, LLT()};
+ const unsigned OpcodeIdx = Aspect.Opcode - FirstOp;
+ if (Aspect.Type.isPointer() &&
+ AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) ==
+ AddrSpace2PointerActions[OpcodeIdx].end()) {
+ return {NotFound, LLT()};
+ }
+ const SmallVector<SizeAndActionsVec, 1> &Actions =
+ Aspect.Type.isPointer()
+ ? AddrSpace2PointerActions[OpcodeIdx]
+ .find(Aspect.Type.getAddressSpace())
+ ->second
+ : ScalarActions[OpcodeIdx];
+ if (Aspect.Idx >= Actions.size())
+ return {NotFound, LLT()};
+ const SizeAndActionsVec &Vec = Actions[Aspect.Idx];
+ // FIXME: speed up this search, e.g. by using a results cache for repeated
+ // queries?
+ auto SizeAndAction = findAction(Vec, Aspect.Type.getSizeInBits());
+ return {SizeAndAction.second,
+ Aspect.Type.isScalar() ? LLT::scalar(SizeAndAction.first)
+ : LLT::pointer(Aspect.Type.getAddressSpace(),
+ SizeAndAction.first)};
+}
+
+std::pair<LegalizerInfo::LegalizeAction, LLT>
+LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
+ assert(Aspect.Type.isVector());
+ // First legalize the vector element size, then legalize the number of
+ // lanes in the vector.
+ if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
+ return {NotFound, Aspect.Type};
+ const unsigned OpcodeIdx = Aspect.Opcode - FirstOp;
+ const unsigned TypeIdx = Aspect.Idx;
+ if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size())
+ return {NotFound, Aspect.Type};
+ const SizeAndActionsVec &ElemSizeVec =
+ ScalarInVectorActions[OpcodeIdx][TypeIdx];
+
+ LLT IntermediateType;
+ auto ElementSizeAndAction =
+ findAction(ElemSizeVec, Aspect.Type.getScalarSizeInBits());
+ IntermediateType =
+ LLT::vector(Aspect.Type.getNumElements(), ElementSizeAndAction.first);
+ if (ElementSizeAndAction.second != Legal)
+ return {ElementSizeAndAction.second, IntermediateType};
+
+ auto i = NumElements2Actions[OpcodeIdx].find(
+ IntermediateType.getScalarSizeInBits());
+ if (i == NumElements2Actions[OpcodeIdx].end()) {
+ return {NotFound, IntermediateType};
+ }
+ const SizeAndActionsVec &NumElementsVec = (*i).second[TypeIdx];
+ auto NumElementsAndAction =
+ findAction(NumElementsVec, IntermediateType.getNumElements());
+ return {NumElementsAndAction.second,
+ LLT::vector(NumElementsAndAction.first,
+ IntermediateType.getScalarSizeInBits())};
}
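
[Editor's note] The new LegalizerInfo tables reduce every query to a search over a SizeAndActionsVec: a sorted list of (first bit size, action) steps in which each action covers all sizes up to the next step, so findAction() is an upper_bound followed by one step back. The following is a self-contained model of that lookup with a reduced Action enum, not the LLVM type:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <utility>
    #include <vector>

    enum Action { Legal, WidenScalar, NarrowScalar, Unsupported };
    using SizeAndAction = std::pair<uint16_t, Action>;
    using SizeAndActionsVec = std::vector<SizeAndAction>;

    SizeAndAction findAction(const SizeAndActionsVec &Vec, uint32_t Size) {
      assert(Size >= 1 && !Vec.empty() && Vec.front().first == 1);
      // First entry whose step size is strictly greater than Size...
      auto It = std::upper_bound(
          Vec.begin(), Vec.end(), Size,
          [](uint32_t S, const SizeAndAction &E) { return S < E.first; });
      --It; // ...then step back to the entry covering Size.
      return {static_cast<uint16_t>(Size), It->second};
    }

    int main() {
      // e.g. widen tiny scalars up to 32 bits, 32..64 legal, narrow beyond.
      SizeAndActionsVec V = {{1, WidenScalar}, {32, Legal}, {65, NarrowScalar}};
      assert(findAction(V, 8).second == WidenScalar);
      assert(findAction(V, 32).second == Legal);
      assert(findAction(V, 64).second == Legal);
      assert(findAction(V, 128).second == NarrowScalar);
    }

The invariant the real code asserts ("Does Vec not start with size 1?") is what makes the step back after upper_bound always valid.
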
diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp
index c5d0999fe438..8e16470b6f90 100644
--- a/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -101,7 +101,8 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
// Don't try to be smart for the insertion point.
// There is no guarantee that the first seen use is the first
// use in the block.
- InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI);
+ InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
+ LocalizedMI);
// Set a new register for the definition.
unsigned NewReg =
@@ -112,7 +113,7 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
}
- DEBUG(dbgs() << "Update use with: " << PrintReg(NewVRegIt->second)
+ DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
<< '\n');
// Update the user reg.
MOUse.setReg(NewVRegIt->second);
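
[Editor's note] The Localizer fix moves the insertion point from getFirstNonPHI() to SkipPHIsAndLabels(), so a localized definition also lands after any labels at the top of the block, not merely after the PHIs. A toy model of the skip (plain C++, not LLVM code):

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    enum Kind { PHI, Label, Normal };
    struct Instr { Kind K; std::string Name; };

    // Returns the index where a localized definition may be inserted,
    // skipping leading PHIs and labels (mirrors SkipPHIsAndLabels).
    std::size_t skipPHIsAndLabels(const std::vector<Instr> &BB) {
      std::size_t i = 0;
      while (i < BB.size() && (BB[i].K == PHI || BB[i].K == Label))
        ++i;
      return i;
    }

    int main() {
      std::vector<Instr> BB = {{PHI, "phi"}, {Label, "ehlabel"},
                               {Normal, "add"}};
      assert(skipPHIsAndLabels(BB) == 2); // after the label, before "add"
    }
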
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 4636806c3f08..475bb82e5b9c 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -15,10 +15,10 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOpcodes.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -83,30 +83,26 @@ MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildDirectDbgValue(
- unsigned Reg, const MDNode *Variable, const MDNode *Expr) {
+MachineInstrBuilder
+MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- return buildInstr(TargetOpcode::DBG_VALUE)
- .addReg(Reg, RegState::Debug)
- .addReg(0, RegState::Debug)
- .addMetadata(Variable)
- .addMetadata(Expr);
+ return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE),
+ /*IsIndirect*/ false, Reg, Variable, Expr));
}
-MachineInstrBuilder MachineIRBuilder::buildIndirectDbgValue(
- unsigned Reg, unsigned Offset, const MDNode *Variable, const MDNode *Expr) {
+MachineInstrBuilder
+MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- return buildInstr(TargetOpcode::DBG_VALUE)
- .addReg(Reg, RegState::Debug)
- .addImm(Offset)
- .addMetadata(Variable)
- .addMetadata(Expr);
+ return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE),
+ /*IsIndirect*/ true, Reg, Variable, Expr));
}
MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
@@ -124,7 +120,6 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
}
MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
- unsigned Offset,
const MDNode *Variable,
const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
@@ -144,7 +139,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
MIB.addReg(0U);
}
- return MIB.addImm(Offset).addMetadata(Variable).addMetadata(Expr);
+ return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
}
MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
@@ -268,7 +263,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
const ConstantInt *NewVal = &Val;
if (Ty.getSizeInBits() != Val.getBitWidth())
- NewVal = ConstantInt::get(MF->getFunction()->getContext(),
+ NewVal = ConstantInt::get(MF->getFunction().getContext(),
Val.getValue().sextOrTrunc(Ty.getSizeInBits()));
return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal);
@@ -276,7 +271,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
int64_t Val) {
- auto IntN = IntegerType::get(MF->getFunction()->getContext(),
+ auto IntN = IntegerType::get(MF->getFunction().getContext(),
MRI->getType(Res).getSizeInBits());
ConstantInt *CI = ConstantInt::get(IntN, Val, true);
return buildConstant(Res, *CI);
@@ -351,14 +346,17 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) {
return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op);
}
-MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder
+MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, unsigned Res, unsigned Op) {
+ assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc ||
+ TargetOpcode::G_SEXT == ExtOpc) &&
+ "Expecting Extending Opc");
assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
unsigned Opcode = TargetOpcode::COPY;
if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
- Opcode = TargetOpcode::G_SEXT;
+ Opcode = ExtOpc;
else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
else
@@ -367,20 +365,19 @@ MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
return buildInstr(Opcode).addDef(Res).addUse(Op);
}
-MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res,
+MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
unsigned Op) {
- assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
- assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
+ return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op);
+}
- unsigned Opcode = TargetOpcode::COPY;
- if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
- Opcode = TargetOpcode::G_ZEXT;
- else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
- Opcode = TargetOpcode::G_TRUNC;
- else
- assert(MRI->getType(Res) == MRI->getType(Op));
+MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res,
+ unsigned Op) {
+ return buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op);
+}
- return buildInstr(Opcode).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(unsigned Res,
+ unsigned Op) {
+ return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op);
}
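
The three public entry points above now differ only in which extension opcode they forward to the shared buildExtOrTrunc helper. As a standalone sketch (hypothetical function name, not part of the patch), the size-based selection it performs is:

#include "llvm/CodeGen/TargetOpcodes.h"

// Widen with the requested extension, narrow with G_TRUNC, and fall back
// to a plain COPY when the bit widths already match.
static unsigned pickExtOrTruncOpcode(unsigned ExtOpc, unsigned DstBits,
                                     unsigned SrcBits) {
  if (DstBits > SrcBits)
    return ExtOpc;                      // G_ANYEXT, G_ZEXT or G_SEXT
  if (DstBits < SrcBits)
    return llvm::TargetOpcode::G_TRUNC; // narrowing always truncates
  return llvm::TargetOpcode::COPY;      // same width: no conversion
}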
MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) {
@@ -661,6 +658,31 @@ MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res,
.addUse(Idx);
}
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
+ unsigned CmpVal, unsigned NewVal,
+ MachineMemOperand &MMO) {
+#ifndef NDEBUG
+ LLT OldValResTy = MRI->getType(OldValRes);
+ LLT AddrTy = MRI->getType(Addr);
+ LLT CmpValTy = MRI->getType(CmpVal);
+ LLT NewValTy = MRI->getType(NewVal);
+ assert(OldValResTy.isScalar() && "invalid operand type");
+ assert(AddrTy.isPointer() && "invalid operand type");
+ assert(CmpValTy.isValid() && "invalid operand type");
+ assert(NewValTy.isValid() && "invalid operand type");
+ assert(OldValResTy == CmpValTy && "type mismatch");
+ assert(OldValResTy == NewValTy && "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG)
+ .addDef(OldValRes)
+ .addUse(Addr)
+ .addUse(CmpVal)
+ .addUse(NewVal)
+ .addMemOperand(&MMO);
+}
+
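A hypothetical caller of the new helper, sketched for illustration only (the surrounding translation code is assumed to have already created the virtual registers and the MachineMemOperand):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

static MachineInstrBuilder emitCmpXchg(MachineIRBuilder &B, unsigned OldValRes,
                                       unsigned Addr, unsigned CmpVal,
                                       unsigned NewVal,
                                       MachineMemOperand &MMO) {
  // Emits: OldValRes = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, with the MMO
  // attached so later passes can see the access size and atomic ordering.
  return B.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, MMO);
}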
void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src,
bool IsExtend) {
#ifndef NDEBUG
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 677941dbbf6d..006c9ea23034 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -26,9 +26,12 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
@@ -36,9 +39,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOpcodes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -159,7 +159,7 @@ bool RegBankSelect::repairReg(
// same types because the type is a placeholder when this function is called.
MachineInstr *MI =
MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
- DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst)
+ DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
<< '\n');
// TODO:
// Check if MI is legal. if not, we need to legalize all the
@@ -221,9 +221,8 @@ uint64_t RegBankSelect::getRepairCost(
// into a new virtual register.
// We would also need to propagate this information in the
// repairing placement.
- unsigned Cost =
- RBI->copyCost(*DesiredRegBrank, *CurRegBank,
- RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI));
+ unsigned Cost = RBI->copyCost(*DesiredRegBrank, *CurRegBank,
+ RBI->getSizeInBits(MO.getReg(), *MRI, *TRI));
// TODO: use a dedicated constant for ImpossibleCost.
if (Cost != std::numeric_limits<unsigned>::max())
return Cost;
@@ -602,9 +601,9 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
return false;
DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
- const Function *F = MF.getFunction();
+ const Function &F = MF.getFunction();
Mode SaveOptMode = OptMode;
- if (F->hasFnAttribute(Attribute::OptimizeNone))
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
OptMode = Mode::Fast;
init(MF);
diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 83b21e637097..4d3ae69d3a9d 100644
--- a/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#define DEBUG_TYPE "registerbank"
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index a841902feed1..b3d9209ae6eb 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -19,13 +19,12 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOpcodes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm> // For std::max.
@@ -84,7 +83,7 @@ const RegisterBank *
RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return &getRegBankFromRegClass(*TRI.getMinimalPhysRegClass(Reg));
+ return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI));
assert(Reg && "NoRegister does not have a register bank");
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
@@ -95,6 +94,19 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
return nullptr;
}
+const TargetRegisterClass &
+RegisterBankInfo::getMinimalPhysRegClass(unsigned Reg,
+ const TargetRegisterInfo &TRI) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Reg must be a physreg");
+ const auto &RegRCIt = PhysRegMinimalRCs.find(Reg);
+ if (RegRCIt != PhysRegMinimalRCs.end())
+ return *RegRCIt->second;
+ const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg);
+ PhysRegMinimalRCs[Reg] = PhysRC;
+ return *PhysRC;
+}
+
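TRI.getMinimalPhysRegClass is relatively expensive (it walks the register classes), so the new helper memoizes the answer per physreg; PhysRegMinimalRCs is presumably a mutable DenseMap member, which is what lets a const query populate it. The idiom in isolation, with illustrative names:

#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

static const llvm::TargetRegisterClass &lookupMinimalRC(
    llvm::DenseMap<unsigned, const llvm::TargetRegisterClass *> &Cache,
    unsigned Reg, const llvm::TargetRegisterInfo &TRI) {
  auto It = Cache.find(Reg);
  if (It != Cache.end())
    return *It->second;               // cache hit: skip the expensive query
  const llvm::TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
  Cache[Reg] = RC;                    // remember the answer for next time
  return *RC;
}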
const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI) const {
@@ -151,7 +163,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// is important. The rest is not constrained.
unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands();
- const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineFunction &MF = *MI.getMF();
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -419,16 +431,20 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
}
unsigned OrigReg = MO.getReg();
unsigned NewReg = *NewRegs.begin();
- DEBUG(dbgs() << " changed, replace " << PrintReg(OrigReg, nullptr));
+ DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr));
MO.setReg(NewReg);
- DEBUG(dbgs() << " with " << PrintReg(NewReg, nullptr));
+ DEBUG(dbgs() << " with " << printReg(NewReg, nullptr));
// The OperandsMapper creates plain scalar, we may have to fix that.
// Check if the types match and if not, fix that.
LLT OrigTy = MRI.getType(OrigReg);
LLT NewTy = MRI.getType(NewReg);
if (OrigTy != NewTy) {
- assert(OrigTy.getSizeInBits() == NewTy.getSizeInBits() &&
+ // The default mapping is not supposed to change the size of
+ // the storage. However, right now we don't necessarily bump all
+ // the types to storage size. For instance, we can consider
+ // s16 G_AND legal whereas the storage size is going to be 32.
+ assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() &&
"Types with difference size cannot be handled by the default "
"mapping");
DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "
@@ -441,13 +457,13 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI) const {
const TargetRegisterClass *RC = nullptr;
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
// The size is not directly available for physical registers.
// Instead, we need to access a register class that contains Reg and
// get the size of that register class.
- RC = TRI.getMinimalPhysRegClass(Reg);
+ RC = &getMinimalPhysRegClass(Reg, TRI);
} else {
LLT Ty = MRI.getType(Reg);
unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0;
@@ -543,10 +559,11 @@ bool RegisterBankInfo::InstructionMapping::verify(
// For PHI, we only care about mapping the definition.
assert(NumOperands == (isCopyLike(MI) ? 1 : MI.getNumOperands()) &&
"NumOperands must match, see constructor");
- assert(MI.getParent() && MI.getParent()->getParent() &&
+ assert(MI.getParent() && MI.getMF() &&
"MI must be connected to a MachineFunction");
- const MachineFunction &MF = *MI.getParent()->getParent();
- (void)MF;
+ const MachineFunction &MF = *MI.getMF();
+ const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo();
+ (void)RBI;
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
const MachineOperand &MO = MI.getOperand(Idx);
@@ -564,7 +581,7 @@ bool RegisterBankInfo::InstructionMapping::verify(
(void)MOMapping;
// Register size in bits.
// This size must match what the mapping expects.
- assert(MOMapping.verify(getSizeInBits(
+ assert(MOMapping.verify(RBI->getSizeInBits(
Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) &&
"Value mapping is invalid");
}
@@ -725,8 +742,8 @@ void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
// If we have a function, we can pretty print the name of the registers.
// Otherwise we will print the raw numbers.
const TargetRegisterInfo *TRI =
- getMI().getParent() && getMI().getParent()->getParent()
- ? getMI().getParent()->getParent()->getSubtarget().getRegisterInfo()
+ getMI().getParent() && getMI().getMF()
+ ? getMI().getMF()->getSubtarget().getRegisterInfo()
: nullptr;
bool IsFirst = true;
for (unsigned Idx = 0; Idx != NumOpds; ++Idx) {
@@ -735,13 +752,13 @@ void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
if (!IsFirst)
OS << ", ";
IsFirst = false;
- OS << '(' << PrintReg(getMI().getOperand(Idx).getReg(), TRI) << ", [";
+ OS << '(' << printReg(getMI().getOperand(Idx).getReg(), TRI) << ", [";
bool IsFirstNewVReg = true;
for (unsigned VReg : getVRegs(Idx)) {
if (!IsFirstNewVReg)
OS << ", ";
IsFirstNewVReg = false;
- OS << PrintReg(VReg, TRI);
+ OS << printReg(VReg, TRI);
}
OS << "])";
}
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 5ecaf5c563f8..ef990b49aceb 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -17,10 +17,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#define DEBUG_TYPE "globalisel-utils"
@@ -99,7 +99,10 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
const MachineInstr &MI) {
MachineOptimizationRemarkMissed R(PassName, "GISelFailure: ",
MI.getDebugLoc(), MI.getParent());
- R << Msg << ": " << ore::MNV("Inst", MI);
+ R << Msg;
+ // Printing MI is expensive; only do it if expensive remarks are enabled.
+ if (MORE.allowExtraAnalysis(PassName))
+ R << ": " << ore::MNV("Inst", MI);
reportGISelFailure(MF, TPC, MORE, R);
}
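
The remark is now assembled lazily: the cheap message is always added, but pretty-printing the MachineInstr is deferred behind allowExtraAnalysis. The pattern restated as a small sketch, with hypothetical pass and remark names:

static void emitFailureRemark(llvm::MachineOptimizationRemarkEmitter &MORE,
                              const llvm::MachineInstr &MI,
                              llvm::StringRef Msg) {
  llvm::MachineOptimizationRemarkMissed R("mypass", "MyFailure",
                                          MI.getDebugLoc(), MI.getParent());
  R << Msg;                              // cheap part, always included
  if (MORE.allowExtraAnalysis("mypass")) // user opted into costly remarks
    R << ": " << llvm::ore::MNV("Inst", MI); // MI printing is the costly bit
  MORE.emit(R);
}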
@@ -126,3 +129,19 @@ const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
return nullptr;
return MI->getOperand(1).getFPImm();
}
+
+llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg,
+ const MachineRegisterInfo &MRI) {
+ auto *DefMI = MRI.getVRegDef(Reg);
+ auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
+ if (!DstTy.isValid())
+ return nullptr;
+ while (DefMI->getOpcode() == TargetOpcode::COPY) {
+ unsigned SrcReg = DefMI->getOperand(1).getReg();
+ auto SrcTy = MRI.getType(SrcReg);
+ if (!SrcTy.isValid() || SrcTy != DstTy)
+ break;
+ DefMI = MRI.getVRegDef(SrcReg);
+ }
+ return DefMI->getOpcode() == Opcode ? DefMI : nullptr;
+}
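The new getOpcodeDef utility looks through COPYs as long as they preserve the LLT type. A hypothetical use, not from the patch, that checks whether a vreg is ultimately defined by a G_CONSTANT:

#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"

static llvm::Optional<int64_t>
getDefiningConstant(unsigned Reg, const llvm::MachineRegisterInfo &MRI) {
  if (llvm::MachineInstr *Def =
          llvm::getOpcodeDef(llvm::TargetOpcode::G_CONSTANT, Reg, MRI))
    // G_CONSTANT carries its value as a ConstantInt operand.
    return Def->getOperand(1).getCImm()->getSExtValue();
  return llvm::None;
}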
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index c6ca49ce24d7..8b9545da914e 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -1,4 +1,4 @@
-//===-- GlobalMerge.cpp - Internal globals merging -----------------------===//
+//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,6 +6,7 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
// This pass merges globals with internal linkage into one. This way all the
// globals which were merged into the bigger one can be addressed using offsets
// from the same base pointer (no need for separate base pointer for each of the
@@ -57,30 +58,45 @@
// - it can increase register pressure when the uses are disparate enough.
//
// We use heuristics to discover the best global grouping we can (cf cl::opts).
+//
// ===---------------------------------------------------------------------===//
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Attributes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "global-merge"
@@ -117,9 +133,12 @@ EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
cl::desc("Enable global merge pass on external linkage"));
STATISTIC(NumMerged, "Number of globals merged");
+
namespace {
+
class GlobalMerge : public FunctionPass {
- const TargetMachine *TM;
+ const TargetMachine *TM = nullptr;
+
// FIXME: Infer the maximum possible offset depending on the actual users
// (these max offsets are different for the users inside Thumb or ARM
// functions), see the code that passes in the offset in the ARM backend
@@ -130,15 +149,16 @@ namespace {
/// Currently, this applies a dead simple heuristic: only consider globals
/// used in minsize functions for merging.
/// FIXME: This could learn about optsize, and be used in the cost model.
- bool OnlyOptimizeForSize;
+ bool OnlyOptimizeForSize = false;
/// Whether we should merge global variables that have external linkage.
- bool MergeExternalGlobals;
+ bool MergeExternalGlobals = false;
bool IsMachO;
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
+
/// \brief Merge everything in \p Globals for which the corresponding bit
/// in \p GlobalSet is set.
bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
@@ -164,9 +184,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
+
explicit GlobalMerge()
- : FunctionPass(ID), TM(nullptr), MaxOffset(GlobalMergeMaxOffset),
- OnlyOptimizeForSize(false), MergeExternalGlobals(false) {
+ : FunctionPass(ID), MaxOffset(GlobalMergeMaxOffset) {
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}
@@ -189,9 +209,11 @@ namespace {
FunctionPass::getAnalysisUsage(AU);
}
};
+
} // end anonymous namespace
char GlobalMerge::ID = 0;
+
INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
@@ -231,9 +253,10 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// We keep track of the sets of globals used together "close enough".
struct UsedGlobalSet {
- UsedGlobalSet(size_t Size) : Globals(Size), UsageCount(1) {}
BitVector Globals;
- unsigned UsageCount;
+ unsigned UsageCount = 1;
+
+ UsedGlobalSet(size_t Size) : Globals(Size) {}
};
// Each set is unique in UsedGlobalSets.
@@ -363,7 +386,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
//
// Multiply that by the size of the set to give us a crude profitability
// metric.
- std::sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
+ std::stable_sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
[](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
return UGS1.Globals.count() * UGS1.UsageCount <
UGS2.Globals.count() * UGS2.UsageCount;
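Switching to std::stable_sort keeps equally-scored sets in their original relative order, so the pass output no longer depends on how the sort implementation breaks ties. For readability, the comparator could equivalently be written with the score pulled out (an in-scope restatement, not part of the patch):

// Order sets by a crude profitability score, ascending, so the most
// profitable sets end up at the back of UsedGlobalSets.
auto profitability = [](const UsedGlobalSet &UGS) {
  return uint64_t(UGS.Globals.count()) * UGS.UsageCount;
};
std::stable_sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
                 [&](const UsedGlobalSet &A, const UsedGlobalSet &B) {
                   return profitability(A) < profitability(B);
                 });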
@@ -545,7 +568,7 @@ bool GlobalMerge::doInitialization(Module &M) {
IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
auto &DL = M.getDataLayout();
- DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
+ DenseMap<unsigned, SmallVector<GlobalVariable *, 16>> Globals, ConstGlobals,
BSSGlobals;
bool Changed = false;
setMustKeepGlobalVariables(M);
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index ff8405366173..a22ce0dab9c2 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1,4 +1,4 @@
-//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//===- IfConversion.cpp - Machine code if conversion pass -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,26 +16,41 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <iterator>
+#include <memory>
#include <utility>
+#include <vector>
using namespace llvm;
@@ -77,6 +92,7 @@ STATISTIC(NumDupBBs, "Number of duplicated blocks");
STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
namespace {
+
class IfConverter : public MachineFunctionPass {
enum IfcvtKind {
ICNotClassfied, // BB data valid, but not classified.
@@ -125,21 +141,20 @@ namespace {
bool IsUnpredicable : 1;
bool CannotBeCopied : 1;
bool ClobbersPred : 1;
- unsigned NonPredSize;
- unsigned ExtraCost;
- unsigned ExtraCost2;
- MachineBasicBlock *BB;
- MachineBasicBlock *TrueBB;
- MachineBasicBlock *FalseBB;
+ unsigned NonPredSize = 0;
+ unsigned ExtraCost = 0;
+ unsigned ExtraCost2 = 0;
+ MachineBasicBlock *BB = nullptr;
+ MachineBasicBlock *TrueBB = nullptr;
+ MachineBasicBlock *FalseBB = nullptr;
SmallVector<MachineOperand, 4> BrCond;
SmallVector<MachineOperand, 4> Predicate;
+
BBInfo() : IsDone(false), IsBeingAnalyzed(false),
IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
IsBrReversible(false), HasFallThrough(false),
IsUnpredicable(false), CannotBeCopied(false),
- ClobbersPred(false), NonPredSize(0), ExtraCost(0),
- ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
- FalseBB(nullptr) {}
+ ClobbersPred(false) {}
};
/// Record information about pending if-conversions to attempt:
@@ -161,6 +176,7 @@ namespace {
bool NeedSubsumption : 1;
bool TClobbersPred : 1;
bool FClobbersPred : 1;
+
IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0,
bool tc = false, bool fc = false)
: BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s),
@@ -179,17 +195,17 @@ namespace {
MachineRegisterInfo *MRI;
LivePhysRegs Redefs;
- LivePhysRegs DontKill;
bool PreRegAlloc;
bool MadeChange;
- int FnNum;
+ int FnNum = -1;
std::function<bool(const MachineFunction &)> PredicateFtor;
public:
static char ID;
+
IfConverter(std::function<bool(const MachineFunction &)> Ftor = nullptr)
- : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(std::move(Ftor)) {
+ : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
initializeIfConverterPass(*PassRegistry::getPassRegistry());
}
@@ -242,7 +258,6 @@ namespace {
void AnalyzeBlocks(MachineFunction &MF,
std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
void InvalidatePreds(MachineBasicBlock &MBB);
- void RemoveExtraEdges(BBInfo &BBI);
bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
@@ -311,8 +326,9 @@ namespace {
}
};
- char IfConverter::ID = 0;
-}
+} // end anonymous namespace
+
+char IfConverter::ID = 0;
char &llvm::IfConverterID = IfConverter::ID;
@@ -321,7 +337,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF)))
+ if (skipFunction(MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF)))
return false;
const TargetSubtargetInfo &ST = MF.getSubtarget();
@@ -390,12 +406,12 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
case ICSimpleFalse: {
bool isFalse = Kind == ICSimpleFalse;
if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
- DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
- " false" : "")
- << "): BB#" << BBI.BB->getNumber() << " ("
- << ((Kind == ICSimpleFalse)
- ? BBI.FalseBB->getNumber()
- : BBI.TrueBB->getNumber()) << ") ");
+ DEBUG(dbgs() << "Ifcvt (Simple"
+ << (Kind == ICSimpleFalse ? " false" : "")
+ << "): " << printMBBReference(*BBI.BB) << " ("
+ << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber())
+ << ") ");
RetVal = IfConvertSimple(BBI, Kind);
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
@@ -419,9 +435,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << " false");
if (isRev)
DEBUG(dbgs() << " rev");
- DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
- << BBI.TrueBB->getNumber() << ",F:"
- << BBI.FalseBB->getNumber() << ") ");
+ DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB)
+ << " (T:" << BBI.TrueBB->getNumber()
+ << ",F:" << BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertTriangle(BBI, Kind);
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
@@ -435,24 +451,22 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
}
break;
}
- case ICDiamond: {
+ case ICDiamond:
if (DisableDiamond) break;
- DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
- << BBI.TrueBB->getNumber() << ",F:"
- << BBI.FalseBB->getNumber() << ") ");
+ DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB)
+ << " (T:" << BBI.TrueBB->getNumber()
+ << ",F:" << BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2,
Token->TClobbersPred,
Token->FClobbersPred);
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) ++NumDiamonds;
break;
- }
- case ICForkedDiamond: {
+ case ICForkedDiamond:
if (DisableForkedDiamond) break;
- DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#"
- << BBI.BB->getNumber() << " (T:"
- << BBI.TrueBB->getNumber() << ",F:"
- << BBI.FalseBB->getNumber() << ") ");
+ DEBUG(dbgs() << "Ifcvt (Forked Diamond): " << printMBBReference(*BBI.BB)
+ << " (T:" << BBI.TrueBB->getNumber()
+ << ",F:" << BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,
Token->TClobbersPred,
Token->FClobbersPred);
@@ -460,7 +474,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (RetVal) ++NumForkedDiamonds;
break;
}
- }
+
+ if (RetVal && MRI->tracksLiveness())
+ recomputeLivenessFlags(*BBI.BB);
Change |= RetVal;
@@ -616,7 +632,6 @@ bool IfConverter::CountDuplicatedInstructions(
unsigned &Dups1, unsigned &Dups2,
MachineBasicBlock &TBB, MachineBasicBlock &FBB,
bool SkipUnconditionalBranches) const {
-
while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
TIB = skipDebugInstructionsForward(TIB, TIE);
@@ -1342,19 +1357,10 @@ static void InsertUncondBranch(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB,
TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl);
}
-/// Remove true / false edges if either / both are no longer successors.
-void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
- SmallVector<MachineOperand, 4> Cond;
- if (!TII->analyzeBranch(*BBI.BB, TBB, FBB, Cond))
- BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
-}
-
/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all
/// values defined in MI which are also live/used by MI.
static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
- const TargetRegisterInfo *TRI = MI.getParent()->getParent()
- ->getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MI.getMF()->getSubtarget().getRegisterInfo();
// Before stepping forward past MI, remember which regs were live
// before MI. This is needed to set the Undef flag only when reg is
@@ -1374,7 +1380,7 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
unsigned Reg = Clobber.first;
MachineOperand &Op = const_cast<MachineOperand&>(*Clobber.second);
MachineInstr *OpMI = Op.getParent();
- MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI);
+ MachineInstrBuilder MIB(*OpMI->getMF(), OpMI);
if (Op.isRegMask()) {
// First handle regmasks. They clobber any entries in the mask which
// means that we need a def for those registers.
@@ -1389,13 +1395,6 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
MIB.addReg(Reg, RegState::Implicit | RegState::Define);
continue;
}
- assert(Op.isReg() && "Register operand required");
- if (Op.isDead()) {
- // If we found a dead def, but it needs to be live, then remove the dead
- // flag.
- if (Redefs.contains(Op.getReg()))
- Op.setIsDead(false);
- }
if (LiveBeforeMI.count(Reg))
MIB.addReg(Reg, RegState::Implicit);
else {
@@ -1412,26 +1411,6 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
}
}
-/// Remove kill flags from operands with a registers in the \p DontKill set.
-static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) {
- for (MIBundleOperands O(MI); O.isValid(); ++O) {
- if (!O->isReg() || !O->isKill())
- continue;
- if (DontKill.contains(O->getReg()))
- O->setIsKill(false);
- }
-}
-
-/// Walks a range of machine instructions and removes kill flags for registers
-/// in the \p DontKill set.
-static void RemoveKills(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator E,
- const LivePhysRegs &DontKill,
- const MCRegisterInfo &MCRI) {
- for (MachineInstr &MI : make_range(I, E))
- RemoveKills(MI, DontKill);
-}
-
/// If convert a simple (split, no rejoin) sub-CFG.
bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
@@ -1462,16 +1441,12 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
llvm_unreachable("Unable to reverse branch condition!");
Redefs.init(*TRI);
- DontKill.init(*TRI);
if (MRI->tracksLiveness()) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.addLiveIns(CvtMBB);
Redefs.addLiveIns(NextMBB);
- // Compute a set of registers which must not be killed by instructions in
- // BB1: This is everything live-in to BB2.
- DontKill.addLiveIns(NextMBB);
}
// Remove the branches from the entry so we can add the contents of the true
@@ -1483,15 +1458,14 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
- // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
- // explicitly remove CvtBBI as a successor.
+ // Keep the CFG updated.
BBI.BB->removeSuccessor(&CvtMBB, true);
} else {
// Predicate the instructions in the true block.
- RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI);
PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
- // Merge converted block into entry block.
+ // Merge converted block into entry block. The BB to Cvt edge is removed
+ // by MergeBlocks.
MergeBlocks(BBI, *CvtBBI);
}
@@ -1512,8 +1486,6 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
IterIfcvt = false;
}
- RemoveExtraEdges(BBI);
-
// Update block info. BB can be iteratively if-converted.
if (!IterIfcvt)
BBI.IsDone = true;
@@ -1578,8 +1550,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
Redefs.addLiveIns(NextMBB);
}
- DontKill.clear();
-
bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
BranchProbability CvtNext, CvtFalse, BBNext, BBCvt;
@@ -1599,10 +1569,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
-
- // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
- // explicitly remove CvtBBI as a successor.
- BBI.BB->removeSuccessor(&CvtMBB, true);
} else {
// Predicate the 'true' block after removing its branch.
CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB);
@@ -1612,6 +1578,9 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
MergeBlocks(BBI, *CvtBBI, false);
}
+ // Keep the CFG updated.
+ BBI.BB->removeSuccessor(&CvtMBB, true);
+
// If 'true' block has a 'false' successor, add an exit branch to it.
if (HasEarlyExit) {
SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
@@ -1659,8 +1628,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
IterIfcvt = false;
}
- RemoveExtraEdges(BBI);
-
// Update block info. BB can be iteratively if-converted.
if (!IterIfcvt)
BBI.IsDone = true;
@@ -1765,14 +1732,7 @@ bool IfConverter::IfConvertDiamondCommon(
--NumDups1;
}
- // Compute a set of registers which must not be killed by instructions in BB1:
- // This is everything used+live in BB2 after the duplicated instructions. We
- // can compute this set by simulating liveness backwards from the end of BB2.
- DontKill.init(*TRI);
if (MRI->tracksLiveness()) {
- for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse()))
- DontKill.stepBackward(MI);
-
for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy;
Redefs.stepForward(MI, Dummy);
@@ -1802,10 +1762,6 @@ bool IfConverter::IfConvertDiamondCommon(
}
MBB1.erase(DI1, MBB1.end());
- // Kill flags in the true block for registers living into the false block
- // must be removed.
- RemoveKills(MBB1.begin(), MBB1.end(), DontKill, *TRI);
-
DI2 = BBI2->BB->end();
// The branches have been checked to match. Skip over the branch in the false
// block so that we don't try to predicate it.
@@ -1923,8 +1879,6 @@ bool IfConverter::IfConvertForkedDiamond(
TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB,
TrueBBI.BrCond, dl);
- RemoveExtraEdges(BBI);
-
// Update block info.
BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
InvalidatePreds(*BBI.BB);
@@ -1961,6 +1915,11 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// fold the tail block in as well. Otherwise, unless it falls through to the
// tail, add an unconditional branch to it.
if (TailBB) {
+ // We need to remove the edges to the true and false blocks manually since
+ // we didn't let IfConvertDiamondCommon update the CFG.
+ BBI.BB->removeSuccessor(TrueBBI.BB);
+ BBI.BB->removeSuccessor(FalseBBI.BB, true);
+
BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
bool CanMergeTail = !TailBBI.HasFallThrough &&
!TailBBI.BB->hasAddressTaken();
@@ -1990,13 +1949,6 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
}
}
- // RemoveExtraEdges won't work if the block has an unanalyzable branch,
- // which can happen here if TailBB is unanalyzable and is merged, so
- // explicitly remove BBI1 and BBI2 as successors.
- BBI.BB->removeSuccessor(TrueBBI.BB);
- BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true);
- RemoveExtraEdges(BBI);
-
// Update block info.
BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
InvalidatePreds(*BBI.BB);
@@ -2101,10 +2053,6 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
// If the predicated instruction now redefines a register as the result of
// if-conversion, add an implicit kill.
UpdatePredRedefs(*MI, Redefs);
-
- // Some kill flags may not be correct anymore.
- if (!DontKill.empty())
- RemoveKills(*MI, DontKill);
}
if (!IgnoreBr) {
@@ -2133,7 +2081,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
/// Move all instructions from FromBB to the end of ToBB. This will leave
/// FromBB as an empty block, so remove all of its successor edges except for
/// the fall-through edge. If AddEdges is true, i.e., when FromBBI's branch is
-/// being moved, add those successor edges to ToBBI.
+/// being moved, add those successor edges to ToBBI and remove the old edge
+/// from ToBBI to FromBBI.
void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
MachineBasicBlock &FromMBB = *FromBBI.BB;
assert(!FromMBB.hasAddressTaken() &&
@@ -2165,12 +2114,10 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// AddEdges is true and FromMBB is a successor of ToBBI.BB.
auto To2FromProb = BranchProbability::getZero();
if (AddEdges && ToBBI.BB->isSuccessor(&FromMBB)) {
+ // Remove the old edge but remember the edge probability so we can calculate
+ // the correct weights on the new edges being added further down.
To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB);
- // Set the edge probability from ToBBI.BB to FromMBB to zero to avoid the
- // edge probability being merged to other edges when this edge is removed
- // later.
- ToBBI.BB->setSuccProbability(find(ToBBI.BB->successors(), &FromMBB),
- BranchProbability::getZero());
+ ToBBI.BB->removeSuccessor(&FromMBB);
}
for (MachineBasicBlock *Succ : FromSuccs) {
@@ -2229,9 +2176,11 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
}
}
- // Now FromBBI always falls through to the next block!
- if (NBB && !FromMBB.isSuccessor(NBB))
- FromMBB.addSuccessor(NBB);
+ // Move the now empty FromMBB out of the way to the end of the function so
+ // it doesn't interfere with fallthrough checks done by canFallThroughTo().
+ MachineBasicBlock *Last = &*FromMBB.getParent()->rbegin();
+ if (Last != &FromMBB)
+ FromMBB.moveAfter(Last);
// Normalize the probabilities of ToBBI.BB's successors with all adjustment
// we've done above.
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index e308f49ec4e8..308b6d293d3d 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -1,4 +1,4 @@
-//===-- ImplicitNullChecks.cpp - Fold null checks into memory accesses ----===//
+//===- ImplicitNullChecks.cpp - Fold null checks into memory accesses -----===//
//
// The LLVM Compiler Infrastructure
//
@@ -26,38 +26,50 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/FaultMaps.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Instruction.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
static cl::opt<int> PageSize("imp-null-check-page-size",
cl::desc("The page size of the target in bytes"),
- cl::init(4096));
+ cl::init(4096), cl::Hidden);
static cl::opt<unsigned> MaxInstsToConsider(
"imp-null-max-insts-to-consider",
cl::desc("The max number of instructions to consider hoisting loads over "
"(the algorithm is quadratic over this number)"),
- cl::init(8));
+ cl::Hidden, cl::init(8));
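Both knobs are now marked cl::Hidden: they stay out of the default -help listing but remain settable for experiments. The same declaration shape with a made-up option name:

#include "llvm/Support/CommandLine.h"

// Illustrative only; "example-threshold" is not a real LLVM flag.
static llvm::cl::opt<unsigned> ExampleThreshold(
    "example-threshold", llvm::cl::Hidden, llvm::cl::init(8),
    llvm::cl::desc("A hidden tuning knob following the pattern above"));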
#define DEBUG_TYPE "implicit-null-checks"
@@ -152,7 +164,6 @@ class ImplicitNullChecks : public MachineFunctionPass {
const TargetInstrInfo *TII = nullptr;
const TargetRegisterInfo *TRI = nullptr;
AliasAnalysis *AA = nullptr;
- MachineModuleInfo *MMI = nullptr;
MachineFrameInfo *MFI = nullptr;
bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
@@ -166,6 +177,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
AR_MayAlias,
AR_WillAliasEverything
};
+
/// Returns AR_NoAlias if \p MI memory operation does not alias with
/// \p PrevMI, AR_MayAlias if they may alias and AR_WillAliasEverything if
/// they may alias and any further memory operation may alias with \p PrevMI.
@@ -176,6 +188,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
SR_Unsuitable,
SR_Impossible
};
+
/// Return SR_Suitable if \p MI a memory operation that can be used to
/// implicitly null check the value in \p PointerReg, SR_Unsuitable if
/// \p MI cannot be used to null check and SR_Impossible if there is
@@ -200,6 +213,7 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override;
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -211,7 +225,7 @@ public:
}
};
-}
+} // end anonymous namespace
bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
if (MI->isCall() || MI->hasUnmodeledSideEffects())
@@ -230,7 +244,7 @@ ImplicitNullChecks::DependenceResult
ImplicitNullChecks::computeDependence(const MachineInstr *MI,
ArrayRef<MachineInstr *> Block) {
assert(llvm::all_of(Block, canHandle) && "Check this first!");
- assert(!llvm::is_contained(Block, MI) && "Block must be exclusive of MI!");
+ assert(!is_contained(Block, MI) && "Block must be exclusive of MI!");
Optional<ArrayRef<MachineInstr *>::iterator> Dep;
@@ -280,7 +294,6 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A,
bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getRegInfo().getTargetRegisterInfo();
- MMI = &MF.getMMI();
MFI = &MF.getFrameInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -356,7 +369,7 @@ ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
// We want the mem access to be issued at a sane offset from PointerReg,
// so that if PointerReg is null then the access reliably page faults.
if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() &&
- Offset < PageSize))
+ -PageSize < Offset && Offset < PageSize))
return SR_Unsuitable;
// Finally, check whether the current memory access aliases with the previous one.
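
The suitability test now accepts small negative offsets as well, on the assumption that the page just below address zero is also unmapped. Restated as a hypothetical helper:

// The access must land within one page of address zero in either
// direction so that a null PointerReg reliably page-faults.
static bool faultsWhenBaseIsNull(int64_t Offset, int64_t PageSize) {
  return -PageSize < Offset && Offset < PageSize;
}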
@@ -390,8 +403,10 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
// We don't want to reason about speculating loads. Note -- at this point
// we should have already filtered out all of the other non-speculatable
// things, like calls and stores.
+ // We also do not want to hoist stores: the hoisted store would modify
+ // memory even on executions where the FaultingMI goes on to fault.
assert(canHandle(DependenceMI) && "Should never have reached here!");
- if (DependenceMI->mayLoad())
+ if (DependenceMI->mayLoadOrStore())
return false;
for (auto &DependenceMO : DependenceMI->operands()) {
@@ -406,7 +421,7 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
// test %rcx, %rcx
// je _null_block
// _non_null_block:
- // %rdx<def> = INST
+ // %rdx = INST
// ...
//
// This restriction does not apply to the faulting load inst because in
@@ -441,7 +456,7 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
/// NullCheckList and return true, else return false.
bool ImplicitNullChecks::analyzeBlockForNullChecks(
MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) {
- typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
+ using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
MDNode *BranchMD = nullptr;
if (auto *BB = MBB.getBasicBlock())
@@ -483,7 +498,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
// Starting with a code fragment like:
//
- // test %RAX, %RAX
+ // test %rax, %rax
// jne LblNotNull
//
// LblNull:
@@ -493,13 +508,13 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
// Inst0
// Inst1
// ...
- // Def = Load (%RAX + <offset>)
+ // Def = Load (%rax + <offset>)
// ...
//
//
// we want to end up with
//
- // Def = FaultingLoad (%RAX + <offset>), LblNull
+ // Def = FaultingLoad (%rax + <offset>), LblNull
// jmp LblNotNull ;; explicit or fallthrough
//
// LblNotNull:
@@ -513,11 +528,11 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
//
// To see why this is legal, consider the two possibilities:
//
- // 1. %RAX is null: since we constrain <offset> to be less than PageSize, the
+ // 1. %rax is null: since we constrain <offset> to be less than PageSize, the
// load instruction dereferences the null page, causing a segmentation
// fault.
//
- // 2. %RAX is not null: in this case we know that the load cannot fault, as
+ // 2. %rax is not null: in this case we know that the load cannot fault, as
// otherwise the load would've faulted in the original program too and the
// original program would've been undefined.
//
@@ -555,7 +570,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
}
// If MI re-defines the PointerReg then we cannot move further.
- if (any_of(MI.operands(), [&](MachineOperand &MO) {
+ if (llvm::any_of(MI.operands(), [&](MachineOperand &MO) {
return MO.isReg() && MO.getReg() && MO.isDef() &&
TRI->regsOverlap(MO.getReg(), PointerReg);
}))
@@ -674,9 +689,10 @@ void ImplicitNullChecks::rewriteNullChecks(
}
}
-
char ImplicitNullChecks::ID = 0;
+
char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
+
INITIALIZE_PASS_BEGIN(ImplicitNullChecks, DEBUG_TYPE,
"Implicit null checks", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index eda4f74c7874..1aaf7a0ceef8 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -1,4 +1,4 @@
-//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===//
+//===- InlineSpiller.cpp - Insert spills and restores inline --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,31 +12,52 @@
//
//===----------------------------------------------------------------------===//
+#include "LiveRangeCalc.h"
#include "Spiller.h"
#include "SplitKit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include <cassert>
+#include <iterator>
+#include <tuple>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -56,6 +77,7 @@ static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
cl::desc("Disable inline spill hoisting"));
namespace {
+
class HoistSpillHelper : private LiveRangeEdit::Delegate {
MachineFunction &MF;
LiveIntervals &LIS;
@@ -64,7 +86,6 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
MachineDominatorTree &MDT;
MachineLoopInfo &Loops;
VirtRegMap &VRM;
- MachineFrameInfo &MFI;
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
const TargetRegisterInfo &TRI;
@@ -72,13 +93,17 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
InsertPointAnalysis IPA;
- // Map from StackSlot to its original register.
- DenseMap<int, unsigned> StackSlotToReg;
+ // Map from StackSlot to the LiveInterval of the original register.
+ // Note the LiveInterval of the original register may have been deleted
+ // after it is spilled. We keep a copy here to track the range where
+ // spills can be moved.
+ DenseMap<int, std::unique_ptr<LiveInterval>> StackSlotToOrigLI;
+
// Map from pair of (StackSlot and Original VNI) to a set of spills which
// have the same stackslot and have equal values defined by Original VNI.
// These spills are mergeable and are hoist candidates.
- typedef MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>
- MergeableSpillsMap;
+ using MergeableSpillsMap =
+ MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>;
MergeableSpillsMap MergeableSpills;
/// This is the map from original register to a set containing all its
@@ -86,8 +111,8 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
/// sibling there and use it as the source of the new spill.
DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap;
- bool isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, MachineBasicBlock &BB,
- unsigned &LiveReg);
+ bool isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
+ MachineBasicBlock &BB, unsigned &LiveReg);
void rmRedundantSpills(
SmallPtrSet<MachineInstr *, 16> &Spills,
@@ -101,7 +126,7 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
- void runHoistSpills(unsigned OrigReg, VNInfo &OrigVNI,
+ void runHoistSpills(LiveInterval &OrigLI, VNInfo &OrigVNI,
SmallPtrSet<MachineInstr *, 16> &Spills,
SmallVectorImpl<MachineInstr *> &SpillsToRm,
DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns);
@@ -114,8 +139,7 @@ public:
AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
MDT(pass.getAnalysis<MachineDominatorTree>()),
Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
- MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()),
- TII(*mf.getSubtarget().getInstrInfo()),
+ MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
IPA(LIS, mf.getNumBlockIDs()) {}
@@ -135,7 +159,6 @@ class InlineSpiller : public Spiller {
MachineDominatorTree &MDT;
MachineLoopInfo &Loops;
VirtRegMap &VRM;
- MachineFrameInfo &MFI;
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
const TargetRegisterInfo &TRI;
@@ -163,7 +186,7 @@ class InlineSpiller : public Spiller {
// Object records spills information and does the hoisting.
HoistSpillHelper HSpiller;
- ~InlineSpiller() override {}
+ ~InlineSpiller() override = default;
public:
InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
@@ -172,8 +195,7 @@ public:
AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
MDT(pass.getAnalysis<MachineDominatorTree>()),
Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
- MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()),
- TII(*mf.getSubtarget().getInstrInfo()),
+ MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
HSpiller(pass, mf, vrm) {}
@@ -196,7 +218,7 @@ private:
void reMaterializeAll();
bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
- bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >,
+ bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>,
MachineInstr *LoadMI = nullptr);
void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI);
void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI);
@@ -204,19 +226,17 @@ private:
void spillAroundUses(unsigned Reg);
void spillAll();
};
-}
-namespace llvm {
+} // end anonymous namespace
-Spiller::~Spiller() { }
-void Spiller::anchor() { }
+Spiller::~Spiller() = default;
-Spiller *createInlineSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm) {
- return new InlineSpiller(pass, mf, vrm);
-}
+void Spiller::anchor() {}
+Spiller *llvm::createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ return new InlineSpiller(pass, mf, vrm);
}
//===----------------------------------------------------------------------===//
@@ -340,7 +360,7 @@ bool InlineSpiller::isSibling(unsigned Reg) {
///
/// x = def
/// spill x
-/// y = use x<kill>
+/// y = use killed x
///
/// This hoist only helps when the copy kills its source.
///
@@ -457,7 +477,6 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
} while (!WorkList.empty());
}
-
//===----------------------------------------------------------------------===//
// Rematerialization
//===----------------------------------------------------------------------===//
@@ -496,7 +515,6 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
-
// Analyze instruction
SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
MIBundleOperands::VirtRegInfo RI =
@@ -654,7 +672,6 @@ void InlineSpiller::reMaterializeAll() {
DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
}
-
//===----------------------------------------------------------------------===//
// Spilling
//===----------------------------------------------------------------------===//
@@ -731,7 +748,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
/// @return True on success.
bool InlineSpiller::
-foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
+foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
MachineInstr *LoadMI) {
if (Ops.empty())
return false;
@@ -904,7 +921,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
/// spillAroundUses - insert spill code around each use of Reg.
void InlineSpiller::spillAroundUses(unsigned Reg) {
- DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n');
+ DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n');
LiveInterval &OldLI = LIS.getInterval(Reg);
// Iterate over instructions using Reg.
@@ -1060,7 +1077,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
DEBUG(dbgs() << "Inline spilling "
<< TRI.getRegClassName(MRI.getRegClass(edit.getReg()))
<< ':' << edit.getParent()
- << "\nFrom original " << PrintReg(Original) << '\n');
+ << "\nFrom original " << printReg(Original) << '\n');
assert(edit.getParent().isSpillable() &&
"Attempting to spill already spilled value.");
assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
@@ -1076,49 +1093,52 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
}
/// Optimizations after all the reg selections and spills are done.
-///
void InlineSpiller::postOptimization() { HSpiller.hoistAllSpills(); }
/// When a spill is inserted, add the spill to MergeableSpills map.
-///
void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
unsigned Original) {
- StackSlotToReg[StackSlot] = Original;
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ // Save a copy of LiveInterval in StackSlotToOrigLI because the original
+ // LiveInterval may be cleared after all its references are spilled.
+ if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) {
+ auto LI = llvm::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight);
+ LI->assign(OrigLI, Allocator);
+ StackSlotToOrigLI[StackSlot] = std::move(LI);
+ }
SlotIndex Idx = LIS.getInstructionIndex(Spill);
- VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+ VNInfo *OrigVNI = StackSlotToOrigLI[StackSlot]->getVNInfoAt(Idx.getRegSlot());
std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
MergeableSpills[MIdx].insert(&Spill);
}
/// When a spill is removed, remove the spill from MergeableSpills map.
/// Return true if the spill is removed successfully.
-///
bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
int StackSlot) {
- int Original = StackSlotToReg[StackSlot];
- if (!Original)
+ auto It = StackSlotToOrigLI.find(StackSlot);
+ if (It == StackSlotToOrigLI.end())
return false;
SlotIndex Idx = LIS.getInstructionIndex(Spill);
- VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+ VNInfo *OrigVNI = It->second->getVNInfoAt(Idx.getRegSlot());
std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
return MergeableSpills[MIdx].erase(&Spill);
}
/// Check BB to see if it is a possible target BB to place a hoisted spill,
/// i.e., there should be a live sibling of OrigReg at the insert point.
-///
-bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI,
+bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
MachineBasicBlock &BB, unsigned &LiveReg) {
SlotIndex Idx;
- LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+ unsigned OrigReg = OrigLI.reg;
MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB);
if (MI != BB.end())
Idx = LIS.getInstructionIndex(*MI);
else
Idx = LIS.getMBBEndIdx(&BB).getPrevSlot();
SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg];
- assert((LIS.getInterval(OrigReg)).getVNInfoAt(Idx) == &OrigVNI &&
- "Unexpected VNI");
+ assert(OrigLI.getVNInfoAt(Idx) == &OrigVNI && "Unexpected VNI");
for (auto const SibReg : Siblings) {
LiveInterval &LI = LIS.getInterval(SibReg);
@@ -1133,7 +1153,6 @@ bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI,
/// Remove redundant spills in the same BB. Save those redundant spills in
/// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map.
-///
void HoistSpillHelper::rmRedundantSpills(
SmallPtrSet<MachineInstr *, 16> &Spills,
SmallVectorImpl<MachineInstr *> &SpillsToRm,
@@ -1166,7 +1185,6 @@ void HoistSpillHelper::rmRedundantSpills(
/// time. \p SpillBBToSpill will be populated as part of the process and
/// maps a basic block to the first store occurring in the basic block.
/// \post SpillsToRm.union(Spills\@post) == Spills\@pre
-///
void HoistSpillHelper::getVisitOrders(
MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
SmallVectorImpl<MachineDomTreeNode *> &Orders,
@@ -1254,9 +1272,9 @@ void HoistSpillHelper::getVisitOrders(
/// Try to hoist spills according to BB hotness. The spills to be removed will
/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
/// \p SpillsToIns.
-///
void HoistSpillHelper::runHoistSpills(
- unsigned OrigReg, VNInfo &OrigVNI, SmallPtrSet<MachineInstr *, 16> &Spills,
+ LiveInterval &OrigLI, VNInfo &OrigVNI,
+ SmallPtrSet<MachineInstr *, 16> &Spills,
SmallVectorImpl<MachineInstr *> &SpillsToRm,
DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
// Visit order of dominator tree nodes.
@@ -1280,9 +1298,10 @@ void HoistSpillHelper::runHoistSpills(
// nodes set and the cost of all the spills inside those nodes.
// The nodes set are the locations where spills are to be inserted
// in the subtree of current node.
- typedef std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>
- NodesCostPair;
+ using NodesCostPair =
+ std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>;
DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;
+
// Iterate Orders set in reverse order, which will be a bottom-up order
// in the dominator tree. Once we visit a dom tree node, we know its
// children have already been visited and the spill locations in the
@@ -1331,7 +1350,7 @@ void HoistSpillHelper::runHoistSpills(
// Check whether Block is a possible candidate to insert spill.
unsigned LiveReg = 0;
- if (!isSpillCandBB(OrigReg, OrigVNI, *Block, LiveReg))
+ if (!isSpillCandBB(OrigLI, OrigVNI, *Block, LiveReg))
continue;
// If there are multiple spills that could be merged, bias a little
@@ -1391,18 +1410,12 @@ void HoistSpillHelper::runHoistSpills(
/// bottom-up order, and for each node, we will decide whether to hoist spills
/// inside its subtree to that node. In this way, we can get benefit locally
/// even if hoisting all the equal spills to one cold place is impossible.
-///
void HoistSpillHelper::hoistAllSpills() {
SmallVector<unsigned, 4> NewVRegs;
LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);
- // Save the mapping between stackslot and its original reg.
- DenseMap<int, unsigned> SlotToOrigReg;
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- int Slot = VRM.getStackSlot(Reg);
- if (Slot != VirtRegMap::NO_STACK_SLOT)
- SlotToOrigReg[Slot] = VRM.getOriginal(Reg);
unsigned Original = VRM.getPreSplitReg(Reg);
if (!MRI.def_empty(Reg))
Virt2SiblingsMap[Original].insert(Reg);
@@ -1411,8 +1424,7 @@ void HoistSpillHelper::hoistAllSpills() {
// Each entry in MergeableSpills contains a spill set with equal values.
for (auto &Ent : MergeableSpills) {
int Slot = Ent.first.first;
- unsigned OrigReg = SlotToOrigReg[Slot];
- LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+ LiveInterval &OrigLI = *StackSlotToOrigLI[Slot];
VNInfo *OrigVNI = Ent.first.second;
SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
if (Ent.second.empty())
@@ -1431,7 +1443,7 @@ void HoistSpillHelper::hoistAllSpills() {
// SpillsToIns is the spill set to be newly inserted after hoisting.
DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;
- runHoistSpills(OrigReg, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
+ runHoistSpills(OrigLI, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
DEBUG({
dbgs() << "Finally inserted spills in BB: ";
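
Note: the InlineSpiller hunks above replace the bare StackSlotToReg map with StackSlotToOrigLI, which caches a heap-allocated copy of each original LiveInterval keyed by stack slot, so VNInfo queries still succeed after the original interval is cleared. A minimal standalone sketch of that copy-before-invalidation pattern — Interval, ValueNo, and SpillCache are hypothetical names, and standard containers stand in for LLVM's types:

#include <map>
#include <memory>

struct ValueNo { int Id; };

struct Interval {
  int Reg;
  std::map<int, ValueNo> ValuesAt; // slot index -> value number
  const ValueNo *valueAt(int Idx) const {
    auto It = ValuesAt.find(Idx);
    return It == ValuesAt.end() ? nullptr : &It->second;
  }
};

class SpillCache {
  std::map<int, std::unique_ptr<Interval>> SlotToOrigInterval;

public:
  // Snapshot the interval the first time a slot is seen; later queries use
  // the copy even after the original interval has been cleared.
  void remember(int Slot, const Interval &Orig) {
    if (SlotToOrigInterval.find(Slot) == SlotToOrigInterval.end())
      SlotToOrigInterval[Slot] = std::make_unique<Interval>(Orig);
  }
  const ValueNo *query(int Slot, int Idx) const {
    auto It = SlotToOrigInterval.find(Slot);
    return It == SlotToOrigInterval.end() ? nullptr : It->second->valueAt(Idx);
  }
};
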
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index f8cc24724580..72227cc7bba9 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -1,4 +1,4 @@
-//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===//
+//===- InterferenceCache.cpp - Caching per-block interference -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,9 +12,21 @@
//===----------------------------------------------------------------------===//
#include "InterferenceCache.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <tuple>
using namespace llvm;
@@ -149,7 +161,7 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
BlockInterference *BI = &Blocks[MBBNum];
ArrayRef<SlotIndex> RegMaskSlots;
ArrayRef<const uint32_t*> RegMaskBits;
- for (;;) {
+ while (true) {
BI->Tag = Tag;
BI->First = BI->Last = SlotIndex();
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 18aa5c7c5ad6..160e2b16e294 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -1,4 +1,4 @@
-//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//===- InterferenceCache.h - Caching per-block interference ----*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,47 +15,53 @@
#ifndef LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
#define LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
namespace llvm {
class LiveIntervals;
+class MachineFunction;
+class TargetRegisterInfo;
class LLVM_LIBRARY_VISIBILITY InterferenceCache {
- const TargetRegisterInfo *TRI;
- LiveIntervalUnion *LIUArray;
- MachineFunction *MF;
-
/// BlockInterference - information about the interference in a single basic
/// block.
struct BlockInterference {
- BlockInterference() : Tag(0) {}
- unsigned Tag;
+ unsigned Tag = 0;
SlotIndex First;
SlotIndex Last;
+
+ BlockInterference() {}
};
/// Entry - A cache entry containing interference information for all aliases
/// of PhysReg in all basic blocks.
class Entry {
/// PhysReg - The register currently represented.
- unsigned PhysReg;
+ unsigned PhysReg = 0;
/// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
/// change.
- unsigned Tag;
+ unsigned Tag = 0;
/// RefCount - The total number of Cursor instances referring to this Entry.
- unsigned RefCount;
+ unsigned RefCount = 0;
/// MF - The current function.
MachineFunction *MF;
/// Indexes - Mapping block numbers to SlotIndex ranges.
- SlotIndexes *Indexes;
+ SlotIndexes *Indexes = nullptr;
/// LIS - Used for accessing register mask interference maps.
- LiveIntervals *LIS;
+ LiveIntervals *LIS = nullptr;
/// PrevPos - The previous position the iterators were moved to.
SlotIndex PrevPos;
@@ -72,13 +78,12 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
unsigned VirtTag;
/// Fixed interference in RegUnit.
- LiveRange *Fixed;
+ LiveRange *Fixed = nullptr;
/// Iterator pointing into the fixed RegUnit interference.
LiveInterval::iterator FixedI;
- RegUnitInfo(LiveIntervalUnion &LIU)
- : VirtTag(LIU.getTag()), Fixed(nullptr) {
+ RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()) {
VirtI.setMap(LIU.getMap());
}
};
@@ -94,7 +99,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
void update(unsigned MBBNum);
public:
- Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(nullptr), LIS(nullptr) {}
+ Entry() = default;
void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
assert(!hasRefs() && "Cannot clear cache entry with references");
@@ -134,13 +139,17 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
// robin manner.
enum { CacheEntries = 32 };
+ const TargetRegisterInfo *TRI = nullptr;
+ LiveIntervalUnion *LIUArray = nullptr;
+ MachineFunction *MF = nullptr;
+
// Point to an entry for each physreg. The entry pointed to may not be up to
// date, and it may have been reused for a different physreg.
- unsigned char* PhysRegEntries;
- size_t PhysRegEntriesCount;
+ unsigned char* PhysRegEntries = nullptr;
+ size_t PhysRegEntriesCount = 0;
// Next round-robin entry to be picked.
- unsigned RoundRobin;
+ unsigned RoundRobin = 0;
// The actual cache entries.
Entry Entries[CacheEntries];
@@ -149,9 +158,9 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
Entry *get(unsigned PhysReg);
public:
- InterferenceCache()
- : TRI(nullptr), LIUArray(nullptr), MF(nullptr), PhysRegEntries(nullptr),
- PhysRegEntriesCount(0), RoundRobin(0) {}
+ friend class Cursor;
+
+ InterferenceCache() = default;
~InterferenceCache() {
free(PhysRegEntries);
@@ -160,8 +169,9 @@ public:
void reinitPhysRegEntries();
/// init - Prepare cache for a new function.
- void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*,
- const TargetRegisterInfo *);
+ void init(MachineFunction *mf, LiveIntervalUnion *liuarray,
+ SlotIndexes *indexes, LiveIntervals *lis,
+ const TargetRegisterInfo *tri);
/// getMaxCursors - Return the maximum number of concurrent cursors that can
/// be supported.
@@ -169,8 +179,8 @@ public:
/// Cursor - The primary query interface for the block interference cache.
class Cursor {
- Entry *CacheEntry;
- const BlockInterference *Current;
+ Entry *CacheEntry = nullptr;
+ const BlockInterference *Current = nullptr;
static const BlockInterference NoInterference;
void setEntry(Entry *E) {
@@ -186,10 +196,9 @@ public:
public:
/// Cursor - Create a dangling cursor.
- Cursor() : CacheEntry(nullptr), Current(nullptr) {}
- ~Cursor() { setEntry(nullptr); }
+ Cursor() = default;
- Cursor(const Cursor &O) : CacheEntry(nullptr), Current(nullptr) {
+ Cursor(const Cursor &O) {
setEntry(O.CacheEntry);
}
@@ -198,6 +207,8 @@ public:
return *this;
}
+ ~Cursor() { setEntry(nullptr); }
+
/// setPhysReg - Point this cursor to PhysReg's interference.
void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
// Release reference before getting a new one. That guarantees we can
@@ -229,10 +240,8 @@ public:
return Current->Last;
}
};
-
- friend class Cursor;
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
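
Note: the InterferenceCache.h changes are mostly a C++11 modernization — default member initializers replace hand-written constructor init lists, so Entry(), InterferenceCache(), and Cursor() collapse to "= default". A minimal sketch of the idiom, using a hypothetical Cache class rather than the LLVM types:

#include <cstddef>

class Cache {
  unsigned Tag = 0;                 // was: Cache() : Tag(0), ... {}
  unsigned RefCount = 0;
  unsigned char *Entries = nullptr; // released by the destructor
  std::size_t EntryCount = 0;

public:
  Cache() = default; // every member already has an initializer
  ~Cache() { delete[] Entries; }
};
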
diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp
index ee4929c91482..9c906d309639 100644
--- a/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/lib/CodeGen/InterleavedAccessPass.cpp
@@ -1,4 +1,4 @@
-//===--------------------- InterleavedAccessPass.cpp ----------------------===//
+//===- InterleavedAccessPass.cpp ------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -42,17 +42,32 @@
//
// Similarly, a set of interleaved stores can be transformed into an optimized
// sequence of shuffles followed by a set of target specific stores for X86.
+//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <utility>
using namespace llvm;
@@ -66,10 +81,10 @@ static cl::opt<bool> LowerInterleavedAccesses(
namespace {
class InterleavedAccess : public FunctionPass {
-
public:
static char ID;
- InterleavedAccess() : FunctionPass(ID), DT(nullptr), TLI(nullptr) {
+
+ InterleavedAccess() : FunctionPass(ID) {
initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
}
@@ -83,8 +98,8 @@ public:
}
private:
- DominatorTree *DT;
- const TargetLowering *TLI;
+ DominatorTree *DT = nullptr;
+ const TargetLowering *TLI = nullptr;
/// The maximum supported interleave factor.
unsigned MaxFactor;
@@ -104,9 +119,11 @@ private:
bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
ArrayRef<ShuffleVectorInst *> Shuffles);
};
+
} // end anonymous namespace.
char InterleavedAccess::ID = 0;
+
INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
"Lower interleaved memory accesses to target specific intrinsics", false,
false)
@@ -331,7 +348,6 @@ bool InterleavedAccess::lowerInterleavedLoad(
bool InterleavedAccess::tryReplaceExtracts(
ArrayRef<ExtractElementInst *> Extracts,
ArrayRef<ShuffleVectorInst *> Shuffles) {
-
// If there aren't any extractelement instructions to modify, there's nothing
// to do.
if (Extracts.empty())
@@ -342,7 +358,6 @@ bool InterleavedAccess::tryReplaceExtracts(
DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
for (auto *Extract : Extracts) {
-
// The vector index that is extracted.
auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
auto Index = IndexOperand->getSExtValue();
@@ -351,7 +366,6 @@ bool InterleavedAccess::tryReplaceExtracts(
// extractelement instruction (which uses an interleaved load) to use one
// of the shufflevector instructions instead of the load.
for (auto *Shuffle : Shuffles) {
-
// If the shufflevector instruction doesn't dominate the extract, we
// can't create a use of it.
if (!DT->dominates(Shuffle, Extract))
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index c6cc909e25d3..12777d5ed110 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -57,10 +57,10 @@ static void EnsureFPIntrinsicsExist(Module &M, Function &Fn,
}
}
-/// ReplaceCallWith - This function is used when we want to lower an intrinsic
-/// call to a call of an external function. This handles hard cases such as
-/// when there was already a prototype for the external function, and if that
-/// prototype doesn't match the arguments we expect to pass in.
+/// This function is used when we want to lower an intrinsic call to a call of
+/// an external function. This handles hard cases such as when there was already
+/// a prototype for the external function, but that prototype doesn't match the
+/// arguments we expect to pass in.
template <class ArgIt>
static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
ArgIt ArgBegin, ArgIt ArgEnd,
@@ -161,12 +161,11 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
}
}
-/// LowerBSWAP - Emit the code to lower bswap of V before the specified
-/// instruction IP.
+/// Emit the code to lower bswap of V before the specified instruction IP.
static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
- assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
+ assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!");
- unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned BitSize = V->getType()->getScalarSizeInBits();
IRBuilder<> Builder(IP);
@@ -190,10 +189,10 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24),
"bswap.1");
Tmp3 = Builder.CreateAnd(Tmp3,
- ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000),
+ ConstantInt::get(V->getType(), 0xFF0000),
"bswap.and3");
Tmp2 = Builder.CreateAnd(Tmp2,
- ConstantInt::get(Type::getInt32Ty(Context), 0xFF00),
+ ConstantInt::get(V->getType(), 0xFF00),
"bswap.and2");
Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
@@ -221,27 +220,27 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
ConstantInt::get(V->getType(), 56),
"bswap.1");
Tmp7 = Builder.CreateAnd(Tmp7,
- ConstantInt::get(Type::getInt64Ty(Context),
+ ConstantInt::get(V->getType(),
0xFF000000000000ULL),
"bswap.and7");
Tmp6 = Builder.CreateAnd(Tmp6,
- ConstantInt::get(Type::getInt64Ty(Context),
+ ConstantInt::get(V->getType(),
0xFF0000000000ULL),
"bswap.and6");
Tmp5 = Builder.CreateAnd(Tmp5,
- ConstantInt::get(Type::getInt64Ty(Context),
+ ConstantInt::get(V->getType(),
0xFF00000000ULL),
"bswap.and5");
Tmp4 = Builder.CreateAnd(Tmp4,
- ConstantInt::get(Type::getInt64Ty(Context),
+ ConstantInt::get(V->getType(),
0xFF000000ULL),
"bswap.and4");
Tmp3 = Builder.CreateAnd(Tmp3,
- ConstantInt::get(Type::getInt64Ty(Context),
+ ConstantInt::get(V->getType(),
0xFF0000ULL),
"bswap.and3");
Tmp2 = Builder.CreateAnd(Tmp2,
- ConstantInt::get(Type::getInt64Ty(Context),
+ ConstantInt::get(V->getType(),
0xFF00ULL),
"bswap.and2");
Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
@@ -257,8 +256,7 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
return V;
}
-/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
-/// instruction IP.
+/// Emit the code to lower ctpop of V before the specified instruction IP.
static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
@@ -297,8 +295,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
return Count;
}
-/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
-/// instruction IP.
+/// Emit the code to lower ctlz of V before the specified instruction IP.
static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
IRBuilder<> Builder(IP);
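
Note: the LowerBSWAP hunks generalize the expansion from scalar integers to vectors — the masks are now built with ConstantInt::get(V->getType(), ...) so they splat across vector lanes, and the bit width comes from getScalarSizeInBits(). A standalone plain-C++ sketch of the shift-and-mask expansion for the 32-bit scalar case, mirroring the bswap.and* structure above:

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t V) {
  uint32_t Tmp4 = V << 24;                 // byte 0 -> byte 3
  uint32_t Tmp3 = (V << 8) & 0x00FF0000u;  // byte 1 -> byte 2 (bswap.and3)
  uint32_t Tmp2 = (V >> 8) & 0x0000FF00u;  // byte 2 -> byte 1 (bswap.and2)
  uint32_t Tmp1 = V >> 24;                 // byte 3 -> byte 0
  return Tmp4 | Tmp3 | Tmp2 | Tmp1;
}

int main() {
  assert(bswap32(0x12345678u) == 0x78563412u);
}
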
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index f2defb4fd623..92edfb059ad6 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -16,11 +16,12 @@
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Verifier.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -29,10 +30,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Transforms/Scalar.h"
using namespace llvm;
void LLVMTargetMachine::initAsmInfo() {
@@ -77,7 +76,6 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) {
- T.adjustCodeGenOpts(TT, RM, CM);
this->RM = RM;
this->CMModel = CM;
this->OptLevel = OL;
@@ -92,24 +90,25 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
static MCContext *
addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
- bool DisableVerify, AnalysisID StartBefore,
- AnalysisID StartAfter, AnalysisID StopBefore,
- AnalysisID StopAfter) {
+ bool DisableVerify, bool &WillCompleteCodeGenPipeline,
+ raw_pwrite_stream &Out, MachineModuleInfo *MMI) {
// Targets may override createPassConfig to provide a target-specific
// subclass.
TargetPassConfig *PassConfig = TM->createPassConfig(PM);
- PassConfig->setStartStopPasses(StartBefore, StartAfter, StopBefore,
- StopAfter);
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
+ WillCompleteCodeGenPipeline = PassConfig->willCompleteCodeGenPipeline();
PM.add(PassConfig);
- MachineModuleInfo *MMI = new MachineModuleInfo(TM);
+ if (!MMI)
+ MMI = new MachineModuleInfo(TM);
PM.add(MMI);
if (PassConfig->addISelPasses())
return nullptr;
PassConfig->addMachinePasses();
PassConfig->setInitialized();
+ if (!WillCompleteCodeGenPipeline)
+ PM.add(createPrintMIRPass(Out));
return &MMI->getContext();
}
@@ -163,7 +162,8 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
Triple T(getTargetTriple().str());
AsmStreamer.reset(getTarget().createMCObjectStreamer(
- T, Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ T, Context, std::unique_ptr<MCAsmBackend>(MAB), Out,
+ std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
Options.MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
break;
@@ -185,23 +185,20 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
return false;
}
-bool LLVMTargetMachine::addPassesToEmitFile(
- PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
- bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
- AnalysisID StopBefore, AnalysisID StopAfter) {
+bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ raw_pwrite_stream &Out,
+ CodeGenFileType FileType,
+ bool DisableVerify,
+ MachineModuleInfo *MMI) {
// Add common CodeGen passes.
- MCContext *Context =
- addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter,
- StopBefore, StopAfter);
+ bool WillCompleteCodeGenPipeline = true;
+ MCContext *Context = addPassesToGenerateCode(
+ this, PM, DisableVerify, WillCompleteCodeGenPipeline, Out, MMI);
if (!Context)
return true;
- if (StopBefore || StopAfter) {
- PM.add(createPrintMIRPass(Out));
- } else {
- if (addAsmPrinter(PM, Out, FileType, *Context))
- return true;
- }
+ if (WillCompleteCodeGenPipeline && addAsmPrinter(PM, Out, FileType, *Context))
+ return true;
PM.add(createFreeMachineFunctionPass());
return false;
@@ -216,10 +213,13 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
raw_pwrite_stream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
- Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr,
- nullptr, nullptr);
+ bool WillCompleteCodeGenPipeline = true;
+ Ctx = addPassesToGenerateCode(this, PM, DisableVerify,
+ WillCompleteCodeGenPipeline, Out,
+ /*MachineModuleInfo*/ nullptr);
if (!Ctx)
return true;
+ assert(WillCompleteCodeGenPipeline && "CodeGen pipeline has been altered");
if (Options.MCOptions.MCSaveTempLabels)
Ctx->setAllowTemporaryLabels(false);
@@ -238,7 +238,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
const Triple &T = getTargetTriple();
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
- T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), Out,
+ std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
Options.MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
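
Note: the LLVMTargetMachine rework moves start/stop-pass bookkeeping into TargetPassConfig — addPassesToGenerateCode now asks the config whether the pipeline will run to completion and appends the MIR printer itself when it will not. A minimal sketch of that control flow, with hypothetical Pipeline/PipelineConfig types standing in for the PassManager machinery:

#include <functional>
#include <iostream>
#include <vector>

struct Pipeline {
  std::vector<std::function<void()>> Passes;
  void add(std::function<void()> P) { Passes.push_back(std::move(P)); }
  void run() {
    for (auto &P : Passes)
      P();
  }
};

struct PipelineConfig {
  bool StopEarly;
  bool willCompletePipeline() const { return !StopEarly; }
};

void buildPipeline(Pipeline &PM, const PipelineConfig &Cfg,
                   bool &WillComplete) {
  WillComplete = Cfg.willCompletePipeline();
  PM.add([] { std::cout << "codegen pass\n"; });
  if (!WillComplete) // caller asked to stop early: emit the partial result
    PM.add([] { std::cout << "print intermediate form\n"; });
}

int main() {
  Pipeline PM;
  PipelineConfig Cfg;
  Cfg.StopEarly = true;
  bool WillComplete = true;
  buildPipeline(PM, Cfg, WillComplete);
  PM.run(); // runs the codegen pass, then prints the intermediate form
}
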
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 86ef898932a7..8ffd51a550fc 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -134,6 +134,7 @@ SUnit *LatencyPriorityQueue::pop() {
void LatencyPriorityQueue::remove(SUnit *SU) {
assert(!Queue.empty() && "Queue is empty!");
std::vector<SUnit *>::iterator I = find(Queue, SU);
+ assert(I != Queue.end() && "Queue doesn't contain the SU being removed!");
if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
Queue.pop_back();
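
Note: the new assert in LatencyPriorityQueue::remove guards the classic swap-with-back erase — if find() could miss, the std::prev dereference below it would be undefined. A standalone sketch of the idiom:

#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

template <typename T>
void swapAndPop(std::vector<T> &Q, const T &V) {
  auto I = std::find(Q.begin(), Q.end(), V);
  assert(I != Q.end() && "queue doesn't contain the element being removed");
  // O(1) unordered erase: move the victim to the back, then pop it.
  if (I != std::prev(Q.end()))
    std::iter_swap(I, std::prev(Q.end()));
  Q.pop_back();
}
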
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 995c58a63564..8c54751ee833 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -47,11 +47,11 @@ void LexicalScopes::reset() {
/// initialize - Scan machine function and construct lexical scope nest.
void LexicalScopes::initialize(const MachineFunction &Fn) {
+ reset();
// Don't attempt any lexical scope creation for a NoDebug compile unit.
- if (Fn.getFunction()->getSubprogram()->getUnit()->getEmissionKind() ==
+ if (Fn.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
DICompileUnit::NoDebug)
return;
- reset();
MF = &Fn;
SmallVector<InsnRange, 4> MIRanges;
DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap;
@@ -173,7 +173,7 @@ LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) {
false)).first;
if (!Parent) {
- assert(cast<DISubprogram>(Scope)->describes(MF->getFunction()));
+ assert(cast<DISubprogram>(Scope)->describes(&MF->getFunction()));
assert(!CurrentFnLexicalScope);
CurrentFnLexicalScope = &I->second;
}
@@ -277,7 +277,9 @@ void LexicalScopes::assignInstructionRanges(
/// DebugLoc.
void LexicalScopes::getMachineBasicBlocks(
const DILocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) {
+ assert(MF && "Method called on an uninitialized LexicalScopes object!");
MBBs.clear();
+
LexicalScope *Scope = getOrCreateLexicalScope(DL);
if (!Scope)
return;
@@ -296,6 +298,7 @@ void LexicalScopes::getMachineBasicBlocks(
/// dominates - Return true if DebugLoc's lexical scope dominates at least one
/// machine instruction's lexical scope in a given machine basic block.
bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
+ assert(MF && "Unexpected uninitialized LexicalScopes object!");
LexicalScope *Scope = getOrCreateLexicalScope(DL);
if (!Scope)
return false;
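
Note: the LexicalScopes change hoists reset() above the NoDebug early return, so a bail-out can no longer leave state from the previous function behind, and the new asserts catch queries on an uninitialized object. A tiny sketch of the ordering fix, using a hypothetical Analysis class:

class Analysis {
  int Cached = 0;
  bool Initialized = false;

public:
  void initialize(bool HasDebugInfo) {
    reset(); // always clear stale state first...
    if (!HasDebugInfo)
      return; // ...so the early return is now safe
    Initialized = true;
    Cached = compute();
  }
  void reset() {
    Cached = 0;
    Initialized = false;
  }

private:
  static int compute() { return 42; } // stand-in for real work
};
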
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index b5e705f6455d..19ec281079cb 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -1,4 +1,4 @@
-//===------ LiveDebugValues.cpp - Tracking Debug Value MIs ----------------===//
+//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,28 +18,45 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <list>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
#include <queue>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -47,8 +64,6 @@ using namespace llvm;
STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
-namespace {
-
// \brief If @MI is a DBG_VALUE with debug value described by a defined
// register, returns the number of this register. Otherwise, returns 0.
static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {
@@ -59,8 +74,9 @@ static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
}
-class LiveDebugValues : public MachineFunctionPass {
+namespace {
+class LiveDebugValues : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
@@ -87,15 +103,15 @@ private:
};
/// Based on std::pair so it can be used as an index into a DenseMap.
- typedef std::pair<const DILocalVariable *, const DILocation *>
- DebugVariableBase;
+ using DebugVariableBase =
+ std::pair<const DILocalVariable *, const DILocation *>;
/// A potentially inlined instance of a variable.
struct DebugVariable : public DebugVariableBase {
DebugVariable(const DILocalVariable *Var, const DILocation *InlinedAt)
: DebugVariableBase(Var, InlinedAt) {}
- const DILocalVariable *getVar() const { return this->first; };
- const DILocation *getInlinedAt() const { return this->second; };
+ const DILocalVariable *getVar() const { return this->first; }
+ const DILocation *getInlinedAt() const { return this->second; }
bool operator<(const DebugVariable &DV) const {
if (getVar() == DV.getVar())
@@ -109,38 +125,25 @@ private:
const DebugVariable Var;
const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE.
mutable UserValueScopes UVS;
- enum { InvalidKind = 0, RegisterKind } Kind;
+ enum { InvalidKind = 0, RegisterKind } Kind = InvalidKind;
/// The value location. Stored separately to avoid repeatedly
/// extracting it from MI.
union {
- struct {
- uint32_t RegNo;
- uint32_t Offset;
- } RegisterLoc;
+ uint64_t RegNo;
uint64_t Hash;
} Loc;
VarLoc(const MachineInstr &MI, LexicalScopes &LS)
: Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI),
- UVS(MI.getDebugLoc(), LS), Kind(InvalidKind) {
+ UVS(MI.getDebugLoc(), LS) {
static_assert((sizeof(Loc) == sizeof(uint64_t)),
"hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
if (int RegNo = isDbgValueDescribedByReg(MI)) {
Kind = RegisterKind;
- Loc.RegisterLoc.RegNo = RegNo;
- int64_t Offset =
- MI.isIndirectDebugValue() ? MI.getOperand(1).getImm() : 0;
- // We don't support offsets larger than 4GiB here. They are
- // slated to be replaced with DIExpressions anyway.
- // With indirect debug values used for spill locations, Offset
- // can be negative.
- if (Offset == INT64_MIN || std::abs(Offset) >= (1LL << 32))
- Kind = InvalidKind;
- else
- Loc.RegisterLoc.Offset = Offset;
+ Loc.RegNo = RegNo;
}
}
@@ -148,7 +151,7 @@ private:
/// otherwise return 0.
unsigned isDescribedByReg() const {
if (Kind == RegisterKind)
- return Loc.RegisterLoc.RegNo;
+ return Loc.RegNo;
return 0;
}
@@ -172,14 +175,14 @@ private:
}
};
- typedef UniqueVector<VarLoc> VarLocMap;
- typedef SparseBitVector<> VarLocSet;
- typedef SmallDenseMap<const MachineBasicBlock *, VarLocSet> VarLocInMBB;
+ using VarLocMap = UniqueVector<VarLoc>;
+ using VarLocSet = SparseBitVector<>;
+ using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
struct SpillDebugPair {
MachineInstr *SpillInst;
MachineInstr *DebugInst;
};
- typedef SmallVector<SpillDebugPair, 4> SpillMap;
+ using SpillMap = SmallVector<SpillDebugPair, 4>;
/// This holds the working set of currently open ranges. For fast
/// access, this is done both as a set of VarLocIDs, and a map of
@@ -275,14 +278,16 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
};
-} // namespace
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Implementation
//===----------------------------------------------------------------------===//
char LiveDebugValues::ID = 0;
+
char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
+
INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis",
false, false)
@@ -368,7 +373,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
OpenRangesSet &OpenRanges,
const VarLocMap &VarLocIDs) {
- MachineFunction *MF = MI.getParent()->getParent();
+ MachineFunction *MF = MI.getMF();
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
SparseBitVector<> KillSet;
@@ -444,14 +449,14 @@ void LiveDebugValues::transferSpillInst(MachineInstr &MI,
VarLocMap &VarLocIDs,
SpillMap &Spills) {
unsigned Reg;
- MachineFunction *MF = MI.getParent()->getParent();
+ MachineFunction *MF = MI.getMF();
if (!isSpillInstruction(MI, MF, Reg))
return;
// Check if the register is the location of a debug value.
for (unsigned ID : OpenRanges.getVarLocs()) {
if (VarLocIDs[ID].isDescribedByReg() == Reg) {
- DEBUG(dbgs() << "Spilling Register " << PrintReg(Reg, TRI) << '('
+ DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
<< VarLocIDs[ID].Var.getVar()->getName() << ")\n");
// Create a DBG_VALUE instruction to describe the Var in its spilled
@@ -460,10 +465,11 @@ void LiveDebugValues::transferSpillInst(MachineInstr &MI,
unsigned SpillBase;
int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase);
const MachineInstr *DMI = &VarLocIDs[ID].MI;
+ auto *SpillExpr = DIExpression::prepend(
+ DMI->getDebugExpression(), DIExpression::NoDeref, SpillOffset);
MachineInstr *SpDMI =
- BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, 0,
- DMI->getDebugVariable(), DMI->getDebugExpression());
- SpDMI->getOperand(1).setImm(SpillOffset);
+ BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase,
+ DMI->getDebugVariable(), SpillExpr);
DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
SpDMI->print(dbgs(), false, TII));
@@ -582,7 +588,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
const MachineInstr *DMI = &DiffIt.MI;
MachineInstr *MI =
BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
- DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0,
+ DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(),
DMI->getDebugVariable(), DMI->getDebugExpression());
if (DMI->isIndirectDebugValue())
MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
@@ -597,7 +603,6 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
-
DEBUG(dbgs() << "\nDebug Range Extension\n");
bool Changed = false;
@@ -698,10 +703,15 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
}
bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
- if (!MF.getFunction()->getSubprogram())
+ if (!MF.getFunction().getSubprogram())
// LiveDebugValues will already have removed all DBG_VALUEs.
return false;
+ // Skip functions from NoDebug compilation units.
+ if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return false;
+
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
TFI = MF.getSubtarget().getFrameLowering();
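
Note: in LiveDebugValues, VarLoc no longer carries a 32-bit offset next to the register number — spill offsets are folded into the expression via DIExpression::prepend instead, so the location union shrinks to a single 64-bit field. A sketch of the slimmed-down layout and the static_assert that keeps the hash coverage honest (standalone, not the LLVM definition):

#include <cstdint>

struct VarLoc {
  enum { InvalidKind = 0, RegisterKind } Kind = InvalidKind;
  // The value location; a union so identity/hashing can read one 64-bit
  // word regardless of kind.
  union {
    uint64_t RegNo; // valid when Kind == RegisterKind
    uint64_t Hash;  // raw view used for hashing
  } Loc;
  static_assert(sizeof(Loc) == sizeof(uint64_t),
                "hash does not cover all members of Loc");
};
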
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 0c76478af551..34572f24c181 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -20,26 +20,44 @@
//===----------------------------------------------------------------------===//
#include "LiveDebugVariables.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Value.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
#include <memory>
#include <utility>
@@ -52,6 +70,7 @@ EnableLDV("live-debug-variables", cl::init(true),
cl::desc("Enable the live debug variables pass"), cl::Hidden);
STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+
char LiveDebugVariables::ID = 0;
INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE,
@@ -68,12 +87,56 @@ void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullptr) {
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID) {
initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
}
+enum : unsigned { UndefLocNo = ~0U };
+
+/// Describes a location by number along with some flags about the original
+/// usage of the location.
+class DbgValueLocation {
+public:
+ DbgValueLocation(unsigned LocNo, bool WasIndirect)
+ : LocNo(LocNo), WasIndirect(WasIndirect) {
+ static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing");
+ assert(locNo() == LocNo && "location truncation");
+ }
+
+ DbgValueLocation() : LocNo(0), WasIndirect(0) {}
+
+ unsigned locNo() const {
+ // Fix up the undef location number, which gets truncated.
+ return LocNo == INT_MAX ? UndefLocNo : LocNo;
+ }
+ bool wasIndirect() const { return WasIndirect; }
+ bool isUndef() const { return locNo() == UndefLocNo; }
+
+ DbgValueLocation changeLocNo(unsigned NewLocNo) const {
+ return DbgValueLocation(NewLocNo, WasIndirect);
+ }
+
+ friend inline bool operator==(const DbgValueLocation &LHS,
+ const DbgValueLocation &RHS) {
+ return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect;
+ }
+
+ friend inline bool operator!=(const DbgValueLocation &LHS,
+ const DbgValueLocation &RHS) {
+ return !(LHS == RHS);
+ }
+
+private:
+ unsigned LocNo : 31;
+ unsigned WasIndirect : 1;
+};
+
/// LocMap - Map of where a user value is live, and its location.
-typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+using LocMap = IntervalMap<SlotIndex, DbgValueLocation, 4>;
+
+namespace {
+
+class LDVImpl;
/// UserValue - A user value is a part of a debug info user variable.
///
@@ -84,17 +147,13 @@ typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
/// user values are related if they refer to the same variable, or if they are
/// held by the same virtual register. The equivalence class is the transitive
/// closure of that relation.
-namespace {
-class LDVImpl;
class UserValue {
- const MDNode *Variable; ///< The debug info variable we are part of.
- const MDNode *Expression; ///< Any complex address expression.
- unsigned offset; ///< Byte offset into variable.
- bool IsIndirect; ///< true if this is a register-indirect+offset value.
+ const DILocalVariable *Variable; ///< The debug info variable we are part of.
+ const DIExpression *Expression; ///< Any complex address expression.
DebugLoc dl; ///< The debug location for the variable. This is
///< used by dwarf writer to find lexical scope.
UserValue *leader; ///< Equivalence class leader.
- UserValue *next; ///< Next value in equivalence class, or null.
+ UserValue *next = nullptr; ///< Next value in equivalence class, or null.
/// Numbered locations referenced by locmap.
SmallVector<MachineOperand, 4> locations;
@@ -102,14 +161,16 @@ class UserValue {
/// Map of slot indices where this value is live.
LocMap locInts;
- /// coalesceLocation - After LocNo was changed, check if it has become
- /// identical to another location, and coalesce them. This may cause LocNo or
- /// a later location to be erased, but no earlier location will be erased.
- void coalesceLocation(unsigned LocNo);
+ /// Set of interval start indexes that have been trimmed to the
+ /// lexical scope.
+ SmallSet<SlotIndex, 2> trimmedDefs;
/// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
- void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
- LiveIntervals &LIS, const TargetInstrInfo &TII);
+ void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
+ SlotIndex StopIdx,
+ DbgValueLocation Loc, bool Spilled, LiveIntervals &LIS,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI);
/// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
/// is live. Returns true if any changes were made.
@@ -118,10 +179,10 @@ class UserValue {
public:
/// UserValue - Create a new UserValue.
- UserValue(const MDNode *var, const MDNode *expr, unsigned o, bool i,
- DebugLoc L, LocMap::Allocator &alloc)
- : Variable(var), Expression(expr), offset(o), IsIndirect(i),
- dl(std::move(L)), leader(this), next(nullptr), locInts(alloc) {}
+ UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L,
+ LocMap::Allocator &alloc)
+ : Variable(var), Expression(expr), dl(std::move(L)), leader(this),
+ locInts(alloc) {}
/// getLeader - Get the leader of this value's equivalence class.
UserValue *getLeader() {
@@ -135,10 +196,11 @@ public:
UserValue *getNext() const { return next; }
/// match - Does this UserValue match the parameters?
- bool match(const MDNode *Var, const MDNode *Expr, const DILocation *IA,
- unsigned Offset, bool indirect) const {
- return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA &&
- Offset == offset && indirect == IsIndirect;
+ bool match(const DILocalVariable *Var, const DIExpression *Expr,
+ const DILocation *IA) const {
+ // FIXME: The fragment should be part of the equivalence class, but not
+ // other things in the expression like stack values.
+ return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA;
}
/// merge - Merge equivalence classes.
@@ -165,7 +227,7 @@ public:
unsigned getLocationNo(const MachineOperand &LocMO) {
if (LocMO.isReg()) {
if (LocMO.getReg() == 0)
- return ~0u;
+ return UndefLocNo;
// For register locations we don't care about use/def and other flags.
for (unsigned i = 0, e = locations.size(); i != e; ++i)
if (locations[i].isReg() &&
@@ -189,14 +251,15 @@ public:
void mapVirtRegs(LDVImpl *LDV);
/// addDef - Add a definition point to this value.
- void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) {
+ DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect);
// Add a singular (Idx,Idx) -> Loc mapping.
LocMap::iterator I = locInts.find(Idx);
if (!I.valid() || I.start() != Idx)
- I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ I.insert(Idx, Idx.getNextSlot(), Loc);
else
// A later DBG_VALUE at the same SlotIndex overrides the old location.
- I.setValue(getLocationNo(LocMO));
+ I.setValue(Loc);
}
/// extendDef - Extend the current definition as far as possible down.
@@ -204,12 +267,12 @@ public:
/// range of VNI.
/// End points where VNI is no longer live are added to Kills.
/// @param Idx Starting point for the definition.
- /// @param LocNo Location number to propagate.
+ /// @param Loc Location number to propagate.
/// @param LR Restrict liveness to where LR has the value VNI. May be null.
/// @param VNI When LR is not null, this is the value to restrict to.
/// @param Kills Append end points of VNI's live range to Kills.
/// @param LIS Live intervals analysis.
- void extendDef(SlotIndex Idx, unsigned LocNo,
+ void extendDef(SlotIndex Idx, DbgValueLocation Loc,
LiveRange *LR, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS);
@@ -219,18 +282,19 @@ public:
/// points, and add defs if possible.
/// @param LI Scan for copies of the value in LI->reg.
/// @param LocNo Location number of LI->reg.
+ /// @param WasIndirect Indicates if the original use of LI->reg was indirect
/// @param Kills Points where the range of LocNo could be extended.
/// @param NewDefs Append (Idx, LocNo) of inserted defs here.
- void addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
- const SmallVectorImpl<SlotIndex> &Kills,
- SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
- MachineRegisterInfo &MRI,
- LiveIntervals &LIS);
+ void addDefsFromCopies(
+ LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
+ MachineRegisterInfo &MRI, LiveIntervals &LIS);
/// computeIntervals - Compute the live intervals of all locations after
/// collecting all their def points.
void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- LiveIntervals &LIS);
+ LiveIntervals &LIS, LexicalScopes &LS);
/// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
/// live. Returns true if any changes were made.
@@ -238,47 +302,50 @@ public:
LiveIntervals &LIS);
/// rewriteLocations - Rewrite virtual register locations according to the
- /// provided virtual register map.
- void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+ /// provided virtual register map. Record which locations were spilled.
+ void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
+ BitVector &SpilledLocations);
/// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
- void emitDebugValues(VirtRegMap *VRM,
- LiveIntervals &LIS, const TargetInstrInfo &TRI);
+ void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ const BitVector &SpilledLocations);
/// getDebugLoc - Return DebugLoc of this UserValue.
DebugLoc getDebugLoc() { return dl;}
+
void print(raw_ostream &, const TargetRegisterInfo *);
};
-} // namespace
/// LDVImpl - Implementation of the LiveDebugVariables pass.
-namespace {
class LDVImpl {
LiveDebugVariables &pass;
LocMap::Allocator allocator;
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
LiveIntervals *LIS;
const TargetRegisterInfo *TRI;
/// Whether emitDebugValues is called.
- bool EmitDone;
+ bool EmitDone = false;
+
/// Whether the machine function is modified during the pass.
- bool ModifiedMF;
+ bool ModifiedMF = false;
/// userValues - All allocated UserValue instances.
SmallVector<std::unique_ptr<UserValue>, 8> userValues;
/// Map virtual register to eq class leader.
- typedef DenseMap<unsigned, UserValue*> VRMap;
+ using VRMap = DenseMap<unsigned, UserValue *>;
VRMap virtRegToEqClass;
/// Map user variable to eq class leader.
- typedef DenseMap<const MDNode *, UserValue*> UVMap;
+ using UVMap = DenseMap<const DILocalVariable *, UserValue *>;
UVMap userVarMap;
/// getUserValue - Find or create a UserValue.
- UserValue *getUserValue(const MDNode *Var, const MDNode *Expr,
- unsigned Offset, bool IsIndirect, const DebugLoc &DL);
+ UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr,
+ const DebugLoc &DL);
/// lookupVirtReg - Find the EC leader for VirtReg or null.
UserValue *lookupVirtReg(unsigned VirtReg);
@@ -300,8 +367,8 @@ class LDVImpl {
void computeIntervals();
public:
- LDVImpl(LiveDebugVariables *ps)
- : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {}
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+
bool runOnMachineFunction(MachineFunction &mf);
/// clear - Release all memory.
@@ -328,8 +395,10 @@ public:
void print(raw_ostream&);
};
-} // namespace
+} // end anonymous namespace
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS,
const LLVMContext &Ctx) {
if (!DL)
@@ -372,14 +441,15 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
printExtendedName(OS, DV, dl);
OS << "\"\t";
- if (offset)
- OS << '+' << offset;
for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
OS << " [" << I.start() << ';' << I.stop() << "):";
- if (I.value() == ~0u)
+ if (I.value().isUndef())
OS << "undef";
- else
- OS << I.value();
+ else {
+ OS << I.value().locNo();
+ if (I.value().wasIndirect())
+ OS << " ind";
+ }
}
for (unsigned i = 0, e = locations.size(); i != e; ++i) {
OS << " Loc" << i << '=';
@@ -393,34 +463,7 @@ void LDVImpl::print(raw_ostream &OS) {
for (unsigned i = 0, e = userValues.size(); i != e; ++i)
userValues[i]->print(OS, TRI);
}
-
-void UserValue::coalesceLocation(unsigned LocNo) {
- unsigned KeepLoc = 0;
- for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
- if (KeepLoc == LocNo)
- continue;
- if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
- break;
- }
- // No matches.
- if (KeepLoc == locations.size())
- return;
-
- // Keep the smaller location, erase the larger one.
- unsigned EraseLoc = LocNo;
- if (KeepLoc > EraseLoc)
- std::swap(KeepLoc, EraseLoc);
- locations.erase(locations.begin() + EraseLoc);
-
- // Rewrite values.
- for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
- unsigned v = I.value();
- if (v == EraseLoc)
- I.setValue(KeepLoc); // Coalesce when possible.
- else if (v > EraseLoc)
- I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
- }
-}
+#endif
void UserValue::mapVirtRegs(LDVImpl *LDV) {
for (unsigned i = 0, e = locations.size(); i != e; ++i)
@@ -429,20 +472,19 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) {
LDV->mapVirtReg(locations[i].getReg(), this);
}
-UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr,
- unsigned Offset, bool IsIndirect,
- const DebugLoc &DL) {
+UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
+ const DIExpression *Expr, const DebugLoc &DL) {
UserValue *&Leader = userVarMap[Var];
if (Leader) {
UserValue *UV = Leader->getLeader();
Leader = UV;
for (; UV; UV = UV->getNext())
- if (UV->match(Var, Expr, DL->getInlinedAt(), Offset, IsIndirect))
+ if (UV->match(Var, Expr, DL->getInlinedAt()))
return UV;
}
userValues.push_back(
- make_unique<UserValue>(Var, Expr, Offset, IsIndirect, DL, allocator));
+ llvm::make_unique<UserValue>(Var, Expr, DL, allocator));
UserValue *UV = userValues.back().get();
Leader = UserValue::merge(Leader, UV);
return UV;
@@ -469,14 +511,15 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
return false;
}
- // Get or create the UserValue for (variable,offset).
- bool IsIndirect = MI.isIndirectDebugValue();
- unsigned Offset = IsIndirect ? MI.getOperand(1).getImm() : 0;
- const MDNode *Var = MI.getDebugVariable();
- const MDNode *Expr = MI.getDebugExpression();
- //here.
- UserValue *UV = getUserValue(Var, Expr, Offset, IsIndirect, MI.getDebugLoc());
- UV->addDef(Idx, MI.getOperand(0));
+ // Get or create the UserValue for (variable,offset) here.
+ bool IsIndirect = MI.getOperand(1).isImm();
+ if (IsIndirect)
+ assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ UserValue *UV =
+ getUserValue(Var, Expr, MI.getDebugLoc());
+ UV->addDef(Idx, MI.getOperand(0), IsIndirect);
return true;
}
@@ -511,7 +554,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
/// We only propagate DBG_VALUES locally here. LiveDebugValues performs a
/// data-flow analysis to propagate them beyond basic block boundaries.
-void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
+void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS) {
SlotIndex Start = Idx;
@@ -538,7 +581,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
if (I.valid() && I.start() <= Start) {
// Stop when meeting a different location or an already extended interval.
Start = Start.getNextSlot();
- if (I.value() != LocNo || I.stop() != Start)
+ if (I.value() != Loc || I.stop() != Start)
return;
// This is a one-slot placeholder. Just skip it.
++I;
@@ -554,14 +597,14 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
Kills->push_back(Stop);
if (Start < Stop)
- I.insert(Start, Stop, LocNo);
+ I.insert(Start, Stop, Loc);
}
-void
-UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
- const SmallVectorImpl<SlotIndex> &Kills,
- SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
- MachineRegisterInfo &MRI, LiveIntervals &LIS) {
+void UserValue::addDefsFromCopies(
+ LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
+ MachineRegisterInfo &MRI, LiveIntervals &LIS) {
if (Kills.empty())
return;
// Don't track copies from physregs; there are too many uses.
@@ -588,7 +631,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
// it, or we are looking at a wrong value of LI.
SlotIndex Idx = LIS.getInstructionIndex(*MI);
LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
- if (!I.valid() || I.value() != LocNo)
+ if (!I.valid() || I.value().locNo() != LocNo)
continue;
if (!LIS.hasInterval(DstReg))
@@ -621,69 +664,141 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
- I.insert(Idx, Idx.getNextSlot(), LocNo);
- NewDefs.push_back(std::make_pair(Idx, LocNo));
+ DbgValueLocation NewLoc(LocNo, WasIndirect);
+ I.insert(Idx, Idx.getNextSlot(), NewLoc);
+ NewDefs.push_back(std::make_pair(Idx, NewLoc));
break;
}
}
}
-void
-UserValue::computeIntervals(MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- LiveIntervals &LIS) {
- SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+void UserValue::computeIntervals(MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ LiveIntervals &LIS, LexicalScopes &LS) {
+ SmallVector<std::pair<SlotIndex, DbgValueLocation>, 16> Defs;
// Collect all defs to be extended (Skipping undefs).
for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
- if (I.value() != ~0u)
+ if (!I.value().isUndef())
Defs.push_back(std::make_pair(I.start(), I.value()));
// Extend all defs, and possibly add new ones along the way.
for (unsigned i = 0; i != Defs.size(); ++i) {
SlotIndex Idx = Defs[i].first;
- unsigned LocNo = Defs[i].second;
- const MachineOperand &Loc = locations[LocNo];
+ DbgValueLocation Loc = Defs[i].second;
+ const MachineOperand &LocMO = locations[Loc.locNo()];
- if (!Loc.isReg()) {
- extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS);
+ if (!LocMO.isReg()) {
+ extendDef(Idx, Loc, nullptr, nullptr, nullptr, LIS);
continue;
}
// Register locations are constrained to where the register value is live.
- if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
+ if (TargetRegisterInfo::isVirtualRegister(LocMO.getReg())) {
LiveInterval *LI = nullptr;
const VNInfo *VNI = nullptr;
- if (LIS.hasInterval(Loc.getReg())) {
- LI = &LIS.getInterval(Loc.getReg());
+ if (LIS.hasInterval(LocMO.getReg())) {
+ LI = &LIS.getInterval(LocMO.getReg());
VNI = LI->getVNInfoAt(Idx);
}
SmallVector<SlotIndex, 16> Kills;
- extendDef(Idx, LocNo, LI, VNI, &Kills, LIS);
+ extendDef(Idx, Loc, LI, VNI, &Kills, LIS);
if (LI)
- addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
+ addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI,
+ LIS);
continue;
}
- // For physregs, use the live range of the first regunit as a guide.
- unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI);
- LiveRange *LR = &LIS.getRegUnit(Unit);
- const VNInfo *VNI = LR->getVNInfoAt(Idx);
- // Don't track copies from physregs, it is too expensive.
- extendDef(Idx, LocNo, LR, VNI, nullptr, LIS);
+ // For physregs, we only mark the start slot idx. DwarfDebug will see it
+ // as if the DBG_VALUE is valid up until the end of the basic block, or
+ // the next def of the physical register. So we do not need to extend the
+ // range. It might actually happen that the DBG_VALUE is the last use of
+ // the physical register (e.g. if this is an unused input argument to a
+ // function).
}
- // Finally, erase all the undefs.
+ // Erase all the undefs.
for (LocMap::iterator I = locInts.begin(); I.valid();)
- if (I.value() == ~0u)
+ if (I.value().isUndef())
I.erase();
else
++I;
+
+ // The computed intervals may extend beyond the range of the debug
+ // location's lexical scope. In this case, splitting of an interval
+ // can result in an interval outside of the scope being created,
+ // causing extra unnecessary DBG_VALUEs to be emitted. To prevent
+ // this, trim the intervals to the lexical scope.
+
+ LexicalScope *Scope = LS.findLexicalScope(dl);
+ if (!Scope)
+ return;
+
+ SlotIndex PrevEnd;
+ LocMap::iterator I = locInts.begin();
+
+ // Iterate over the lexical scope ranges. Each time round the loop
+ // we check the intervals for overlap with the end of the previous
+ // range and the start of the next. The first range is handled as
+ // a special case where there is no PrevEnd.
+ for (const InsnRange &Range : Scope->getRanges()) {
+ SlotIndex RStart = LIS.getInstructionIndex(*Range.first);
+ SlotIndex REnd = LIS.getInstructionIndex(*Range.second);
+
+ // At the start of each iteration I has been advanced so that
+ // I.stop() >= PrevEnd. Check for overlap.
+ if (PrevEnd && I.start() < PrevEnd) {
+ SlotIndex IStop = I.stop();
+ DbgValueLocation Loc = I.value();
+
+ // Stop overlaps previous end - trim the end of the interval to
+ // the scope range.
+ I.setStopUnchecked(PrevEnd);
+ ++I;
+
+ // If the interval also overlaps the start of the "next" (i.e.
+ // current) range create a new interval for the remainder (which
+ // may be further trimmed).
+ if (RStart < IStop)
+ I.insert(RStart, IStop, Loc);
+ }
+
+ // Advance I so that I.stop() >= RStart, and check for overlap.
+ I.advanceTo(RStart);
+ if (!I.valid())
+ return;
+
+ if (I.start() < RStart) {
+ // Interval start overlaps range - trim to the scope range.
+ I.setStartUnchecked(RStart);
+ // Remember that this interval was trimmed.
+ trimmedDefs.insert(RStart);
+ }
+
+ // The end of a lexical scope range is the last instruction in the
+ // range. To convert to an interval we need the index of the
+ // instruction after it.
+ REnd = REnd.getNextIndex();
+
+ // Advance I to first interval outside current range.
+ I.advanceTo(REnd);
+ if (!I.valid())
+ return;
+
+ PrevEnd = REnd;
+ }
+
+ // Check for overlap with end of final range.
+ if (PrevEnd && I.start() < PrevEnd)
+ I.setStopUnchecked(PrevEnd);
}
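
The trimming loop above is easier to follow on plain integers. The following standalone sketch uses toy types only (SlotIndex becomes int, the IntervalMap a vector of half-open pairs; this is not LLVM code) and mirrors the same idea: clamp each debug-value interval to the union of scope ranges, splitting intervals that span a hole between two ranges.

    // Standalone sketch of the scope trimming above; all types are toys.
    #include <algorithm>
    #include <utility>
    #include <vector>

    using Interval = std::pair<int, int>; // Half-open [start, stop).

    // Keep only the pieces of Intervals that lie inside one of Ranges.
    std::vector<Interval> trimToRanges(const std::vector<Interval> &Intervals,
                                       const std::vector<Interval> &Ranges) {
      std::vector<Interval> Out;
      for (const Interval &I : Intervals) {
        for (const Interval &R : Ranges) {
          int Start = std::max(I.first, R.first);
          int Stop = std::min(I.second, R.second);
          if (Start < Stop) // Non-empty overlap survives trimming.
            Out.push_back({Start, Stop});
        }
      }
      return Out;
    }

For example, trimToRanges({{0,10}}, {{2,4},{6,8}}) yields {2,4} and {6,8}: the part of the interval that fell between the two scope ranges is dropped, just as the in-place trimming above drops it.
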
void LDVImpl::computeIntervals() {
+ LexicalScopes LS;
+ LS.initialize(*MF);
+
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
- userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS);
+ userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, LS);
userValues[i]->mapVirtRegs(this);
}
}
@@ -718,7 +833,7 @@ static void removeDebugValues(MachineFunction &mf) {
bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
if (!EnableLDV)
return false;
- if (!mf.getFunction()->getSubprogram()) {
+ if (!mf.getFunction().getSubprogram()) {
removeDebugValues(mf);
return false;
}
@@ -757,7 +872,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
continue;
// Don't allocate the new LocNo until it is needed.
- unsigned NewLocNo = ~0u;
+ unsigned NewLocNo = UndefLocNo;
// Iterate over the overlaps between locInts and LI.
LocMapI.find(LI->beginIndex());
@@ -772,9 +887,9 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
break;
// Now LII->end > LocMapI.start(). Do we have an overlap?
- if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) {
+ if (LocMapI.value().locNo() == OldLocNo && LII->start < LocMapI.stop()) {
// Overlapping correct location. Allocate NewLocNo now.
- if (NewLocNo == ~0u) {
+ if (NewLocNo == UndefLocNo) {
MachineOperand MO = MachineOperand::CreateReg(LI->reg, false);
MO.setSubReg(locations[OldLocNo].getSubReg());
NewLocNo = getLocationNo(MO);
@@ -783,6 +898,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
SlotIndex LStart = LocMapI.start();
SlotIndex LStop = LocMapI.stop();
+ DbgValueLocation OldLoc = LocMapI.value();
// Trim LocMapI down to the LII overlap.
if (LStart < LII->start)
@@ -791,17 +907,17 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
LocMapI.setStopUnchecked(LII->end);
// Change the value in the overlap. This may trigger coalescing.
- LocMapI.setValue(NewLocNo);
+ LocMapI.setValue(OldLoc.changeLocNo(NewLocNo));
// Re-insert any removed OldLocNo ranges.
if (LStart < LocMapI.start()) {
- LocMapI.insert(LStart, LocMapI.start(), OldLocNo);
+ LocMapI.insert(LStart, LocMapI.start(), OldLoc);
++LocMapI;
assert(LocMapI.valid() && "Unexpected coalescing");
}
if (LStop > LocMapI.stop()) {
++LocMapI;
- LocMapI.insert(LII->end, LStop, OldLocNo);
+ LocMapI.insert(LII->end, LStop, OldLoc);
--LocMapI;
}
}
@@ -824,14 +940,14 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
locations.erase(locations.begin() + OldLocNo);
LocMapI.goToBegin();
while (LocMapI.valid()) {
- unsigned v = LocMapI.value();
- if (v == OldLocNo) {
+ DbgValueLocation v = LocMapI.value();
+ if (v.locNo() == OldLocNo) {
DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
<< LocMapI.stop() << ")\n");
LocMapI.erase();
} else {
- if (v > OldLocNo)
- LocMapI.setValueUnchecked(v-1);
+ if (v.locNo() > OldLocNo)
+ LocMapI.setValueUnchecked(v.changeLocNo(v.locNo() - 1));
++LocMapI;
}
}
@@ -876,36 +992,73 @@ splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) {
static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
}
-void
-UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
- // Iterate over locations in reverse makes it easier to handle coalescing.
- for (unsigned i = locations.size(); i ; --i) {
- unsigned LocNo = i-1;
- MachineOperand &Loc = locations[LocNo];
+void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
+ BitVector &SpilledLocations) {
+ // Build a set of new locations with new numbers so we can coalesce our
+ // IntervalMap if two vreg intervals collapse to the same physical location.
+ // Use MapVector instead of SetVector because MapVector::insert returns the
+ // position of the previously or newly inserted element. The boolean value
+ // tracks if the location was produced by a spill.
+ // FIXME: This will be problematic if we ever support direct and indirect
+ // frame index locations, i.e. expressing both variables in memory and
+ // 'int x, *px = &x'. The "spilled" bit must become part of the location.
+ MapVector<MachineOperand, bool> NewLocations;
+ SmallVector<unsigned, 4> LocNoMap(locations.size());
+ for (unsigned I = 0, E = locations.size(); I != E; ++I) {
+ bool Spilled = false;
+ MachineOperand Loc = locations[I];
// Only virtual registers are rewritten.
- if (!Loc.isReg() || !Loc.getReg() ||
- !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
- continue;
- unsigned VirtReg = Loc.getReg();
- if (VRM.isAssignedReg(VirtReg) &&
- TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
- // This can create a %noreg operand in rare cases when the sub-register
- // index is no longer available. That means the user value is in a
- // non-existent sub-register, and %noreg is exactly what we want.
- Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
- } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
- // FIXME: Translate SubIdx to a stackslot offset.
- Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
- } else {
- Loc.setReg(0);
- Loc.setSubReg(0);
+ if (Loc.isReg() && Loc.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
+ unsigned VirtReg = Loc.getReg();
+ if (VRM.isAssignedReg(VirtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ // This can create a %noreg operand in rare cases when the sub-register
+ // index is no longer available. That means the user value is in a
+ // non-existent sub-register, and %noreg is exactly what we want.
+ Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
+ // FIXME: Translate SubIdx to a stackslot offset.
+ Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+ Spilled = true;
+ } else {
+ Loc.setReg(0);
+ Loc.setSubReg(0);
+ }
+ }
+
+ // Insert this location if it doesn't already exist and record a mapping
+ // from the old number to the new number.
+ auto InsertResult = NewLocations.insert({Loc, Spilled});
+ unsigned NewLocNo = std::distance(NewLocations.begin(), InsertResult.first);
+ LocNoMap[I] = NewLocNo;
+ }
+
+ // Rewrite the locations and record which ones were spill slots.
+ locations.clear();
+ SpilledLocations.clear();
+ SpilledLocations.resize(NewLocations.size());
+ for (auto &Pair : NewLocations) {
+ locations.push_back(Pair.first);
+ if (Pair.second) {
+ unsigned NewLocNo = std::distance(&*NewLocations.begin(), &Pair);
+ SpilledLocations.set(NewLocNo);
}
- coalesceLocation(LocNo);
+ }
+
+ // Update the interval map, but only coalesce left, since intervals to the
+ // right use the old location numbers. This should merge two contiguous
+ // DBG_VALUE intervals with different vregs that were allocated to the same
+ // physical register.
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ DbgValueLocation Loc = I.value();
+ unsigned NewLocNo = LocNoMap[Loc.locNo()];
+ I.setValueUnchecked(Loc.changeLocNo(NewLocNo));
+ I.setStart(I.start());
}
}
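
The renumbering above can be illustrated with a standalone sketch (plain std::string locations instead of MachineOperands, a linear scan instead of MapVector; not LLVM code): deduplicate the rewritten locations while recording an old-number to new-number map.

    // Toy version of the renumbering step; the linear scan stands in for
    // MapVector::insert, which returns the position of the existing or
    // newly inserted element.
    #include <string>
    #include <vector>

    std::vector<unsigned> renumber(const std::vector<std::string> &OldLocs,
                                   std::vector<std::string> &NewLocs) {
      std::vector<unsigned> LocNoMap(OldLocs.size());
      for (unsigned I = 0; I != OldLocs.size(); ++I) {
        unsigned NewNo = 0;
        for (; NewNo != NewLocs.size(); ++NewNo)
          if (NewLocs[NewNo] == OldLocs[I]) // Duplicate collapses here.
            break;
        if (NewNo == NewLocs.size())
          NewLocs.push_back(OldLocs[I]);
        LocNoMap[I] = NewNo;
      }
      return LocNoMap;
    }

If two vregs were both rewritten to the same physreg, e.g. OldLocs = {"eax", "eax", "fi#0"}, the result is NewLocs = {"eax", "fi#0"} with LocNoMap = {0, 0, 1}, which is what lets adjacent intervals with different old location numbers coalesce.
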
-/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
-/// instruction.
+/// Find an iterator for inserting a DBG_VALUE instruction.
static MachineBasicBlock::iterator
findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
LiveIntervals &LIS) {
@@ -928,52 +1081,111 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
std::next(MachineBasicBlock::iterator(MI));
}
-void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
- unsigned LocNo,
+/// Find an iterator for inserting the next DBG_VALUE instruction
+/// (or end if no more insert locations found).
+static MachineBasicBlock::iterator
+findNextInsertLocation(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ SlotIndex StopIdx, MachineOperand &LocMO,
+ LiveIntervals &LIS,
+ const TargetRegisterInfo &TRI) {
+ if (!LocMO.isReg())
+ return MBB->instr_end();
+ unsigned Reg = LocMO.getReg();
+
+ // Find the next instruction in the MBB that define the register Reg.
+ while (I != MBB->end()) {
+ if (!LIS.isNotInMIMap(*I) &&
+ SlotIndex::isEarlierEqualInstr(StopIdx, LIS.getInstructionIndex(*I)))
+ break;
+ if (I->definesRegister(Reg, &TRI))
+ // The insert location is directly after the instruction/bundle.
+ return std::next(I);
+ ++I;
+ }
+ return MBB->end();
+}
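
A toy rendering of that scan (indices instead of iterators, a single Def field per instruction; not the LLVM API):

    #include <vector>

    struct ToyInstr { int Def; }; // Register defined here, -1 if none.

    // Return the position just after the first redefinition of Reg in
    // [I, Stop), or one-past-the-end if there is none.
    unsigned findNextInsertLoc(const std::vector<ToyInstr> &MBB, unsigned I,
                               unsigned Stop, int Reg) {
      for (; I < Stop; ++I)
        if (MBB[I].Def == Reg)
          return I + 1; // Insert directly after the redefinition.
      return (unsigned)MBB.size();
    }
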
+
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
+ SlotIndex StopIdx,
+ DbgValueLocation Loc, bool Spilled,
LiveIntervals &LIS,
- const TargetInstrInfo &TII) {
- MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
- MachineOperand &Loc = locations[LocNo];
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
+ SlotIndex MBBEndIdx = LIS.getMBBEndIdx(&*MBB);
+ // Only search within the current MBB.
+ StopIdx = (MBBEndIdx < StopIdx) ? MBBEndIdx : StopIdx;
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, StartIdx, LIS);
+ MachineOperand &MO = locations[Loc.locNo()];
++NumInsertedDebugValues;
assert(cast<DILocalVariable>(Variable)
->isValidLocationForIntrinsic(getDebugLoc()) &&
"Expected inlined-at fields to agree");
- if (Loc.isReg())
- BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect, Loc.getReg(), offset, Variable, Expression);
- else
- BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
- .add(Loc)
- .addImm(offset)
- .addMetadata(Variable)
- .addMetadata(Expression);
+
+ // If the location was spilled, the new DBG_VALUE will be indirect. If the
+ // original DBG_VALUE was indirect, we need to add DW_OP_deref to indicate
+ // that the original virtual register was a pointer.
+ const DIExpression *Expr = Expression;
+ bool IsIndirect = Loc.wasIndirect();
+ if (Spilled) {
+ if (IsIndirect)
+ Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
+ IsIndirect = true;
+ }
+
+ assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index");
+
+ do {
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+ .add(MO);
+ if (IsIndirect)
+ MIB.addImm(0U);
+ else
+ MIB.addReg(0U, RegState::Debug);
+ MIB.addMetadata(Variable).addMetadata(Expr);
+
+    // Continue and insert DBG_VALUEs after every redefinition of the
+    // register associated with the debug value within the range.
+ I = findNextInsertLocation(MBB, I, StopIdx, MO, LIS, TRI);
+ } while (I != MBB->end());
}
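
The indirection rules in insertDebugValue reduce to a small decision table. A hedged sketch of just that decision (toy struct, not the LLVM API):

    // How spilling interacts with indirection: a spill slot is a memory
    // address, so the DBG_VALUE becomes indirect; if the value was already
    // indirect, the extra dereference moves into the expression instead.
    struct DbgValueShape {
      bool Indirect;     // Emit the "imm 0" operand form.
      bool PrependDeref; // Prepend DW_OP_deref to the expression.
    };

    DbgValueShape shapeFor(bool WasIndirect, bool Spilled) {
      DbgValueShape S{WasIndirect, false};
      if (Spilled) {
        S.PrependDeref = WasIndirect; // Preserve the original pointer level.
        S.Indirect = true;            // Reads always go through the slot.
      }
      return S;
    }
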
void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
- const TargetInstrInfo &TII) {
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ const BitVector &SpilledLocations) {
MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
SlotIndex Start = I.start();
SlotIndex Stop = I.stop();
- unsigned LocNo = I.value();
- DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+ DbgValueLocation Loc = I.value();
+ bool Spilled = !Loc.isUndef() ? SpilledLocations.test(Loc.locNo()) : false;
+
+    // If the interval start was trimmed to the lexical scope, insert the
+ // DBG_VALUE at the previous index (otherwise it appears after the
+ // first instruction in the range).
+ if (trimmedDefs.count(Start))
+ Start = Start.getPrevIndex();
+
+ DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo());
MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
- DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, LocNo, LIS, TII);
+ DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
+ insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
// This interval may span multiple basic blocks.
// Insert a DBG_VALUE into each one.
- while(Stop > MBBEnd) {
+ while (Stop > MBBEnd) {
// Move to the next block.
Start = MBBEnd;
if (++MBB == MFEnd)
break;
MBBEnd = LIS.getMBBEndIdx(&*MBB);
- DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, LocNo, LIS, TII);
+ DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
+ insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
}
DEBUG(dbgs() << '\n');
if (MBB == MFEnd)
@@ -988,10 +1200,11 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
if (!MF)
return;
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ BitVector SpilledLocations;
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
DEBUG(userValues[i]->print(dbgs(), TRI));
- userValues[i]->rewriteLocations(*VRM, *TRI);
- userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+ userValues[i]->rewriteLocations(*VRM, *TRI, SpilledLocations);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpilledLocations);
}
EmitDone = true;
}
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index 1d7e3d4371a2..aa35880b063a 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -1,4 +1,4 @@
-//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===//
+//===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -22,17 +22,16 @@
#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Compiler.h"
namespace llvm {
template <typename T> class ArrayRef;
-class LiveInterval;
class LiveIntervals;
class VirtRegMap;
class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass {
- void *pImpl;
+ void *pImpl = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
@@ -62,14 +61,12 @@ public:
void dump() const;
private:
-
bool runOnMachineFunction(MachineFunction &) override;
void releaseMemory() override;
void getAnalysisUsage(AnalysisUsage &) const override;
bool doInitialization(Module &) override;
-
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 9ef9f238fdce..302c75133e35 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -1,4 +1,4 @@
-//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//===- LiveInterval.cpp - Live Interval Representation --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,20 +19,34 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveInterval.h"
-
#include "LiveRangeUtils.h"
#include "RegisterCoalescer.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
namespace {
+
//===----------------------------------------------------------------------===//
// Implementation of various methods necessary for calculation of live ranges.
// The implementation of the methods abstracts from the concrete type of the
@@ -56,8 +70,8 @@ protected:
CalcLiveRangeUtilBase(LiveRange *LR) : LR(LR) {}
public:
- typedef LiveRange::Segment Segment;
- typedef IteratorT iterator;
+ using Segment = LiveRange::Segment;
+ using iterator = IteratorT;
/// A counterpart of LiveRange::createDeadDef: Make sure the range has a
/// value defined at @p Def.
@@ -265,8 +279,9 @@ private:
//===----------------------------------------------------------------------===//
class CalcLiveRangeUtilVector;
-typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator,
- LiveRange::Segments> CalcLiveRangeUtilVectorBase;
+using CalcLiveRangeUtilVectorBase =
+ CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator,
+ LiveRange::Segments>;
class CalcLiveRangeUtilVector : public CalcLiveRangeUtilVectorBase {
public:
@@ -292,9 +307,9 @@ private:
//===----------------------------------------------------------------------===//
class CalcLiveRangeUtilSet;
-typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilSet,
- LiveRange::SegmentSet::iterator,
- LiveRange::SegmentSet> CalcLiveRangeUtilSetBase;
+using CalcLiveRangeUtilSetBase =
+ CalcLiveRangeUtilBase<CalcLiveRangeUtilSet, LiveRange::SegmentSet::iterator,
+ LiveRange::SegmentSet>;
class CalcLiveRangeUtilSet : public CalcLiveRangeUtilSetBase {
public:
@@ -327,7 +342,8 @@ private:
return I;
}
};
-} // namespace
+
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
// LiveRange methods
@@ -444,7 +460,7 @@ bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP,
if (J == JE)
return false;
- for (;;) {
+ while (true) {
// J has just been advanced to satisfy:
assert(J->end >= I->start);
// Check for an overlap.
@@ -865,7 +881,6 @@ void LiveInterval::clearSubRanges() {
void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange&)> Apply) {
-
LaneBitmask ToApply = LaneMask;
for (SubRange &SR : subranges()) {
LaneBitmask SRMask = SR.LaneMask;
@@ -925,8 +940,8 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
}
}
-raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) {
- return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')';
+raw_ostream& llvm::operator<<(raw_ostream& OS, const LiveRange::Segment &S) {
+ return OS << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')';
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -971,7 +986,7 @@ void LiveInterval::SubRange::print(raw_ostream &OS) const {
}
void LiveInterval::print(raw_ostream &OS) const {
- OS << PrintReg(reg) << ' ';
+ OS << printReg(reg) << ' ';
super::print(OS);
// Print subranges
for (const SubRange &SR : subranges())
@@ -1033,7 +1048,6 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
}
#endif
-
//===----------------------------------------------------------------------===//
// LiveRangeUpdater class
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index b3248e53d0a5..3e742a6c2f21 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -17,8 +17,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <cstdlib>
@@ -87,7 +87,7 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
}
for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
OS << " [" << SI.start() << ' ' << SI.stop() << "):"
- << PrintReg(SI.value()->reg, TRI);
+ << printReg(SI.value()->reg, TRI);
}
OS << '\n';
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervals.cpp
index 0e240f482a19..79fdba7e062a 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervals.cpp
@@ -1,4 +1,4 @@
-//===- LiveIntervalAnalysis.cpp - Live Interval Analysis ------------------===//
+//===- LiveIntervals.cpp - Live Interval Analysis -------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,7 +14,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -34,6 +34,8 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -44,8 +46,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -157,7 +157,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
// Dump the regunits.
for (unsigned Unit = 0, UnitE = RegUnitRanges.size(); Unit != UnitE; ++Unit)
if (LiveRange *LR = RegUnitRanges[Unit])
- OS << PrintRegUnit(Unit, TRI) << ' ' << *LR << '\n';
+ OS << printRegUnit(Unit, TRI) << ' ' << *LR << '\n';
// Dump the virtregs.
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
@@ -323,7 +323,7 @@ void LiveIntervals::computeLiveInRegUnits() {
// Create phi-defs at Begin for all live-in registers.
SlotIndex Begin = Indexes->getMBBStartIdx(&MBB);
- DEBUG(dbgs() << Begin << "\tBB#" << MBB.getNumber());
+ DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB));
for (const auto &LI : MBB.liveins()) {
for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = *Units;
@@ -335,7 +335,7 @@ void LiveIntervals::computeLiveInRegUnits() {
}
VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());
(void)VNI;
- DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id);
+ DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << '#' << VNI->id);
}
}
DEBUG(dbgs() << '\n');
@@ -698,11 +698,11 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Check if any of the regunits are live beyond the end of RI. That could
// happen when a physreg is defined as a copy of a virtreg:
//
- // %EAX = COPY %vreg5
- // FOO %vreg5 <--- MI, cancel kill because %EAX is live.
- // BAR %EAX<kill>
+ // %eax = COPY %5
+ // FOO %5 <--- MI, cancel kill because %eax is live.
+ // BAR killed %eax
//
- // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
+ // There should be no kill flag on FOO when %5 is rewritten as %eax.
for (auto &RUP : RU) {
const LiveRange &RURange = *RUP.first;
LiveRange::const_iterator &I = RUP.second;
@@ -719,13 +719,13 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// When reading a partial undefined value we must not add a kill flag.
// The regalloc might have used the undef lane for something else.
// Example:
- // %vreg1 = ... ; R32: %vreg1
- // %vreg2:high16 = ... ; R64: %vreg2
- // = read %vreg2<kill> ; R64: %vreg2
- // = read %vreg1 ; R32: %vreg1
- // The <kill> flag is correct for %vreg2, but the register allocator may
- // assign R0L to %vreg1, and R0 to %vreg2 because the low 32bits of R0
- // are actually never written by %vreg2. After assignment the <kill>
+ // %1 = ... ; R32: %1
+ // %2:high16 = ... ; R64: %2
+ // = read killed %2 ; R64: %2
+ // = read %1 ; R32: %1
+ // The <kill> flag is correct for %2, but the register allocator may
+ // assign R0L to %1, and R0 to %2 because the low 32bits of R0
+ // are actually never written by %2. After assignment the <kill>
// flag at the read instruction is invalid.
LaneBitmask DefinedLanesMask;
if (!SRs.empty()) {
@@ -824,7 +824,13 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineInstr &MI) {
- BlockFrequency Freq = MBFI->getBlockFreq(MI.getParent());
+ return getSpillWeight(isDef, isUse, MBFI, MI.getParent());
+}
+
+float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
+ const MachineBlockFrequencyInfo *MBFI,
+ const MachineBasicBlock *MBB) {
+ BlockFrequency Freq = MBFI->getBlockFreq(MBB);
const float Scale = 1.0f / MBFI->getEntryFreq();
return (isDef + isUse) * (Freq.getFrequency() * Scale);
}
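
The weight formula is just the def/use count scaled by relative block frequency. A standalone sketch with made-up numbers (not LLVM code):

    // weight = (isDef + isUse) * BlockFreq / EntryFreq
    float spillWeight(bool IsDef, bool IsUse, float BlockFreq, float EntryFreq) {
      const float Scale = 1.0f / EntryFreq;
      return (IsDef + IsUse) * (BlockFreq * Scale);
    }
    // E.g. an instruction that both reads and writes the register, in a block
    // three times hotter than entry: spillWeight(true, true, 24.0f, 8.0f) == 6.0f.
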
@@ -989,11 +995,11 @@ private:
DEBUG({
dbgs() << " ";
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- dbgs() << PrintReg(Reg);
+ dbgs() << printReg(Reg);
if (LaneMask.any())
dbgs() << " L" << PrintLaneMask(LaneMask);
} else {
- dbgs() << PrintRegUnit(Reg, &TRI);
+ dbgs() << printRegUnit(Reg, &TRI);
}
dbgs() << ":\t" << LR << '\n';
});
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index cde6ccd29dfd..f4b43a9b8ead 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -40,10 +40,8 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
}
}
-/// Simulates liveness when stepping backwards over an instruction(bundle):
-/// Remove Defs, add uses. This is the recommended way of calculating liveness.
-void LivePhysRegs::stepBackward(const MachineInstr &MI) {
- // Remove defined registers and regmask kills from the set.
+/// Remove defined registers and regmask kills from the set.
+void LivePhysRegs::removeDefs(const MachineInstr &MI) {
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg()) {
if (!O->isDef())
@@ -55,8 +53,10 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
} else if (O->isRegMask())
removeRegsInMask(*O);
}
+}
- // Add uses to the set.
+/// Add uses to the set.
+void LivePhysRegs::addUses(const MachineInstr &MI) {
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (!O->isReg() || !O->readsReg())
continue;
@@ -67,6 +67,16 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
}
}
+/// Simulates liveness when stepping backwards over an instruction (bundle):
+/// Remove Defs, add uses. This is the recommended way of calculating liveness.
+void LivePhysRegs::stepBackward(const MachineInstr &MI) {
+ // Remove defined registers and regmask kills from the set.
+ removeDefs(MI);
+
+ // Add uses to the set.
+ addUses(MI);
+}
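
On toy sets the backward step looks like this (registers are plain ints; not the LLVM API):

    #include <set>

    struct ToyMI { std::set<int> Defs, Uses; };

    // Given the registers live *after* MI, compute those live *before* it.
    void stepBackwardToy(std::set<int> &Live, const ToyMI &MI) {
      for (int R : MI.Defs) // removeDefs(): a def ends liveness going up.
        Live.erase(R);
      for (int R : MI.Uses) // addUses(): a use starts liveness going up.
        Live.insert(R);
    }

The order matters: for r1 = r1 + r2 with {r1} live after, erasing the def first and then adding the uses leaves {r1, r2} live before, so a register that is both defined and used stays live across the step.
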
+
/// Simulates liveness when stepping forward over an instruction (bundle):
/// Remove killed-uses, add defs. This is not the recommended way, because it
/// depends on accurate kill flags. If possible, use stepBackward() instead of this
@@ -116,7 +126,7 @@ void LivePhysRegs::print(raw_ostream &OS) const {
}
for (const_iterator I = begin(), E = end(); I != E; ++I)
- OS << " " << PrintReg(*I, TRI);
+ OS << " " << printReg(*I, TRI);
OS << "\n";
}
@@ -166,17 +176,32 @@ static void addCalleeSavedRegs(LivePhysRegs &LiveRegs,
LiveRegs.addReg(*CSR);
}
-/// Adds pristine registers to the given \p LiveRegs. Pristine registers are
-/// callee saved registers that are unused in the function.
-static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF) {
+void LivePhysRegs::addPristines(const MachineFunction &MF) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.isCalleeSavedInfoValid())
return;
+  /// This function will usually be called on an empty object; handle this
+  /// as a special case.
+ if (empty()) {
+ /// Add all callee saved regs, then remove the ones that are saved and
+ /// restored.
+ addCalleeSavedRegs(*this, MF);
+ /// Remove the ones that are not saved/restored; they are pristine.
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ removeReg(Info.getReg());
+ return;
+ }
+ /// If a callee-saved register that is not pristine is already present
+ /// in the set, we should make sure that it stays in it. Precompute the
+ /// set of pristine registers in a separate object.
/// Add all callee saved regs, then remove the ones that are saved+restored.
- addCalleeSavedRegs(LiveRegs, MF);
+ LivePhysRegs Pristine(*TRI);
+ addCalleeSavedRegs(Pristine, MF);
/// Remove the ones that are not saved/restored; they are pristine.
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
- LiveRegs.removeReg(Info.getReg());
+ Pristine.removeReg(Info.getReg());
+ for (MCPhysReg R : Pristine)
+ addReg(R);
}
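
Pristine registers are simply the callee-saved set minus the registers the function actually saves and restores. A standalone sketch with plain sets (not LLVM code):

    #include <set>

    std::set<int> computePristines(const std::set<int> &CalleeSaved,
                                   const std::set<int> &SavedRestored) {
      std::set<int> Pristine = CalleeSaved;
      for (int R : SavedRestored)
        Pristine.erase(R); // Saved+restored CSRs are tracked normally.
      return Pristine;
    }

This mirrors the non-empty path above, where the pristine set is computed in a separate object and then merged so that non-pristine registers already in the set are not clobbered.
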
void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
@@ -192,7 +217,8 @@ void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.isCalleeSavedInfoValid()) {
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
- addReg(Info.getReg());
+ if (Info.isRestored())
+ addReg(Info.getReg());
}
}
}
@@ -200,7 +226,7 @@ void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
if (!MBB.succ_empty()) {
- addPristines(*this, MF);
+ addPristines(MF);
addLiveOutsNoPristines(MBB);
} else if (MBB.isReturnBlock()) {
// For the return block: Add all callee saved registers.
@@ -212,21 +238,27 @@ void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- addPristines(*this, MF);
+ addPristines(MF);
addBlockLiveIns(MBB);
}
void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
- const MachineRegisterInfo &MRI,
- MachineBasicBlock &MBB) {
+ const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
- assert(MBB.livein_empty());
LiveRegs.init(TRI);
LiveRegs.addLiveOutsNoPristines(MBB);
- for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
+ for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
LiveRegs.stepBackward(MI);
+}
- for (unsigned Reg : LiveRegs) {
+void llvm::addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs) {
+ assert(MBB.livein_empty() && "Expected empty live-in list");
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ for (MCPhysReg Reg : LiveRegs) {
if (MRI.isReserved(Reg))
continue;
// Skip the register if we are about to add one of its super registers.
@@ -242,3 +274,56 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
MBB.addLiveIn(Reg);
}
}
+
+void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+
+ // We walk through the block backwards and start with the live outs.
+ LivePhysRegs LiveRegs;
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveOutsNoPristines(MBB);
+
+ for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+ // Recompute dead flags.
+ for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef() || MO->isDebug())
+ continue;
+
+ unsigned Reg = MO->getReg();
+ if (Reg == 0)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+
+ bool IsNotLive = LiveRegs.available(MRI, Reg);
+ MO->setIsDead(IsNotLive);
+ }
+
+ // Step backward over defs.
+ LiveRegs.removeDefs(MI);
+
+ // Recompute kill flags.
+ for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->readsReg() || MO->isDebug())
+ continue;
+
+ unsigned Reg = MO->getReg();
+ if (Reg == 0)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+
+ bool IsNotLive = LiveRegs.available(MRI, Reg);
+ MO->setIsKill(IsNotLive);
+ }
+
+ // Complete the stepbackward.
+ LiveRegs.addUses(MI);
+ }
+}
+
+void llvm::computeAndAddLiveIns(LivePhysRegs &LiveRegs,
+ MachineBasicBlock &MBB) {
+ computeLiveIns(LiveRegs, MBB);
+ addLiveIns(MBB, LiveRegs);
+}
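
The flag recomputation walks each block bottom-up with the live-out set. A toy outline of the dead/kill logic (plain structs and ints, not the LLVM API):

    #include <set>
    #include <vector>

    struct FlagMI {
      int Def = -1, Use = -1; // -1 means "none".
      bool IsDead = false, IsKill = false;
    };

    void recomputeFlagsToy(std::vector<FlagMI> &Block, std::set<int> Live) {
      for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
        if (It->Def != -1) {
          It->IsDead = !Live.count(It->Def); // Not live after -> dead def.
          Live.erase(It->Def);               // Step backward over the def.
        }
        if (It->Use != -1) {
          It->IsKill = !Live.count(It->Use); // Last use in block order kills.
          Live.insert(It->Use);
        }
      }
    }
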
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 8c43c9f3f884..66c23b7b69ce 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -1,4 +1,4 @@
-//===---- LiveRangeCalc.cpp - Calculate live ranges -----------------------===//
+//===- LiveRangeCalc.cpp - Calculate live ranges --------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,9 +12,27 @@
//===----------------------------------------------------------------------===//
#include "LiveRangeCalc.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <tuple>
+#include <utility>
using namespace llvm;
@@ -44,7 +62,6 @@ void LiveRangeCalc::reset(const MachineFunction *mf,
LiveIn.clear();
}
-
static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
LiveRange &LR, const MachineOperand &MO) {
const MachineInstr &MI = *MO.getParent();
@@ -136,7 +153,6 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
createDeadDef(*Indexes, *Alloc, LR, MO);
}
-
void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask,
LiveInterval *LI) {
SmallVector<SlotIndex, 4> Undefs;
@@ -148,7 +164,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask,
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
// Clear all kill flags. They will be reinserted after register allocation
- // by LiveIntervalAnalysis::addKillFlags().
+ // by LiveIntervals::addKillFlags().
if (MO.isUse())
MO.setIsKill(false);
// MO::readsReg returns "true" for subregister defs. This is for keeping
@@ -197,7 +213,6 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask,
}
}
-
void LiveRangeCalc::updateFromLiveIns() {
LiveRangeUpdater Updater;
for (const LiveInBlock &I : LiveIn) {
@@ -248,7 +263,6 @@ void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg,
calculateValues();
}
-
// This function is called by a client after using the low-level API to add
// live-out and live-in blocks. The unique value optimization is not
// available, SplitEditor::transferValues handles that case directly anyway.
@@ -259,7 +273,6 @@ void LiveRangeCalc::calculateValues() {
updateFromLiveIns();
}
-
bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
MachineBasicBlock &MBB, BitVector &DefOnEntry,
BitVector &UndefOnEntry) {
@@ -351,7 +364,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
#ifndef NDEBUG
if (MBB->pred_empty()) {
MBB->getParent()->verify();
- errs() << "Use of " << PrintReg(PhysReg)
+ errs() << "Use of " << printReg(PhysReg)
<< " does not have a corresponding definition on every path:\n";
const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
if (MI != nullptr)
@@ -363,8 +376,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
!MBB->isLiveIn(PhysReg)) {
MBB->getParent()->verify();
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- errs() << "The register " << PrintReg(PhysReg, TRI)
- << " needs to be live in to BB#" << MBB->getNumber()
+ errs() << "The register " << printReg(PhysReg, TRI)
+ << " needs to be live in to " << printMBBReference(*MBB)
<< ", but is missing from the live-in list.\n";
report_fatal_error("Invalid global physical register");
}
@@ -410,7 +423,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
LiveIn.clear();
FoundUndef |= (TheVNI == nullptr || TheVNI == &UndefVNI);
- if (Undefs.size() > 0 && FoundUndef)
+ if (!Undefs.empty() && FoundUndef)
UniqueVNI = false;
// Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
@@ -454,7 +467,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
LiveIn.reserve(WorkList.size());
for (unsigned BN : WorkList) {
MachineBasicBlock *MBB = MF->getBlockNumbered(BN);
- if (Undefs.size() > 0 &&
+ if (!Undefs.empty() &&
!isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
continue;
addLiveInBlock(LR, DomTree->getNode(MBB));
@@ -465,7 +478,6 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
return false;
}
-
// This is essentially the same iterative algorithm that SSAUpdater uses,
// except we already have a dominator tree, so we don't have to recompute it.
void LiveRangeCalc::updateSSA() {
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index d41b782d9bdf..c4914f23f56d 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -1,4 +1,4 @@
-//===---- LiveRangeCalc.h - Calculate live ranges ---------------*- C++ -*-===//
+//===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -26,28 +26,35 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/LaneBitmask.h"
+#include <utility>
namespace llvm {
-/// Forward declarations for MachineDominators.h:
-class MachineDominatorTree;
template <class NodeT> class DomTreeNodeBase;
-typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+class MachineDominatorTree;
+class MachineFunction;
+class MachineRegisterInfo;
+
+using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
class LiveRangeCalc {
- const MachineFunction *MF;
- const MachineRegisterInfo *MRI;
- SlotIndexes *Indexes;
- MachineDominatorTree *DomTree;
- VNInfo::Allocator *Alloc;
+ const MachineFunction *MF = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ SlotIndexes *Indexes = nullptr;
+ MachineDominatorTree *DomTree = nullptr;
+ VNInfo::Allocator *Alloc = nullptr;
/// LiveOutPair - A value and the block that defined it. The domtree node is
/// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
- typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+ using LiveOutPair = std::pair<VNInfo *, MachineDomTreeNode *>;
/// LiveOutMap - Map basic blocks to the value leaving the block.
- typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+ using LiveOutMap = IndexedMap<LiveOutPair, MBB2NumberFunctor>;
/// Bit vector of active entries in LiveOut, also used as a visited set by
/// findReachingDefs. One entry per basic block, indexed by block number.
@@ -66,7 +73,7 @@ class LiveRangeCalc {
/// registers do not overlap), but the defined/undefined information must
/// be kept separate for each individual range.
/// By convention, EntryInfoMap[&LR] = { Defined, Undefined }.
- typedef DenseMap<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+ using EntryInfoMap = DenseMap<LiveRange *, std::pair<BitVector, BitVector>>;
EntryInfoMap EntryInfos;
/// Map each basic block where a live range is live out to the live-out value
@@ -105,10 +112,10 @@ class LiveRangeCalc {
SlotIndex Kill;
// Live-in value filled in by updateSSA once it is known.
- VNInfo *Value;
+ VNInfo *Value = nullptr;
LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill)
- : LR(LR), DomNode(node), Kill(kill), Value(nullptr) {}
+ : LR(LR), DomNode(node), Kill(kill) {}
};
/// LiveIn - Work list of blocks where the live-in value has yet to be
@@ -171,8 +178,7 @@ class LiveRangeCalc {
void resetLiveOutMap();
public:
- LiveRangeCalc() : MF(nullptr), MRI(nullptr), Indexes(nullptr),
- DomTree(nullptr), Alloc(nullptr) {}
+ LiveRangeCalc() = default;
//===--------------------------------------------------------------------===//
// High-level interface.
@@ -186,10 +192,8 @@ public:
/// that may overlap a previously computed live range, and before the first
/// live range in a function. If live ranges are not known to be
/// non-overlapping, call reset before each.
- void reset(const MachineFunction *MF,
- SlotIndexes*,
- MachineDominatorTree*,
- VNInfo::Allocator*);
+ void reset(const MachineFunction *mf, SlotIndexes *SI,
+ MachineDominatorTree *MDT, VNInfo::Allocator *VNIA);
//===--------------------------------------------------------------------===//
// Mid-level interface.
@@ -282,4 +286,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_LIVERANGECALC_H
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 92cca1a54951..86cfbd87f5b1 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -14,12 +14,12 @@
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -465,7 +465,7 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
if (MRI.recomputeRegClass(LI.reg))
DEBUG({
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
+ dbgs() << "Inflated " << printReg(LI.reg) << " to "
<< TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n';
});
VRAI.calculateSpillWeightAndHint(LI);
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
index 552f4b5393fe..02e1f3b01ade 100644
--- a/lib/CodeGen/LiveRangeShrink.cpp
+++ b/lib/CodeGen/LiveRangeShrink.cpp
@@ -1,4 +1,4 @@
-//===-- LiveRangeShrink.cpp - Move instructions to shrink live range ------===//
+//===- LiveRangeShrink.cpp - Move instructions to shrink live range -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,20 +14,32 @@
/// uses, all of which are the only use of the def.
///
///===---------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
#define DEBUG_TYPE "lrshrink"
STATISTIC(NumInstrsHoistedToShrinkLiveRange,
"Number of insructions hoisted to shrink live range.");
-using namespace llvm;
-
namespace {
+
class LiveRangeShrink : public MachineFunctionPass {
public:
static char ID;
@@ -45,23 +57,26 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
};
-} // End anonymous namespace.
+
+} // end anonymous namespace
char LiveRangeShrink::ID = 0;
+
char &llvm::LiveRangeShrinkID = LiveRangeShrink::ID;
INITIALIZE_PASS(LiveRangeShrink, "lrshrink", "Live Range Shrink Pass", false,
false)
-namespace {
-typedef DenseMap<MachineInstr *, unsigned> InstOrderMap;
+
+using InstOrderMap = DenseMap<MachineInstr *, unsigned>;
/// Returns \p New if it's dominated by \p Old, otherwise return \p Old.
/// \p M maintains a map from instruction to its dominating order that satisfies
/// M[A] > M[B] guarantees that A is dominated by B.
/// If \p New is not in \p M, return \p Old. Otherwise if \p Old is null, return
/// \p New.
-MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old,
- const InstOrderMap &M) {
+static MachineInstr *FindDominatedInstruction(MachineInstr &New,
+ MachineInstr *Old,
+ const InstOrderMap &M) {
auto NewIter = M.find(&New);
if (NewIter == M.end())
return Old;
@@ -82,16 +97,16 @@ MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old,
/// Builds Instruction to its dominating order number map \p M by traversing
/// from instruction \p Start.
-void BuildInstOrderMap(MachineBasicBlock::iterator Start, InstOrderMap &M) {
+static void BuildInstOrderMap(MachineBasicBlock::iterator Start,
+ InstOrderMap &M) {
M.clear();
unsigned i = 0;
for (MachineInstr &I : make_range(Start, Start->getParent()->end()))
M[&I] = i++;
}
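
Within the straight-line region the map covers, "A is dominated by B" reduces to "A appears later", so a position map makes the dominance test in FindDominatedInstruction an O(1) lookup. A toy version of the order map (addresses as keys, ints standing in for MachineInstr; not LLVM code):

    #include <unordered_map>
    #include <vector>

    using ToyOrderMap = std::unordered_map<const void *, unsigned>;

    void buildOrderMapToy(const std::vector<int> &Insts, ToyOrderMap &M) {
      M.clear();
      unsigned i = 0;
      for (const int &I : Insts)
        M[&I] = i++; // Larger number == appears later == dominated.
    }
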
-} // end anonymous namespace
bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
MachineRegisterInfo &MRI = MF.getRegInfo();
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 60033db38ee4..bd435968296d 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -15,17 +15,17 @@
#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
using namespace llvm;
@@ -102,14 +102,14 @@ static bool foreachUnit(const TargetRegisterInfo *TRI,
}
void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
- << " to " << PrintReg(PhysReg, TRI) << ':');
+ DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI)
+ << " to " << printReg(PhysReg, TRI) << ':');
assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
const LiveRange &Range) {
- DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << ' ' << Range);
+ DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << ' ' << Range);
Matrix[Unit].unify(VirtReg, Range);
return false;
});
@@ -120,13 +120,13 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
unsigned PhysReg = VRM->getPhys(VirtReg.reg);
- DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
- << " from " << PrintReg(PhysReg, TRI) << ':');
+ DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI)
+ << " from " << printReg(PhysReg, TRI) << ':');
VRM->clearVirt(VirtReg.reg);
foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
const LiveRange &Range) {
- DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI));
+ DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI));
Matrix[Unit].extract(VirtReg, Range);
return false;
});
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
index f9ba4ffa6527..9f28db6287ba 100644
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -19,8 +19,8 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -97,23 +97,37 @@ static void addCalleeSavedRegs(LiveRegUnits &LiveUnits,
LiveUnits.addReg(*CSR);
}
-/// Adds pristine registers to the given \p LiveUnits. Pristine registers are
-/// callee saved registers that are unused in the function.
-static void addPristines(LiveRegUnits &LiveUnits, const MachineFunction &MF) {
+void LiveRegUnits::addPristines(const MachineFunction &MF) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.isCalleeSavedInfoValid())
return;
+  /// This function will usually be called on an empty object; handle this
+  /// as a special case.
+ if (empty()) {
+ /// Add all callee saved regs, then remove the ones that are saved and
+ /// restored.
+ addCalleeSavedRegs(*this, MF);
+ /// Remove the ones that are not saved/restored; they are pristine.
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ removeReg(Info.getReg());
+ return;
+ }
+ /// If a callee-saved register that is not pristine is already present
+ /// in the set, we should make sure that it stays in it. Precompute the
+ /// set of pristine registers in a separate object.
/// Add all callee saved regs, then remove the ones that are saved+restored.
- addCalleeSavedRegs(LiveUnits, MF);
+ LiveRegUnits Pristine(*TRI);
+ addCalleeSavedRegs(Pristine, MF);
/// Remove the ones that are not saved/restored; they are pristine.
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
- LiveUnits.removeReg(Info.getReg());
+ Pristine.removeReg(Info.getReg());
+ addUnits(Pristine.getBitVector());
}
void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
if (!MBB.succ_empty()) {
- addPristines(*this, MF);
+ addPristines(MF);
// To get the live-outs we simply merge the live-ins of all successors.
for (const MachineBasicBlock *Succ : MBB.successors())
addBlockLiveIns(*this, *Succ);
@@ -127,6 +141,6 @@ void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- addPristines(*this, MF);
+ addPristines(MF);
addBlockLiveIns(*this, MBB);
}
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index b51f8b0aa6bb..b0e58b0e3e5f 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -14,12 +14,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "livestacks"
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index a9aec926115a..032dd66ae1d2 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>
using namespace llvm;
@@ -235,7 +234,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) {
// Otherwise, the last sub-register def implicitly defines this register.
// e.g.
// AH =
- // AL = ... <imp-def EAX>, <imp-kill AH>
+ // AL = ... implicit-def EAX, implicit killed AH
// = AH
// ...
// = EAX
@@ -321,17 +320,17 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// AH =
//
// = AX
- // = AL, AX<imp-use, kill>
+ // = AL, implicit killed AX
// AX =
//
// Or whole register is defined, but not used at all.
- // AX<dead> =
+ // dead AX =
// ...
// AX =
//
// Or whole register is defined, but only partly used.
- // AX<dead> = AL<imp-def>
- // = AL<kill>
+ // dead AX = implicit-def AL
+ // = killed AL
// AX =
MachineInstr *LastPartDef = nullptr;
unsigned LastPartDefDist = 0;
@@ -364,7 +363,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
if (!PhysRegUse[Reg]) {
// Partial uses. Mark register def dead and add implicit def of
// sub-registers which are used.
- // EAX<dead> = op AL<imp-def>
+ // dead EAX = op implicit-def AL
// That is, EAX def is dead but AL def extends past it.
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index b109f1922a3e..c0da37ede849 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -14,29 +14,30 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <tuple>
using namespace llvm;
@@ -47,6 +48,7 @@ STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
STATISTIC(NumReplacements, "Number of frame indices references replaced");
namespace {
+
class FrameRef {
MachineBasicBlock::iterator MI; // Instr referencing the frame
int64_t LocalOffset; // Local offset of the frame idx referenced
@@ -72,9 +74,10 @@ namespace {
};
class LocalStackSlotPass: public MachineFunctionPass {
- SmallVector<int64_t,16> LocalOffsets;
+ SmallVector<int64_t, 16> LocalOffsets;
+
/// StackObjSet - A set of stack object indexes
- typedef SmallSetVector<int, 8> StackObjSet;
+ using StackObjSet = SmallSetVector<int, 8>;
void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset,
bool StackGrowsDown, unsigned &MaxAlign);
@@ -84,11 +87,14 @@ namespace {
int64_t &Offset, unsigned &MaxAlign);
void calculateFrameObjectOffsets(MachineFunction &Fn);
bool insertFrameReferenceRegisters(MachineFunction &Fn);
+
public:
static char ID; // Pass identification, replacement for typeid
+
explicit LocalStackSlotPass() : MachineFunctionPass(ID) {
initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry());
}
+
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -96,20 +102,20 @@ namespace {
AU.addRequired<StackProtector>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
- private:
};
+
} // end anonymous namespace
char LocalStackSlotPass::ID = 0;
+
char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
+
INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE,
"Local Stack Slot Allocation", false, false)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE,
"Local Stack Slot Allocation", false, false)
-
bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -178,7 +184,6 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs
MachineFrameInfo &MFI,
bool StackGrowsDown, int64_t &Offset,
unsigned &MaxAlign) {
-
for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
E = UnassignedObjs.end(); I != E; ++I) {
int i = *I;
@@ -189,7 +194,6 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
/// abstract stack objects.
-///
void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Loop over all of the stack objects, assigning sequential addresses...
MachineFrameInfo &MFI = Fn.getFrameInfo();
@@ -397,7 +401,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
continue;
}
- const MachineFunction *MF = MI.getParent()->getParent();
+ const MachineFunction *MF = MI.getMF();
const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp
index 0fc48d4e0b6b..0cf578b50563 100644
--- a/lib/CodeGen/LowerEmuTLS.cpp
+++ b/lib/CodeGen/LowerEmuTLS.cpp
@@ -16,11 +16,11 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLowering.h"
using namespace llvm;
diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp
new file mode 100644
index 000000000000..4b676a60a8cd
--- /dev/null
+++ b/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -0,0 +1,625 @@
+//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The purpose of this pass is to employ a canonical code transformation so
+// that code compiled with slightly different IR passes can be diffed more
+// effectively than otherwise. This is done by renaming vregs in a given
+// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
+// move defs closer to their uses in order to reduce diffs caused by slightly
+// different schedules.
+//
+// Basic Usage:
+//
+// llc -o - -run-pass mir-canonicalizer example.mir
+//
+// Reorders instructions canonically.
+// Renames virtual register operands canonically.
+// Strips certain MIR artifacts (optionally).
+//
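+// For example, to compare codegen from two pipelines one can canonicalize
+// both MIR dumps and then diff the results (file names are illustrative):
+//
+//   llc -o first.canon.mir  -run-pass mir-canonicalizer first.mir
+//   llc -o second.canon.mir -run-pass mir-canonicalizer second.mir
+//   diff first.canon.mir second.canon.mir
+//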
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <queue>
+
+using namespace llvm;
+
+namespace llvm {
+extern char &MIRCanonicalizerID;
+} // namespace llvm
+
+#define DEBUG_TYPE "mir-canonicalizer"
+
+static cl::opt<unsigned>
+CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
+ cl::value_desc("N"),
+ cl::desc("Function number to canonicalize."));
+
+static cl::opt<unsigned>
+CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u),
+ cl::value_desc("N"),
+ cl::desc("BasicBlock number to canonicalize."));
+
+namespace {
+
+class MIRCanonicalizer : public MachineFunctionPass {
+public:
+ static char ID;
+ MIRCanonicalizer() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "Rename register operands in a canonical ordering.";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate };
+class TypedVReg {
+ VRType type;
+ unsigned reg;
+
+public:
+ TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {}
+ TypedVReg(VRType type) : type(type), reg(~0U) {
+ assert(type != RSE_Reg && "Expected a non-register type.");
+ }
+
+ bool isReg() const { return type == RSE_Reg; }
+ bool isFrameIndex() const { return type == RSE_FrameIndex; }
+ bool isCandidate() const { return type == RSE_NewCandidate; }
+
+ VRType getType() const { return type; }
+ unsigned getReg() const {
+ assert(this->isReg() && "Expected a virtual or physical register.");
+ return reg;
+ }
+};
+
+char MIRCanonicalizer::ID;
+
+char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
+
+INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
+ "Rename Register Operands Canonically", false, false)
+
+INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
+ "Rename Register Operands Canonically", false, false)
+
+static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ std::vector<MachineBasicBlock *> RPOList;
+ for (auto MBB : RPOT) {
+ RPOList.push_back(MBB);
+ }
+
+ return RPOList;
+}
+
+// Find a dummy vreg. We use this vreg's register class to generate throw-away
+// vregs that are used to skip vreg numbers so that vreg numbers line up.
+static unsigned GetDummyVReg(const MachineFunction &MF) {
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ return MO.getReg();
+ }
+ }
+ }
+
+ return ~0U;
+}
+
+static bool rescheduleCanonically(MachineBasicBlock *MBB) {
+
+ bool Changed = false;
+
+ // Calculates the distance of MI from the beginning of its parent BB.
+ auto getInstrIdx = [](const MachineInstr &MI) {
+ unsigned i = 0;
+ for (auto &CurMI : *MI.getParent()) {
+ if (&CurMI == &MI)
+ return i;
+ i++;
+ }
+ return ~0U;
+ };
+
+ // Pre-Populate vector of instructions to reschedule so that we don't
+ // clobber the iterator.
+ std::vector<MachineInstr *> Instructions;
+ for (auto &MI : *MBB) {
+ Instructions.push_back(&MI);
+ }
+
+ for (auto *II : Instructions) {
+ if (II->getNumOperands() == 0)
+ continue;
+
+ MachineOperand &MO = II->getOperand(0);
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+
+ DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
+
+ MachineInstr *Def = II;
+ unsigned Distance = ~0U;
+ MachineInstr *UseToBringDefCloserTo = nullptr;
+ MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+ for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
+ MachineInstr *UseInst = UO.getParent();
+
+ const unsigned DefLoc = getInstrIdx(*Def);
+ const unsigned UseLoc = getInstrIdx(*UseInst);
+ const unsigned Delta = (UseLoc - DefLoc);
+
+ if (UseInst->getParent() != Def->getParent())
+ continue;
+ if (DefLoc >= UseLoc)
+ continue;
+
+ if (Delta < Distance) {
+ Distance = Delta;
+ UseToBringDefCloserTo = UseInst;
+ }
+ }
+
+ const auto BBE = MBB->instr_end();
+ MachineBasicBlock::iterator DefI = BBE;
+ MachineBasicBlock::iterator UseI = BBE;
+
+ for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
+
+ if (DefI != BBE && UseI != BBE)
+ break;
+
+ if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo))
+ continue;
+
+ if (&*BBI == Def) {
+ DefI = BBI;
+ continue;
+ }
+
+ if (&*BBI == UseToBringDefCloserTo) {
+ UseI = BBI;
+ continue;
+ }
+ }
+
+ if (DefI == BBE || UseI == BBE)
+ continue;
+
+ DEBUG({
+ dbgs() << "Splicing ";
+ DefI->dump();
+ dbgs() << " right before: ";
+ UseI->dump();
+ });
+
+ Changed = true;
+ MBB->splice(UseI, MBB, DefI);
+ }
+
+ return Changed;
+}
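+// Illustrative effect of the rescheduling above (pseudo-MIR, made up for
+// exposition): given
+//
+//   %1 = LOAD %stack.0
+//   %2 = LOAD %stack.1
+//   STORE %1, %stack.2
+//
+// the def of %1 is spliced to sit directly before its nearest use, yielding
+//
+//   %2 = LOAD %stack.1
+//   %1 = LOAD %stack.0
+//   STORE %1, %stack.2
+//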
+
+/// Here we find our candidates. What makes an interesting candidate?
+/// A candidate for a canonicalization tree root is normally any kind of
+/// instruction that causes side effects, such as a store to memory, a copy to
+/// a physical register, or a return instruction. We use each of these as an
+/// expression tree root that we walk in order to build a canonical walk which
+/// should result in canonical vreg renaming.
+static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
+ std::vector<MachineInstr *> Candidates;
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
+ MachineInstr *MI = &*II;
+
+ bool DoesMISideEffect = false;
+
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) {
+ const unsigned Dst = MI->getOperand(0).getReg();
+ DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst);
+
+ for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
+ if (DoesMISideEffect) break;
+ DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());
+ }
+ }
+
+ if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect)
+ continue;
+
+ DEBUG(dbgs() << "Found Candidate: "; MI->dump(););
+ Candidates.push_back(MI);
+ }
+
+ return Candidates;
+}
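+// For example (pseudo-MIR, made up for exposition): in
+//
+//   %2 = ADD %0, %1
+//   STORE %2, %stack.0
+//   %eax = COPY %2
+//
+// the STORE (mayStore) and the COPY (it defines a physical register) are
+// picked as candidates, while the ADD that only feeds vregs inside the same
+// block is not.
+//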
+
+static void doCandidateWalk(std::vector<TypedVReg> &VRegs,
+ std::queue<TypedVReg> &RegQueue,
+ std::vector<MachineInstr *> &VisitedMIs,
+ const MachineBasicBlock *MBB) {
+
+ const MachineFunction &MF = *MBB->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ while (!RegQueue.empty()) {
+
+ auto TReg = RegQueue.front();
+ RegQueue.pop();
+
+ if (TReg.isFrameIndex()) {
+ DEBUG(dbgs() << "Popping frame index.\n";);
+ VRegs.push_back(TypedVReg(RSE_FrameIndex));
+ continue;
+ }
+
+ assert(TReg.isReg() && "Expected vreg or physreg.");
+ unsigned Reg = TReg.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DEBUG({
+ dbgs() << "Popping vreg ";
+ MRI.def_begin(Reg)->dump();
+ dbgs() << "\n";
+ });
+
+ if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) {
+ return TR.isReg() && TR.getReg() == Reg;
+ })) {
+ VRegs.push_back(TypedVReg(Reg));
+ }
+ } else {
+ DEBUG(dbgs() << "Popping physreg.\n";);
+ VRegs.push_back(TypedVReg(Reg));
+ continue;
+ }
+
+ for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) {
+ MachineInstr *Def = RI->getParent();
+
+ if (Def->getParent() != MBB)
+ continue;
+
+ if (llvm::any_of(VisitedMIs,
+ [&](const MachineInstr *VMI) { return Def == VMI; })) {
+ break;
+ }
+
+ DEBUG({
+ dbgs() << "\n========================\n";
+ dbgs() << "Visited MI: ";
+ Def->dump();
+ dbgs() << "BB Name: " << Def->getParent()->getName() << "\n";
+ dbgs() << "\n========================\n";
+ });
+ VisitedMIs.push_back(Def);
+ for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
+
+ MachineOperand &MO = Def->getOperand(I);
+ if (MO.isFI()) {
+ DEBUG(dbgs() << "Pushing frame index.\n";);
+ RegQueue.push(TypedVReg(RSE_FrameIndex));
+ }
+
+ if (!MO.isReg())
+ continue;
+ RegQueue.push(TypedVReg(MO.getReg()));
+ }
+ }
+ }
+}
+
+// TODO: Work to remove this in the future. One day when we have named vregs
+// we should be able to form the canonical name based on some characteristic
+// we see at that point of the expression tree (like if we were to name based
+// on some sort of value numbering scheme).
+static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC) {
+ const unsigned VR_GAP = (++VRegGapIndex * 1000);
+
+ DEBUG({
+ dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to "
+ << VR_GAP << "\n";
+ });
+
+ unsigned I = MRI.createVirtualRegister(RC);
+ const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
+ while (I != E) {
+ I = MRI.createVirtualRegister(RC);
+ }
+}
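+// Worked example of the gap arithmetic above (ignoring the virtual-register
+// tag bit for illustration): on the first call VR_GAP is 1 * 1000 = 1000; if
+// the vreg created above has number I = 137, then
+// E = ((137 + 1000) / 1000 + 1) * 1000 = 2000, so throw-away vregs are
+// created until the counter reaches 2000.
+//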
+
+static std::map<unsigned, unsigned>
+GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
+ const std::vector<unsigned> &renamedInOtherBB,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC) {
+ std::map<unsigned, unsigned> VRegRenameMap;
+ unsigned LastRenameReg = MRI.createVirtualRegister(RC);
+ bool FirstCandidate = true;
+
+ for (auto &vreg : VRegs) {
+ if (vreg.isFrameIndex()) {
+ // We skip one vreg for any frame index because there is a good chance
+ // (especially when comparing SelectionDAG to GlobalISel generated MIR)
+ // that in the other file we are just getting an incoming vreg that comes
+ // from a copy from a frame index. So it's safe to skip by one.
+ LastRenameReg = MRI.createVirtualRegister(RC);
+ DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
+ continue;
+ } else if (vreg.isCandidate()) {
+
+ // After the first candidate, for every subsequent candidate, we skip to
+ // the next multiple-of-10 register so that the candidates are more likely
+ // to start at the same vreg number, making it more likely that the
+ // canonical walk from each candidate instruction will line up. We don't
+ // need to skip from the first candidate of the BasicBlock because we
+ // already skip ahead several vregs for each BB.
+ while (LastRenameReg % 10) {
+ if (!FirstCandidate) break;
+ LastRenameReg = MRI.createVirtualRegister(RC);
+
+ DEBUG({
+ dbgs() << "Skipping rename for new candidate " << LastRenameReg
+ << "\n";
+ });
+ }
+ FirstCandidate = false;
+ continue;
+ } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) {
+ LastRenameReg = MRI.createVirtualRegister(RC);
+ DEBUG({
+ dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";
+ });
+ continue;
+ }
+
+ auto Reg = vreg.getReg();
+ if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) {
+ DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";);
+ continue;
+ }
+
+ auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg));
+ LastRenameReg = Rename;
+
+ if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
+ DEBUG(dbgs() << "Mapping vreg ";);
+ if (MRI.reg_begin(Reg) != MRI.reg_end()) {
+ DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
+ } else {
+ DEBUG(dbgs() << Reg;);
+ }
+ DEBUG(dbgs() << " to ";);
+ if (MRI.reg_begin(Rename) != MRI.reg_end()) {
+ DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
+ } else {
+ DEBUG(dbgs() << Rename;);
+ }
+ DEBUG(dbgs() << "\n";);
+
+ VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
+ }
+ }
+
+ return VRegRenameMap;
+}
+
+static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB,
+ const std::map<unsigned, unsigned> &VRegRenameMap,
+ MachineRegisterInfo &MRI) {
+ bool Changed = false;
+ for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) {
+
+ auto VReg = I->first;
+ auto Rename = I->second;
+
+ RenamedInOtherBB.push_back(Rename);
+
+ std::vector<MachineOperand *> RenameMOs;
+ for (auto &MO : MRI.reg_operands(VReg)) {
+ RenameMOs.push_back(&MO);
+ }
+
+ for (auto *MO : RenameMOs) {
+ Changed = true;
+ MO->setReg(Rename);
+
+ if (!MO->isDef())
+ MO->setIsKill(false);
+ }
+ }
+
+ return Changed;
+}
+
+static bool doDefKillClear(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ for (auto &MI : *MBB) {
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (!MO.isDef() && MO.isKill()) {
+ Changed = true;
+ MO.setIsKill(false);
+ }
+
+ if (MO.isDef() && MO.isDead()) {
+ Changed = true;
+ MO.setIsDead(false);
+ }
+ }
+ }
+
+ return Changed;
+}
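+// For example (pseudo-MIR): after this normalization "%1 = COPY killed %0"
+// and "%1 = COPY %0" print identically, so kill/dead flag differences alone
+// cannot show up in a diff of canonicalized MIR.
+//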
+
+static bool runOnBasicBlock(MachineBasicBlock *MBB,
+ std::vector<StringRef> &bbNames,
+ std::vector<unsigned> &renamedInOtherBB,
+ unsigned &basicBlockNum, unsigned &VRegGapIndex) {
+
+ if (CanonicalizeBasicBlockNumber != ~0U) {
+ if (CanonicalizeBasicBlockNumber != basicBlockNum++)
+ return false;
+ DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";);
+ }
+
+ if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
+ DEBUG({
+ dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
+ << "\n";
+ });
+ return false;
+ }
+
+ DEBUG({
+ dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
+ dbgs() << "\n\n================================================\n\n";
+ });
+
+ bool Changed = false;
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ const unsigned DummyVReg = GetDummyVReg(MF);
+ const TargetRegisterClass *DummyRC =
+ (DummyVReg == ~0U) ? nullptr : MRI.getRegClass(DummyVReg);
+ if (!DummyRC) return false;
+
+ bbNames.push_back(MBB->getName());
+ DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
+
+ DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
+ Changed |= rescheduleCanonically(MBB);
+ DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
+
+ std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
+ std::vector<MachineInstr *> VisitedMIs;
+ std::copy(Candidates.begin(), Candidates.end(),
+ std::back_inserter(VisitedMIs));
+
+ std::vector<TypedVReg> VRegs;
+ for (auto candidate : Candidates) {
+ VRegs.push_back(TypedVReg(RSE_NewCandidate));
+
+ std::queue<TypedVReg> RegQueue;
+
+ // Here we walk the vreg operands of a non-root node along our walk.
+ // The root nodes are the original candidates (stores normally).
+ // These are normally not the root nodes (except for the case of copies to
+ // physical registers).
+ for (unsigned i = 1; i < candidate->getNumOperands(); i++) {
+ if (candidate->mayStore() || candidate->isBranch())
+ break;
+
+ MachineOperand &MO = candidate->getOperand(i);
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ continue;
+
+ DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
+ RegQueue.push(TypedVReg(MO.getReg()));
+ }
+
+ // Here we walk the root candidates. We start from the 0th operand because
+ // the root is normally a store to a vreg.
+ for (unsigned i = 0; i < candidate->getNumOperands(); i++) {
+
+ if (!candidate->mayStore() && !candidate->isBranch())
+ break;
+
+ MachineOperand &MO = candidate->getOperand(i);
+
+ // TODO: Do we want to only add vregs here?
+ if (!MO.isReg() && !MO.isFI())
+ continue;
+
+ DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
+
+ RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) :
+ TypedVReg(RSE_FrameIndex));
+ }
+
+ doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB);
+ }
+
+ // If we have populated no vregs to rename then bail.
+ // The rest of this function does the vreg remapping.
+ if (VRegs.size() == 0)
+ return Changed;
+
+ // Skip some vregs, so we can reckon where we'll land next.
+ SkipVRegs(VRegGapIndex, MRI, DummyRC);
+
+ auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC);
+ Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
+ Changed |= doDefKillClear(MBB);
+
+ DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";);
+ DEBUG(dbgs() << "\n\n================================================\n\n");
+ return Changed;
+}
+
+bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
+
+ static unsigned functionNum = 0;
+ if (CanonicalizeFunctionNumber != ~0U) {
+ if (CanonicalizeFunctionNumber != functionNum++)
+ return false;
+ DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";);
+ }
+
+ // We need a valid vreg whose register class we can use to create the
+ // throw-away vregs that skip stray vreg numbers, so that we reach
+ // aligned/canonical vreg values.
+ std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF);
+
+ DEBUG(
+ dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
+ dbgs() << "\n\n================================================\n\n";
+ dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
+ for (auto MBB : RPOList) {
+ dbgs() << MBB->getName() << "\n";
+ }
+ dbgs() << "\n\n================================================\n\n";
+ );
+
+ std::vector<StringRef> BBNames;
+ std::vector<unsigned> RenamedInOtherBB;
+
+ unsigned GapIdx = 0;
+ unsigned BBNum = 0;
+
+ bool Changed = false;
+
+ for (auto MBB : RPOList)
+ Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx);
+
+ return Changed;
+}
+
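// A minimal standalone sketch of the renaming idea in GetVRegRenameMap above:
// assign each vreg, in canonical walk order, a fresh number from a counter,
// skipping vregs already renamed in another block. The names and types here
// are illustrative, not the LLVM API.
#include <map>
#include <set>
#include <vector>

static std::map<unsigned, unsigned>
buildRenameMap(const std::vector<unsigned> &WalkOrder,
               const std::set<unsigned> &RenamedInOtherBB,
               unsigned &NextVReg) {
  std::map<unsigned, unsigned> RenameMap;
  for (unsigned VReg : WalkOrder) {
    if (RenamedInOtherBB.count(VReg))
      continue; // Keep cross-block names stable.
    if (RenameMap.find(VReg) == RenameMap.end())
      RenameMap[VReg] = NextVReg++; // First visit decides the canonical name.
  }
  return RenameMap;
}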
diff --git a/lib/CodeGen/MIRParser/LLVMBuild.txt b/lib/CodeGen/MIRParser/LLVMBuild.txt
index 2852124786e3..6269b005a985 100644
--- a/lib/CodeGen/MIRParser/LLVMBuild.txt
+++ b/lib/CodeGen/MIRParser/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = MIRParser
parent = CodeGen
-required_libraries = AsmParser CodeGen Core MC Support Target
+required_libraries = AsmParser BinaryFormat CodeGen Core MC Support Target
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp
index 58a655a4dee4..6adb7f1288d7 100644
--- a/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/lib/CodeGen/MIRParser/MILexer.cpp
@@ -1,4 +1,4 @@
-//===- MILexer.cpp - Machine instructions lexer implementation ----------===//
+//===- MILexer.cpp - Machine instructions lexer implementation ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,27 +12,33 @@
//===----------------------------------------------------------------------===//
#include "MILexer.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include <algorithm>
+#include <cassert>
#include <cctype>
+#include <string>
using namespace llvm;
namespace {
-typedef function_ref<void(StringRef::iterator Loc, const Twine &)>
- ErrorCallbackType;
+using ErrorCallbackType =
+ function_ref<void(StringRef::iterator Loc, const Twine &)>;
/// This class provides a way to iterate and get characters from the source
/// string.
class Cursor {
- const char *Ptr;
- const char *End;
+ const char *Ptr = nullptr;
+ const char *End = nullptr;
public:
- Cursor(NoneType) : Ptr(nullptr), End(nullptr) {}
+ Cursor(NoneType) {}
explicit Cursor(StringRef Str) {
Ptr = Str.data();
@@ -202,14 +208,24 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("internal", MIToken::kw_internal)
.Case("early-clobber", MIToken::kw_early_clobber)
.Case("debug-use", MIToken::kw_debug_use)
+ .Case("renamable", MIToken::kw_renamable)
.Case("tied-def", MIToken::kw_tied_def)
.Case("frame-setup", MIToken::kw_frame_setup)
.Case("debug-location", MIToken::kw_debug_location)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
+ .Case("rel_offset", MIToken::kw_cfi_rel_offset)
.Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register)
.Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
+ .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset)
+ .Case("escape", MIToken::kw_cfi_escape)
.Case("def_cfa", MIToken::kw_cfi_def_cfa)
+ .Case("remember_state", MIToken::kw_cfi_remember_state)
+ .Case("restore", MIToken::kw_cfi_restore)
+ .Case("restore_state", MIToken::kw_cfi_restore_state)
+ .Case("undefined", MIToken::kw_cfi_undefined)
+ .Case("register", MIToken::kw_cfi_register)
+ .Case("window_save", MIToken::kw_cfi_window_save)
.Case("blockaddress", MIToken::kw_blockaddress)
.Case("intrinsic", MIToken::kw_intrinsic)
.Case("target-index", MIToken::kw_target_index)
@@ -270,6 +286,9 @@ static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
C.advance();
StringRef Number = NumberRange.upto(C);
unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>'
+ // TODO: The format bb.<id>.<irname> is supported only when it's not a
+ // reference. Once we deprecate the format where the irname shows up, we
+ // should only lex forward if it is a reference.
if (C.peek() == '.') {
C.advance(); // Skip '.'
++StringOffset;
@@ -490,6 +509,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
.Case("!alias.scope", MIToken::md_alias_scope)
.Case("!noalias", MIToken::md_noalias)
.Case("!range", MIToken::md_range)
+ .Case("!DIExpression", MIToken::md_diexpr)
.Default(MIToken::Error);
}
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
index 08b82e59c4fc..0204d549d5d4 100644
--- a/lib/CodeGen/MIRParser/MILexer.h
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -1,4 +1,4 @@
-//===- MILexer.h - Lexer for machine instructions -------------------------===//
+//===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,7 +18,7 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
-#include <functional>
+#include <string>
namespace llvm {
@@ -60,14 +60,24 @@ struct MIToken {
kw_internal,
kw_early_clobber,
kw_debug_use,
+ kw_renamable,
kw_tied_def,
kw_frame_setup,
kw_debug_location,
kw_cfi_same_value,
kw_cfi_offset,
+ kw_cfi_rel_offset,
kw_cfi_def_cfa_register,
kw_cfi_def_cfa_offset,
+ kw_cfi_adjust_cfa_offset,
+ kw_cfi_escape,
kw_cfi_def_cfa,
+ kw_cfi_register,
+ kw_cfi_remember_state,
+ kw_cfi_restore,
+ kw_cfi_restore_state,
+ kw_cfi_undefined,
+ kw_cfi_window_save,
kw_blockaddress,
kw_intrinsic,
kw_target_index,
@@ -100,6 +110,7 @@ struct MIToken {
md_alias_scope,
md_noalias,
md_range,
+ md_diexpr,
// Identifier tokens
Identifier,
@@ -132,14 +143,14 @@ struct MIToken {
};
private:
- TokenKind Kind;
+ TokenKind Kind = Error;
StringRef Range;
StringRef StringValue;
std::string StringValueStorage;
APSInt IntVal;
public:
- MIToken() : Kind(Error) {}
+ MIToken() = default;
MIToken &reset(TokenKind Kind, StringRef Range);
@@ -164,7 +175,8 @@ public:
return Kind == kw_implicit || Kind == kw_implicit_define ||
Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
Kind == kw_undef || Kind == kw_internal ||
- Kind == kw_early_clobber || Kind == kw_debug_use;
+ Kind == kw_early_clobber || Kind == kw_debug_use ||
+ Kind == kw_renamable;
}
bool isMemoryOperandFlag() const {
@@ -203,4 +215,4 @@ StringRef lexMIToken(
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index c68d87b15a31..1a78ae3aad07 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MILexer.h"
#include "MIParser.h"
+#include "MILexer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -21,8 +21,8 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
@@ -33,12 +33,15 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -63,11 +66,8 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cctype>
@@ -209,9 +209,11 @@ public:
bool parseJumpTableIndexOperand(MachineOperand &Dest);
bool parseExternalSymbolOperand(MachineOperand &Dest);
bool parseMDNode(MDNode *&Node);
+ bool parseDIExpression(MDNode *&Node);
bool parseMetadataOperand(MachineOperand &Dest);
bool parseCFIOffset(int &Offset);
bool parseCFIRegister(unsigned &Reg);
+ bool parseCFIEscapeValues(std::string& Values);
bool parseCFIOperand(MachineOperand &Dest);
bool parseIRBlock(BasicBlock *&BB, const Function &F);
bool parseBlockAddressOperand(MachineOperand &Dest);
@@ -429,7 +431,7 @@ bool MIParser::parseBasicBlockDefinition(
break;
case MIToken::IRBlock:
// TODO: Report an error when both name and ir block are specified.
- if (parseIRBlock(BB, *MF.getFunction()))
+ if (parseIRBlock(BB, MF.getFunction()))
return true;
lex();
break;
@@ -445,7 +447,7 @@ bool MIParser::parseBasicBlockDefinition(
if (!Name.empty()) {
BB = dyn_cast_or_null<BasicBlock>(
- MF.getFunction()->getValueSymbolTable()->lookup(Name));
+ MF.getFunction().getValueSymbolTable()->lookup(Name));
if (!BB)
return error(Loc, Twine("basic block '") + Name +
"' is not defined in the function '" +
@@ -854,10 +856,14 @@ bool MIParser::parseStandaloneStackObject(int &FI) {
bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
lex();
- if (Token.isNot(MIToken::exclaim))
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(Node))
+ return true;
+ } else if (Token.is(MIToken::md_diexpr)) {
+ if (parseDIExpression(Node))
+ return true;
+ } else
return error("expected a metadata node");
- if (parseMDNode(Node))
- return true;
if (Token.isNot(MIToken::Eof))
return error("expected end of string after the metadata node");
return false;
@@ -1054,6 +1060,9 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
case MIToken::kw_debug_use:
Flags |= RegState::Debug;
break;
+ case MIToken::kw_renamable:
+ Flags |= RegState::Renamable;
+ break;
default:
llvm_unreachable("The current token should be a register flag");
}
@@ -1206,7 +1215,8 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
Reg, Flags & RegState::Define, Flags & RegState::Implicit,
Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug,
- Flags & RegState::InternalRead);
+ Flags & RegState::InternalRead, Flags & RegState::Renamable);
+
return false;
}
@@ -1224,7 +1234,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
const Constant *&C) {
auto Source = StringValue.str(); // The source has to be null terminated.
SMDiagnostic Err;
- C = parseConstantValue(Source, Err, *MF.getFunction()->getParent(),
+ C = parseConstantValue(Source, Err, *MF.getFunction().getParent(),
&PFS.IRSlots);
if (!C)
return error(Loc + Err.getColumnNo(), Err.getMessage());
@@ -1244,7 +1254,7 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
lex();
return false;
} else if (Token.is(MIToken::PointerType)) {
- const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout();
+ const DataLayout &DL = MF.getDataLayout();
unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
lex();
@@ -1338,6 +1348,8 @@ bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
return error(Twine("use of undefined machine basic block #") +
Twine(Number));
MBB = MBBInfo->second;
+ // TODO: Only parse the name if it's a MachineBasicBlockLabel. Deprecate once
+ // we drop the <irname> from the bb.<id>.<irname> format.
if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName())
return error(Twine("the name of machine basic block #") + Twine(Number) +
" isn't '" + Token.stringValue() + "'");
@@ -1407,7 +1419,7 @@ bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) {
bool MIParser::parseGlobalValue(GlobalValue *&GV) {
switch (Token.kind()) {
case MIToken::NamedGlobalValue: {
- const Module *M = MF.getFunction()->getParent();
+ const Module *M = MF.getFunction().getParent();
GV = M->getNamedValue(Token.stringValue());
if (!GV)
return error(Twine("use of undefined global value '") + Token.range() +
@@ -1492,6 +1504,7 @@ bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
bool MIParser::parseMDNode(MDNode *&Node) {
assert(Token.is(MIToken::exclaim));
+
auto Loc = Token.location();
lex();
if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
@@ -1507,10 +1520,56 @@ bool MIParser::parseMDNode(MDNode *&Node) {
return false;
}
+bool MIParser::parseDIExpression(MDNode *&Expr) {
+ assert(Token.is(MIToken::md_diexpr));
+ lex();
+
+ // FIXME: Share this parsing with the IL parser.
+ SmallVector<uint64_t, 8> Elements;
+
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+
+ if (Token.isNot(MIToken::rparen)) {
+ do {
+ if (Token.is(MIToken::Identifier)) {
+ if (unsigned Op = dwarf::getOperationEncoding(Token.stringValue())) {
+ lex();
+ Elements.push_back(Op);
+ continue;
+ }
+ return error(Twine("invalid DWARF op '") + Token.stringValue() + "'");
+ }
+
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+
+ auto &U = Token.integerValue();
+ if (U.ugt(UINT64_MAX))
+ return error("element too large, limit is " + Twine(UINT64_MAX));
+ Elements.push_back(U.getZExtValue());
+ lex();
+
+ } while (consumeIfPresent(MIToken::comma));
+ }
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ Expr = DIExpression::get(MF.getFunction().getContext(), Elements);
+ return false;
+}
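+// For reference, an operand this parser accepts looks like the following
+// (an illustrative expression; any operator name recognized by
+// dwarf::getOperationEncoding is allowed):
+//
+//   !DIExpression(DW_OP_constu, 4, DW_OP_minus)
+//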
+
bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
MDNode *Node = nullptr;
- if (parseMDNode(Node))
- return true;
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(Node))
+ return true;
+ } else if (Token.is(MIToken::md_diexpr)) {
+ if (parseDIExpression(Node))
+ return true;
+ }
Dest = MachineOperand::CreateMetadata(Node);
return false;
}
@@ -1541,6 +1600,21 @@ bool MIParser::parseCFIRegister(unsigned &Reg) {
return false;
}
+bool MIParser::parseCFIEscapeValues(std::string &Values) {
+ do {
+ if (Token.isNot(MIToken::HexLiteral))
+ return error("expected a hexadecimal literal");
+ unsigned Value;
+ if (getUnsigned(Value))
+ return true;
+ if (Value > UINT8_MAX)
+ return error("expected a 8-bit integer (too large)");
+ Values.push_back(static_cast<uint8_t>(Value));
+ lex();
+ } while (consumeIfPresent(MIToken::comma));
+ return false;
+}
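+// Illustrative input for the parser above: a comma-separated list of 8-bit
+// hex literals, e.g. an escape operand written as
+//
+//   escape 0x61, 0x08, 0x00
+//
+// (the byte values here are made up for exposition).
+//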
+
bool MIParser::parseCFIOperand(MachineOperand &Dest) {
auto Kind = Token.kind();
lex();
@@ -1560,6 +1634,13 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
CFIIndex =
MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
break;
+ case MIToken::kw_cfi_rel_offset:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset))
+ return true;
+ CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createRelOffset(nullptr, Reg, Offset));
+ break;
case MIToken::kw_cfi_def_cfa_register:
if (parseCFIRegister(Reg))
return true;
@@ -1573,6 +1654,12 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
break;
+ case MIToken::kw_cfi_adjust_cfa_offset:
+ if (parseCFIOffset(Offset))
+ return true;
+ CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, Offset));
+ break;
case MIToken::kw_cfi_def_cfa:
if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
parseCFIOffset(Offset))
@@ -1581,6 +1668,42 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
CFIIndex =
MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
break;
+ case MIToken::kw_cfi_remember_state:
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
+ break;
+ case MIToken::kw_cfi_restore:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_restore_state:
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
+ break;
+ case MIToken::kw_cfi_undefined:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_register: {
+ unsigned Reg2;
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIRegister(Reg2))
+ return true;
+
+ CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createRegister(nullptr, Reg, Reg2));
+ break;
+ }
+ case MIToken::kw_cfi_window_save:
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
+ break;
+ case MIToken::kw_cfi_escape: {
+ std::string Values;
+ if (parseCFIEscapeValues(Values))
+ return true;
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, Values));
+ break;
+ }
default:
// TODO: Parse the other CFI operands.
llvm_unreachable("The current token should be a cfi operand");
@@ -1819,6 +1942,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
case MIToken::kw_internal:
case MIToken::kw_early_clobber:
case MIToken::kw_debug_use:
+ case MIToken::kw_renamable:
case MIToken::underscore:
case MIToken::NamedRegister:
case MIToken::VirtualRegister:
@@ -1851,13 +1975,23 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
return parseExternalSymbolOperand(Dest);
case MIToken::SubRegisterIndex:
return parseSubRegisterIndexOperand(Dest);
+ case MIToken::md_diexpr:
case MIToken::exclaim:
return parseMetadataOperand(Dest);
case MIToken::kw_cfi_same_value:
case MIToken::kw_cfi_offset:
+ case MIToken::kw_cfi_rel_offset:
case MIToken::kw_cfi_def_cfa_register:
case MIToken::kw_cfi_def_cfa_offset:
+ case MIToken::kw_cfi_adjust_cfa_offset:
+ case MIToken::kw_cfi_escape:
case MIToken::kw_cfi_def_cfa:
+ case MIToken::kw_cfi_register:
+ case MIToken::kw_cfi_remember_state:
+ case MIToken::kw_cfi_restore:
+ case MIToken::kw_cfi_restore_state:
+ case MIToken::kw_cfi_undefined:
+ case MIToken::kw_cfi_window_save:
return parseCFIOperand(Dest);
case MIToken::kw_blockaddress:
return parseBlockAddressOperand(Dest);
@@ -1968,7 +2102,7 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) {
bool MIParser::parseIRValue(const Value *&V) {
switch (Token.kind()) {
case MIToken::NamedIRValue: {
- V = MF.getFunction()->getValueSymbolTable()->lookup(Token.stringValue());
+ V = MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue());
break;
}
case MIToken::IRValue: {
@@ -2029,8 +2163,11 @@ bool MIParser::getHexUint(APInt &Result) {
return true;
StringRef V = S.substr(2);
APInt A(V.size()*4, V, 16);
- Result = APInt(A.getActiveBits(),
- ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
+
+ // If A is 0, then A.getActiveBits() is 0, which is not a valid bitwidth.
+ // Make sure that is not the case before constructing the result.
+ unsigned NumBits = (A == 0) ? 32 : A.getActiveBits();
+ Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
return false;
}
@@ -2216,9 +2353,15 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
Flags |= MachineMemOperand::MOStore;
lex();
+ // Optional 'store' for operands that both load and store.
+ if (Token.is(MIToken::Identifier) && Token.stringValue() == "store") {
+ Flags |= MachineMemOperand::MOStore;
+ lex();
+ }
+
// Optional synchronization scope.
SyncScope::ID SSID;
- if (parseOptionalScope(MF.getFunction()->getContext(), SSID))
+ if (parseOptionalScope(MF.getFunction().getContext(), SSID))
return true;
// Up to two atomic orderings (cmpxchg provides guarantees on failure).
@@ -2238,7 +2381,11 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
MachinePointerInfo Ptr = MachinePointerInfo();
if (Token.is(MIToken::Identifier)) {
- const char *Word = Flags & MachineMemOperand::MOLoad ? "from" : "into";
+ const char *Word =
+ ((Flags & MachineMemOperand::MOLoad) &&
+ (Flags & MachineMemOperand::MOStore))
+ ? "on"
+ : Flags & MachineMemOperand::MOLoad ? "from" : "into";
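+ // Illustrative operand text for each case (made-up IR value names):
+ //   :: (load 4 from %ir.p)       uses "from"
+ //   :: (store 4 into %ir.p)      uses "into"
+ //   :: (load store 4 on %ir.p)   uses "on"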
if (Token.stringValue() != Word)
return error(Twine("expected '") + Word + "'");
lex();
@@ -2395,12 +2542,12 @@ static const BasicBlock *getIRBlockFromSlot(
const BasicBlock *MIParser::getIRBlock(unsigned Slot) {
if (Slots2BasicBlocks.empty())
- initSlots2BasicBlocks(*MF.getFunction(), Slots2BasicBlocks);
+ initSlots2BasicBlocks(MF.getFunction(), Slots2BasicBlocks);
return getIRBlockFromSlot(Slot, Slots2BasicBlocks);
}
const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) {
- if (&F == MF.getFunction())
+ if (&F == &MF.getFunction())
return getIRBlock(Slot);
DenseMap<unsigned, const BasicBlock *> CustomSlots2BasicBlocks;
initSlots2BasicBlocks(F, CustomSlots2BasicBlocks);
@@ -2431,7 +2578,7 @@ static void initSlots2Values(const Function &F,
const Value *MIParser::getIRValue(unsigned Slot) {
if (Slots2Values.empty())
- initSlots2Values(*MF.getFunction(), Slots2Values);
+ initSlots2Values(MF.getFunction(), Slots2Values);
auto ValueInfo = Slots2Values.find(Slot);
if (ValueInfo == Slots2Values.end())
return nullptr;
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 78b57f357781..7d8e62736a34 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -120,7 +120,7 @@ public:
bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
std::vector<CalleeSavedInfo> &CSIInfo,
const yaml::StringValue &RegisterSource,
- int FrameIdx);
+ bool IsRestored, int FrameIdx);
bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineStackObject &Object,
@@ -214,6 +214,9 @@ void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) {
case SourceMgr::DK_Note:
Kind = DS_Note;
break;
+ case SourceMgr::DK_Remark:
+ llvm_unreachable("remark unexpected");
+ break;
}
Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag));
}
@@ -438,6 +441,7 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
if (StringRef(VReg.Class.Value).equals("_")) {
Info.Kind = VRegInfo::GENERIC;
+ Info.D.RegBank = nullptr;
} else {
const auto *RC = getRegClass(MF, VReg.Class.Value);
if (RC) {
@@ -547,7 +551,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF) {
MachineFunction &MF = PFS.MF;
MachineFrameInfo &MFI = MF.getFrameInfo();
- const Function &F = *MF.getFunction();
+ const Function &F = MF.getFunction();
const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken);
@@ -587,6 +591,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
else
ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
+ MFI.setStackID(ObjectIdx, Object.StackID);
if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value,
ObjectIdx))
.second)
@@ -594,7 +599,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
Twine("redefinition of fixed stack object '%fixed-stack.") +
Twine(Object.ID.Value) + "'");
if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,
- ObjectIdx))
+ Object.CalleeSavedRestored, ObjectIdx))
return true;
}
@@ -619,13 +624,15 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
Object.Size, Object.Alignment,
Object.Type == yaml::MachineStackObject::SpillSlot, Alloca);
MFI.setObjectOffset(ObjectIdx, Object.Offset);
+ MFI.setStackID(ObjectIdx, Object.StackID);
+
if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx))
.second)
return error(Object.ID.SourceRange.Start,
Twine("redefinition of stack object '%stack.") +
Twine(Object.ID.Value) + "'");
if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,
- ObjectIdx))
+ Object.CalleeSavedRestored, ObjectIdx))
return true;
if (Object.LocalOffset)
MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue());
@@ -650,14 +657,16 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
bool MIRParserImpl::parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
std::vector<CalleeSavedInfo> &CSIInfo,
- const yaml::StringValue &RegisterSource, int FrameIdx) {
+ const yaml::StringValue &RegisterSource, bool IsRestored, int FrameIdx) {
if (RegisterSource.Value.empty())
return false;
unsigned Reg = 0;
SMDiagnostic Error;
if (parseNamedRegisterReference(PFS, Reg, RegisterSource.Value, Error))
return error(Error, RegisterSource.SourceRange);
- CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx));
+ CalleeSavedInfo CSI(Reg, FrameIdx);
+ CSI.setRestored(IsRestored);
+ CSIInfo.push_back(CSI);
return false;
}
@@ -713,9 +722,13 @@ bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS,
MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF) {
DenseMap<unsigned, unsigned> &ConstantPoolSlots = PFS.ConstantPoolSlots;
const MachineFunction &MF = PFS.MF;
- const auto &M = *MF.getFunction()->getParent();
+ const auto &M = *MF.getFunction().getParent();
SMDiagnostic Error;
for (const auto &YamlConstant : YamlMF.Constants) {
+ if (YamlConstant.IsTargetSpecific)
+ // FIXME: Support target-specific constant pools
+ return error(YamlConstant.Value.SourceRange.Start,
+ "Can't parse target-specific constant pool entries yet");
const Constant *Value = dyn_cast_or_null<Constant>(
parseConstantValue(YamlConstant.Value.Value, Error, M));
if (!Value)
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index ddeacf1d1bfb..3568f96d2b9a 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -12,16 +12,18 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -31,19 +33,20 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MIRPrinter.h"
-#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Value.h"
@@ -57,13 +60,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/YAMLTraits.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cinttypes>
@@ -75,7 +75,8 @@
using namespace llvm;
-static cl::opt<bool> SimplifyMIR("simplify-mir",
+static cl::opt<bool> SimplifyMIR(
+ "simplify-mir", cl::Hidden,
cl::desc("Leave out unnecessary information when printing MIR"));
namespace {
@@ -156,15 +157,13 @@ public:
void print(const MachineBasicBlock &MBB);
void print(const MachineInstr &MI);
- void printMBBReference(const MachineBasicBlock &MBB);
void printIRBlockReference(const BasicBlock &BB);
void printIRValueReference(const Value &V);
void printStackObjectReference(int FrameIndex);
void printOffset(int64_t Offset);
- void printTargetFlags(const MachineOperand &Op);
- void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
- unsigned I, bool ShouldPrintRegisterTies,
- LLT TypeToPrint, bool IsDef = false);
+ void print(const MachineInstr &MI, unsigned OpIdx,
+ const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies,
+ LLT TypeToPrint, bool PrintDef = true);
void print(const LLVMContext &Context, const TargetInstrInfo &TII,
const MachineMemOperand &Op);
void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID);
@@ -192,23 +191,10 @@ template <> struct BlockScalarTraits<Module> {
} // end namespace yaml
} // end namespace llvm
-static void printReg(unsigned Reg, raw_ostream &OS,
- const TargetRegisterInfo *TRI) {
- // TODO: Print Stack Slots.
- if (!Reg)
- OS << '_';
- else if (TargetRegisterInfo::isVirtualRegister(Reg))
- OS << '%' << TargetRegisterInfo::virtReg2Index(Reg);
- else if (Reg < TRI->getNumRegs())
- OS << '%' << StringRef(TRI->getName(Reg)).lower();
- else
- llvm_unreachable("Can't print this kind of register yet");
-}
-
-static void printReg(unsigned Reg, yaml::StringValue &Dest,
- const TargetRegisterInfo *TRI) {
+static void printRegMIR(unsigned Reg, yaml::StringValue &Dest,
+ const TargetRegisterInfo *TRI) {
raw_string_ostream OS(Dest.Value);
- printReg(Reg, OS, TRI);
+ OS << printReg(Reg, TRI);
}
void MIRPrinter::print(const MachineFunction &MF) {
@@ -227,8 +213,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
MachineFunctionProperties::Property::Selected);
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
- ModuleSlotTracker MST(MF.getFunction()->getParent());
- MST.incorporateFunction(*MF.getFunction());
+ ModuleSlotTracker MST(MF.getFunction().getParent());
+ MST.incorporateFunction(MF.getFunction());
convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
convertStackObjects(YamlMF, MF, MST);
if (const auto *ConstantPool = MF.getConstantPool())
@@ -262,7 +248,7 @@ static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS,
if (RegMask[I / 32] & (1u << (I % 32))) {
if (IsRegInRegMaskFound)
OS << ',';
- printReg(I, OS, TRI);
+ OS << printReg(I, TRI);
IsRegInRegMaskFound = true;
}
}
@@ -270,6 +256,14 @@ static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS,
OS << ')';
}
+static void printRegClassOrBank(unsigned Reg, yaml::StringValue &Dest,
+ const MachineRegisterInfo &RegInfo,
+ const TargetRegisterInfo *TRI) {
+ raw_string_ostream OS(Dest.Value);
+ OS << printRegClassOrBank(Reg, RegInfo, TRI);
+}
+
+
void MIRPrinter::convert(yaml::MachineFunction &MF,
const MachineRegisterInfo &RegInfo,
const TargetRegisterInfo *TRI) {
@@ -280,28 +274,19 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
yaml::VirtualRegisterDefinition VReg;
VReg.ID = I;
- if (RegInfo.getRegClassOrNull(Reg))
- VReg.Class =
- StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
- else if (RegInfo.getRegBankOrNull(Reg))
- VReg.Class = StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower();
- else {
- VReg.Class = std::string("_");
- assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) &&
- "Generic registers must have a valid type");
- }
+ ::printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI);
unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
if (PreferredReg)
- printReg(PreferredReg, VReg.PreferredRegister, TRI);
+ printRegMIR(PreferredReg, VReg.PreferredRegister, TRI);
MF.VirtualRegisters.push_back(VReg);
}
// Print the live ins.
- for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) {
+ for (std::pair<unsigned, unsigned> LI : RegInfo.liveins()) {
yaml::MachineFunctionLiveIn LiveIn;
- printReg(I->first, LiveIn.Register, TRI);
- if (I->second)
- printReg(I->second, LiveIn.VirtualRegister, TRI);
+ printRegMIR(LI.first, LiveIn.Register, TRI);
+ if (LI.second)
+ printRegMIR(LI.second, LiveIn.VirtualRegister, TRI);
MF.LiveIns.push_back(LiveIn);
}
@@ -311,7 +296,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
for (const MCPhysReg *I = CalleeSavedRegs; *I; ++I) {
yaml::FlowStringValue Reg;
- printReg(*I, Reg, TRI);
+ printRegMIR(*I, Reg, TRI);
CalleeSavedRegisters.push_back(Reg);
}
MF.CalleeSavedRegisters = CalleeSavedRegisters;
@@ -337,13 +322,11 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
if (MFI.getSavePoint()) {
raw_string_ostream StrOS(YamlMFI.SavePoint.Value);
- MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
- .printMBBReference(*MFI.getSavePoint());
+ StrOS << printMBBReference(*MFI.getSavePoint());
}
if (MFI.getRestorePoint()) {
raw_string_ostream StrOS(YamlMFI.RestorePoint.Value);
- MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
- .printMBBReference(*MFI.getRestorePoint());
+ StrOS << printMBBReference(*MFI.getRestorePoint());
}
}
@@ -366,6 +349,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.Offset = MFI.getObjectOffset(I);
YamlObject.Size = MFI.getObjectSize(I);
YamlObject.Alignment = MFI.getObjectAlignment(I);
+ YamlObject.StackID = MFI.getStackID(I);
YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
YMF.FixedStackObjects.push_back(YamlObject);
@@ -392,6 +376,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.Offset = MFI.getObjectOffset(I);
YamlObject.Size = MFI.getObjectSize(I);
YamlObject.Alignment = MFI.getObjectAlignment(I);
+ YamlObject.StackID = MFI.getStackID(I);
YMF.StackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(std::make_pair(
@@ -400,15 +385,20 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
yaml::StringValue Reg;
- printReg(CSInfo.getReg(), Reg, TRI);
+ printRegMIR(CSInfo.getReg(), Reg, TRI);
auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
assert(StackObjectInfo != StackObjectOperandMapping.end() &&
"Invalid stack object index");
const FrameIndexOperand &StackObject = StackObjectInfo->second;
- if (StackObject.IsFixed)
+ if (StackObject.IsFixed) {
YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
- else
+ YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
+ CSInfo.isRestored();
+ } else {
YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
+ CSInfo.isRestored();
+ }
}
for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
auto LocalObject = MFI.getLocalFrameObjectMap(I);
@@ -456,17 +446,20 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
const MachineConstantPool &ConstantPool) {
unsigned ID = 0;
for (const MachineConstantPoolEntry &Constant : ConstantPool.getConstants()) {
- // TODO: Serialize target specific constant pool entries.
- if (Constant.isMachineConstantPoolEntry())
- llvm_unreachable("Can't print target specific constant pool entries yet");
-
- yaml::MachineConstantPoolValue YamlConstant;
std::string Str;
raw_string_ostream StrOS(Str);
- Constant.Val.ConstVal->printAsOperand(StrOS);
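+    // Target-specific constant pool entries are printed through their own
+    // print() hook and flagged via IsTargetSpecific below.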
+ if (Constant.isMachineConstantPoolEntry()) {
+ Constant.Val.MachineCPVal->print(StrOS);
+ } else {
+ Constant.Val.ConstVal->printAsOperand(StrOS);
+ }
+
+ yaml::MachineConstantPoolValue YamlConstant;
YamlConstant.ID = ID++;
YamlConstant.Value = StrOS.str();
YamlConstant.Alignment = Constant.getAlignment();
+ YamlConstant.IsTargetSpecific = Constant.isMachineConstantPoolEntry();
+
MF.Constants.push_back(YamlConstant);
}
}
@@ -482,8 +475,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
Entry.ID = ID++;
for (const auto *MBB : Table.MBBs) {
raw_string_ostream StrOS(Str);
- MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
- .printMBBReference(*MBB);
+ StrOS << printMBBReference(*MBB);
Entry.Blocks.push_back(StrOS.str());
Str.clear();
}
@@ -593,13 +585,19 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
bool HasLineAttributes = false;
// Print the successors
bool canPredictProbs = canPredictBranchProbabilities(MBB);
- if (!MBB.succ_empty() && (!SimplifyMIR || !canPredictProbs ||
- !canPredictSuccessors(MBB))) {
+  // Even when the list of successors is empty, print it if it cannot be
+  // guessed, so the parser knows the list is really empty. This matters
+  // because MIR models unreachable code as an empty block with an empty
+  // successor list; if the parser saw such a block without an explicit
+  // successor list, it would assume the code falls through to the next
+  // block.
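+  // For example (illustrative MIR), an unreachable block is printed as:
+  //   bb.1:
+  //     successors:
+  // rather than with no successors line at all.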
+ if ((!MBB.succ_empty() && !SimplifyMIR) || !canPredictProbs ||
+ !canPredictSuccessors(MBB)) {
OS.indent(2) << "successors: ";
for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) {
if (I != MBB.succ_begin())
OS << ", ";
- printMBBReference(**I);
+ OS << printMBBReference(**I);
if (!SimplifyMIR || !canPredictProbs)
OS << '('
<< format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator())
@@ -619,7 +617,7 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
if (!First)
OS << ", ";
First = false;
- printReg(LI.PhysReg, OS, &TRI);
+ OS << printReg(LI.PhysReg, &TRI);
if (!LI.LaneMask.all())
OS << ":0x" << PrintLaneMask(LI.LaneMask);
}
@@ -648,46 +646,8 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
OS.indent(2) << "}\n";
}
-/// Return true when an instruction has tied register that can't be determined
-/// by the instruction's descriptor.
-static bool hasComplexRegisterTies(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
- const auto &Operand = MI.getOperand(I);
- if (!Operand.isReg() || Operand.isDef())
- // Ignore the defined registers as MCID marks only the uses as tied.
- continue;
- int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
- int TiedIdx = Operand.isTied() ? int(MI.findTiedOperandIdx(I)) : -1;
- if (ExpectedTiedIdx != TiedIdx)
- return true;
- }
- return false;
-}
-
-static LLT getTypeToPrint(const MachineInstr &MI, unsigned OpIdx,
- SmallBitVector &PrintedTypes,
- const MachineRegisterInfo &MRI) {
- const MachineOperand &Op = MI.getOperand(OpIdx);
- if (!Op.isReg())
- return LLT{};
-
- if (MI.isVariadic() || OpIdx >= MI.getNumExplicitOperands())
- return MRI.getType(Op.getReg());
-
- auto &OpInfo = MI.getDesc().OpInfo[OpIdx];
- if (!OpInfo.isGenericType())
- return MRI.getType(Op.getReg());
-
- if (PrintedTypes[OpInfo.getGenericTypeIndex()])
- return LLT{};
-
- PrintedTypes.set(OpInfo.getGenericTypeIndex());
- return MRI.getType(Op.getReg());
-}
-
void MIPrinter::print(const MachineInstr &MI) {
- const auto *MF = MI.getParent()->getParent();
+ const auto *MF = MI.getMF();
const auto &MRI = MF->getRegInfo();
const auto &SubTarget = MF->getSubtarget();
const auto *TRI = SubTarget.getRegisterInfo();
@@ -698,16 +658,16 @@ void MIPrinter::print(const MachineInstr &MI) {
assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
SmallBitVector PrintedTypes(8);
- bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
+ bool ShouldPrintRegisterTies = MI.hasComplexRegisterTies();
unsigned I = 0, E = MI.getNumOperands();
for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
!MI.getOperand(I).isImplicit();
++I) {
if (I)
OS << ", ";
- print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies,
- getTypeToPrint(MI, I, PrintedTypes, MRI),
- /*IsDef=*/true);
+ print(MI, I, TRI, ShouldPrintRegisterTies,
+ MI.getTypeToPrint(I, PrintedTypes, MRI),
+ /*PrintDef=*/false);
}
if (I)
@@ -722,8 +682,8 @@ void MIPrinter::print(const MachineInstr &MI) {
for (; I < E; ++I) {
if (NeedComma)
OS << ", ";
- print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies,
- getTypeToPrint(MI, I, PrintedTypes, MRI));
+ print(MI, I, TRI, ShouldPrintRegisterTies,
+ MI.getTypeToPrint(I, PrintedTypes, MRI));
NeedComma = true;
}
@@ -736,7 +696,7 @@ void MIPrinter::print(const MachineInstr &MI) {
if (!MI.memoperands_empty()) {
OS << " :: ";
- const LLVMContext &Context = MF->getFunction()->getContext();
+ const LLVMContext &Context = MF->getFunction().getContext();
bool NeedComma = false;
for (const auto *Op : MI.memoperands()) {
if (NeedComma)
@@ -747,14 +707,6 @@ void MIPrinter::print(const MachineInstr &MI) {
}
}
-void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
- OS << "%bb." << MBB.getNumber();
- if (const auto *BB = MBB.getBasicBlock()) {
- if (BB->hasName())
- OS << '.' << BB->getName();
- }
-}
-
static void printIRSlotNumber(raw_ostream &OS, int Slot) {
if (Slot == -1)
OS << "<badref>";
@@ -806,13 +758,8 @@ void MIPrinter::printStackObjectReference(int FrameIndex) {
assert(ObjectInfo != StackObjectOperandMapping.end() &&
"Invalid frame index");
const FrameIndexOperand &Operand = ObjectInfo->second;
- if (Operand.IsFixed) {
- OS << "%fixed-stack." << Operand.ID;
- return;
- }
- OS << "%stack." << Operand.ID;
- if (!Operand.Name.empty())
- OS << '.' << Operand.Name;
+ MachineOperand::printStackObjectReference(OS, Operand.ID, Operand.IsFixed,
+ Operand.Name);
}
void MIPrinter::printOffset(int64_t Offset) {
@@ -825,154 +772,43 @@ void MIPrinter::printOffset(int64_t Offset) {
OS << " + " << Offset;
}
-static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
- auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
- for (const auto &I : Flags) {
- if (I.first == TF) {
- return I.second;
- }
- }
- return nullptr;
-}
-
-void MIPrinter::printTargetFlags(const MachineOperand &Op) {
- if (!Op.getTargetFlags())
- return;
- const auto *TII =
- Op.getParent()->getParent()->getParent()->getSubtarget().getInstrInfo();
- assert(TII && "expected instruction info");
- auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags());
- OS << "target-flags(";
- const bool HasDirectFlags = Flags.first;
- const bool HasBitmaskFlags = Flags.second;
- if (!HasDirectFlags && !HasBitmaskFlags) {
- OS << "<unknown>) ";
- return;
- }
- if (HasDirectFlags) {
- if (const auto *Name = getTargetFlagName(TII, Flags.first))
- OS << Name;
- else
- OS << "<unknown target flag>";
- }
- if (!HasBitmaskFlags) {
- OS << ") ";
- return;
- }
- bool IsCommaNeeded = HasDirectFlags;
- unsigned BitMask = Flags.second;
- auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags();
- for (const auto &Mask : BitMasks) {
- // Check if the flag's bitmask has the bits of the current mask set.
- if ((BitMask & Mask.first) == Mask.first) {
- if (IsCommaNeeded)
- OS << ", ";
- IsCommaNeeded = true;
- OS << Mask.second;
- // Clear the bits which were serialized from the flag's bitmask.
- BitMask &= ~(Mask.first);
- }
- }
- if (BitMask) {
- // When the resulting flag's bitmask isn't zero, we know that we didn't
- // serialize all of the bit flags.
- if (IsCommaNeeded)
- OS << ", ";
- OS << "<unknown bitmask target flag>";
- }
- OS << ") ";
-}
-
-static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
- const auto *TII = MF.getSubtarget().getInstrInfo();
- assert(TII && "expected instruction info");
- auto Indices = TII->getSerializableTargetIndices();
- for (const auto &I : Indices) {
- if (I.first == Index) {
- return I.second;
- }
- }
- return nullptr;
-}
-
-void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
- unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint,
- bool IsDef) {
- printTargetFlags(Op);
+void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
+ const TargetRegisterInfo *TRI,
+ bool ShouldPrintRegisterTies, LLT TypeToPrint,
+ bool PrintDef) {
+ const MachineOperand &Op = MI.getOperand(OpIdx);
switch (Op.getType()) {
- case MachineOperand::MO_Register:
- if (Op.isImplicit())
- OS << (Op.isDef() ? "implicit-def " : "implicit ");
- else if (!IsDef && Op.isDef())
- // Print the 'def' flag only when the operand is defined after '='.
- OS << "def ";
- if (Op.isInternalRead())
- OS << "internal ";
- if (Op.isDead())
- OS << "dead ";
- if (Op.isKill())
- OS << "killed ";
- if (Op.isUndef())
- OS << "undef ";
- if (Op.isEarlyClobber())
- OS << "early-clobber ";
- if (Op.isDebug())
- OS << "debug-use ";
- printReg(Op.getReg(), OS, TRI);
- // Print the sub register.
- if (Op.getSubReg() != 0)
- OS << '.' << TRI->getSubRegIndexName(Op.getSubReg());
- if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef())
- OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")";
- if (TypeToPrint.isValid())
- OS << '(' << TypeToPrint << ')';
- break;
case MachineOperand::MO_Immediate:
- OS << Op.getImm();
- break;
+ if (MI.isOperandSubregIdx(OpIdx)) {
+ MachineOperand::printTargetFlags(OS, Op);
+ MachineOperand::printSubregIdx(OS, Op.getImm(), TRI);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case MachineOperand::MO_Register:
case MachineOperand::MO_CImmediate:
- Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
- break;
- case MachineOperand::MO_FPImmediate:
- Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
- break;
case MachineOperand::MO_MachineBasicBlock:
- printMBBReference(*Op.getMBB());
- break;
- case MachineOperand::MO_FrameIndex:
- printStackObjectReference(Op.getIndex());
- break;
case MachineOperand::MO_ConstantPoolIndex:
- OS << "%const." << Op.getIndex();
- printOffset(Op.getOffset());
- break;
case MachineOperand::MO_TargetIndex:
- OS << "target-index(";
- if (const auto *Name = getTargetIndexName(
- *Op.getParent()->getParent()->getParent(), Op.getIndex()))
- OS << Name;
- else
- OS << "<unknown>";
- OS << ')';
- printOffset(Op.getOffset());
- break;
case MachineOperand::MO_JumpTableIndex:
- OS << "%jump-table." << Op.getIndex();
- break;
- case MachineOperand::MO_ExternalSymbol: {
- StringRef Name = Op.getSymbolName();
- OS << '$';
- if (Name.empty()) {
- OS << "\"\"";
- } else {
- printLLVMNameWithoutPrefix(OS, Name);
- }
- printOffset(Op.getOffset());
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_RegisterLiveOut:
+ case MachineOperand::MO_Metadata:
+ case MachineOperand::MO_MCSymbol: {
+ unsigned TiedOperandIdx = 0;
+ if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
+ TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
+ const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo();
+ Op.print(OS, MST, TypeToPrint, PrintDef, ShouldPrintRegisterTies,
+ TiedOperandIdx, TRI, TII);
break;
}
- case MachineOperand::MO_GlobalAddress:
- Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
- printOffset(Op.getOffset());
+ case MachineOperand::MO_FPImmediate:
+ Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+ break;
+ case MachineOperand::MO_FrameIndex:
+ printStackObjectReference(Op.getIndex());
break;
case MachineOperand::MO_BlockAddress:
OS << "blockaddress(";
@@ -991,29 +827,8 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
printCustomRegMask(Op.getRegMask(), OS, TRI);
break;
}
- case MachineOperand::MO_RegisterLiveOut: {
- const uint32_t *RegMask = Op.getRegLiveOut();
- OS << "liveout(";
- bool IsCommaNeeded = false;
- for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) {
- if (RegMask[Reg / 32] & (1U << (Reg % 32))) {
- if (IsCommaNeeded)
- OS << ", ";
- printReg(Reg, OS, TRI);
- IsCommaNeeded = true;
- }
- }
- OS << ")";
- break;
- }
- case MachineOperand::MO_Metadata:
- Op.getMetadata()->printAsOperand(OS, MST);
- break;
- case MachineOperand::MO_MCSymbol:
- OS << "<mcsymbol " << *Op.getMCSymbol() << ">";
- break;
case MachineOperand::MO_CFIIndex: {
- const MachineFunction &MF = *Op.getParent()->getParent()->getParent();
+ const MachineFunction &MF = *Op.getParent()->getMF();
print(MF.getFrameInstructions()[Op.getCFIIndex()], TRI);
break;
}
@@ -1022,7 +837,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
if (ID < Intrinsic::num_intrinsics)
OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')';
else {
- const MachineFunction &MF = *Op.getParent()->getParent()->getParent();
+ const MachineFunction &MF = *Op.getParent()->getMF();
const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo();
OS << "intrinsic(@" << TII->getName(ID) << ')';
}
@@ -1068,12 +883,12 @@ void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII,
if (Op.getFlags() & MachineMemOperand::MOTargetFlag3)
OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3)
<< "\" ";
+
+  assert((Op.isLoad() || Op.isStore()) &&
+         "machine memory operand must be a load or store (or both)");
if (Op.isLoad())
OS << "load ";
- else {
- assert(Op.isStore() && "Non load machine operand must be a store");
+ if (Op.isStore())
OS << "store ";
- }
printSyncScope(Context, Op.getSyncScopeID());
@@ -1084,10 +899,12 @@ void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII,
OS << Op.getSize();
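+  // Use "on" when the operand is both a load and a store (e.g. an atomic
+  // read-modify-write); keep "from" for pure loads and "into" for pure
+  // stores.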
if (const Value *Val = Op.getValue()) {
- OS << (Op.isLoad() ? " from " : " into ");
+ OS << ((Op.isLoad() && Op.isStore()) ? " on "
+ : Op.isLoad() ? " from " : " into ");
printIRValueReference(*Val);
} else if (const PseudoSourceValue *PVal = Op.getPseudoValue()) {
- OS << (Op.isLoad() ? " from " : " into ");
+ OS << ((Op.isLoad() && Op.isStore()) ? " on "
+ : Op.isLoad() ? " from " : " into ");
assert(PVal && "Expected a pseudo source value");
switch (PVal->kind()) {
case PseudoSourceValue::Stack:
@@ -1168,7 +985,7 @@ static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
OS << "<badreg>";
return;
}
- printReg(Reg, OS, TRI);
+ OS << printReg(Reg, TRI);
}
void MIPrinter::print(const MCCFIInstruction &CFI,
@@ -1176,36 +993,96 @@ void MIPrinter::print(const MCCFIInstruction &CFI,
switch (CFI.getOperation()) {
case MCCFIInstruction::OpSameValue:
OS << "same_value ";
- if (CFI.getLabel())
- OS << "<mcsymbol> ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
printCFIRegister(CFI.getRegister(), OS, TRI);
break;
+ case MCCFIInstruction::OpRememberState:
+ OS << "remember_state ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
+ case MCCFIInstruction::OpRestoreState:
+ OS << "restore_state ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
case MCCFIInstruction::OpOffset:
OS << "offset ";
- if (CFI.getLabel())
- OS << "<mcsymbol> ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
printCFIRegister(CFI.getRegister(), OS, TRI);
OS << ", " << CFI.getOffset();
break;
case MCCFIInstruction::OpDefCfaRegister:
OS << "def_cfa_register ";
- if (CFI.getLabel())
- OS << "<mcsymbol> ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
printCFIRegister(CFI.getRegister(), OS, TRI);
break;
case MCCFIInstruction::OpDefCfaOffset:
OS << "def_cfa_offset ";
- if (CFI.getLabel())
- OS << "<mcsymbol> ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
OS << CFI.getOffset();
break;
case MCCFIInstruction::OpDefCfa:
OS << "def_cfa ";
- if (CFI.getLabel())
- OS << "<mcsymbol> ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
printCFIRegister(CFI.getRegister(), OS, TRI);
OS << ", " << CFI.getOffset();
break;
+ case MCCFIInstruction::OpRelOffset:
+ OS << "rel_offset ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ OS << "adjust_cfa_offset ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ OS << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpRestore:
+ OS << "restore ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpEscape: {
+ OS << "escape ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
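+    // DW_CFA_escape carries raw DWARF bytes; print them as a
+    // comma-separated list of hex values.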
+ if (!CFI.getValues().empty()) {
+ size_t e = CFI.getValues().size() - 1;
+ for (size_t i = 0; i < e; ++i)
+ OS << format("0x%02x", uint8_t(CFI.getValues()[i])) << ", ";
+      OS << format("0x%02x", uint8_t(CFI.getValues()[e]));
+ }
+ break;
+ }
+ case MCCFIInstruction::OpUndefined:
+ OS << "undefined ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpRegister:
+ OS << "register ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", ";
+ printCFIRegister(CFI.getRegister2(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpWindowSave:
+ OS << "window_save ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
default:
// TODO: Print the other CFI Operations.
OS << "<unserializable cfi operation>";
diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp
index 09354cf70c3c..1a8427430ea0 100644
--- a/lib/CodeGen/MIRPrintingPass.cpp
+++ b/lib/CodeGen/MIRPrintingPass.cpp
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/MIRPrinter.h"
-#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 81597afe6b02..209abf34d885 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -13,7 +13,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -21,6 +21,9 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -30,10 +33,7 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -42,6 +42,8 @@ using namespace llvm;
MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
: BB(B), Number(-1), xParent(&MF) {
Insts.Parent = this;
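+  // Carry the irreducible loop header weight over from the IR block, if
+  // there is one.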
+ if (B)
+ IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight();
}
MachineBasicBlock::~MachineBasicBlock() {
@@ -68,6 +70,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
return OS;
}
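+// Printable wraps a callback that runs when the object is streamed, so
+// callers can write 'OS << printMBBReference(MBB)' without materializing a
+// temporary string.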
+Printable llvm::printMBBReference(const MachineBasicBlock &MBB) {
+ return Printable([&MBB](raw_ostream &OS) { return MBB.printAsOperand(OS); });
+}
+
/// When an MBB is added to an MF, we need to update the parent pointer of the
/// MBB, the MBB numbering, and any instructions in the MBB to be on the right
/// operand list for registers.
@@ -111,7 +117,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
assert(N->getParent() && "machine instruction not in a basic block");
// Remove from the use/def lists.
- if (MachineFunction *MF = N->getParent()->getParent())
+ if (MachineFunction *MF = N->getMF())
N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
N->setParent(nullptr);
@@ -261,8 +267,8 @@ void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes)
<< " is null\n";
return;
}
- const Function *F = MF->getFunction();
- const Module *M = F ? F->getParent() : nullptr;
+ const Function &F = MF->getFunction();
+ const Module *M = F.getParent();
ModuleSlotTracker MST(M);
print(OS, MST, Indexes);
}
@@ -279,7 +285,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes)
OS << Indexes->getMBBStartIdx(this) << '\t';
- OS << "BB#" << getNumber() << ": ";
+ OS << printMBBReference(*this) << ": ";
const char *Comma = "";
if (const BasicBlock *LBB = getBasicBlock()) {
@@ -300,7 +306,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes) OS << '\t';
OS << " Live Ins:";
for (const auto &LI : LiveIns) {
- OS << ' ' << PrintReg(LI.PhysReg, TRI);
+ OS << ' ' << printReg(LI.PhysReg, TRI);
if (!LI.LaneMask.all())
OS << ':' << PrintLaneMask(LI.LaneMask);
}
@@ -311,7 +317,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes) OS << '\t';
OS << " Predecessors according to CFG:";
for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
- OS << " BB#" << (*PI)->getNumber();
+ OS << " " << printMBBReference(*(*PI));
OS << '\n';
}
@@ -332,17 +338,23 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes) OS << '\t';
OS << " Successors according to CFG:";
for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
- OS << " BB#" << (*SI)->getNumber();
+ OS << " " << printMBBReference(*(*SI));
if (!Probs.empty())
OS << '(' << *getProbabilityIterator(SI) << ')';
}
OS << '\n';
}
+ if (IrrLoopHeaderWeight) {
+ if (Indexes) OS << '\t';
+ OS << " Irreducible loop header weight: "
+ << IrrLoopHeaderWeight.getValue();
+ OS << '\n';
+ }
}
void MachineBasicBlock::printAsOperand(raw_ostream &OS,
bool /*PrintType*/) const {
- OS << "BB#" << getNumber();
+ OS << "%bb." << getNumber();
}
void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
@@ -759,10 +771,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
- DEBUG(dbgs() << "Splitting critical edge:"
- " BB#" << getNumber()
- << " -- BB#" << NMBB->getNumber()
- << " -- BB#" << Succ->getNumber() << '\n');
+ DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this)
+ << " -- " << printMBBReference(*NMBB) << " -- "
+ << printMBBReference(*Succ) << '\n');
LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>();
SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>();
@@ -1015,8 +1026,8 @@ bool MachineBasicBlock::canSplitCriticalEdge(
// case that we can't handle. Since this never happens in properly optimized
// code, just skip those edges.
if (TBB && TBB == FBB) {
- DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
- << getNumber() << '\n');
+ DEBUG(dbgs() << "Won't split critical edge after degenerate "
+ << printMBBReference(*this) << '\n');
return false;
}
return true;
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 4d1ec11df46c..3459a9f71a73 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -12,23 +12,23 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/raw_ostream.h"
+#include <string>
using namespace llvm;
#define DEBUG_TYPE "machine-block-freq"
-
static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"view-machine-block-freq-propagation-dags", cl::Hidden,
cl::desc("Pop up a window to show a dag displaying how machine block "
@@ -42,6 +42,7 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"integer fractional block frequency representation."),
clEnumValN(GVDT_Count, "count", "display a graph using the real "
"profile count if available.")));
+
// Similar option above, but used to control BFI display only after MBP pass
cl::opt<GVDAGType> ViewBlockLayoutWithBFI(
"view-block-layout-with-bfi", cl::Hidden,
@@ -62,10 +63,19 @@ cl::opt<GVDAGType> ViewBlockLayoutWithBFI(
// Command line option to specify the name of the function for CFG dump
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
+
// Command line option to specify hot frequency threshold.
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-hot-freq-perc=
extern cl::opt<unsigned> ViewHotFreqPercent;
+static cl::opt<bool> PrintMachineBlockFreq(
+ "print-machine-bfi", cl::init(false), cl::Hidden,
+ cl::desc("Print the machine block frequency info."));
+
+// Command line option to specify the name of the function for block frequency
+// dump. Defined in Analysis/BlockFrequencyInfo.cpp.
+extern cl::opt<std::string> PrintBlockFreqFuncName;
+
static GVDAGType getGVDT() {
if (ViewBlockLayoutWithBFI != GVDT_None)
return ViewBlockLayoutWithBFI;
@@ -76,9 +86,9 @@ static GVDAGType getGVDT() {
namespace llvm {
template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
- typedef const MachineBasicBlock *NodeRef;
- typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
- typedef pointer_iterator<MachineFunction::const_iterator> nodes_iterator;
+ using NodeRef = const MachineBasicBlock *;
+ using ChildIteratorType = MachineBasicBlock::const_succ_iterator;
+ using nodes_iterator = pointer_iterator<MachineFunction::const_iterator>;
static NodeRef getEntryNode(const MachineBlockFrequencyInfo *G) {
return &G->getFunction()->front();
@@ -99,21 +109,21 @@ template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
}
};
-typedef BFIDOTGraphTraitsBase<MachineBlockFrequencyInfo,
- MachineBranchProbabilityInfo>
- MBFIDOTGraphTraitsBase;
+using MBFIDOTGraphTraitsBase =
+ BFIDOTGraphTraitsBase<MachineBlockFrequencyInfo,
+ MachineBranchProbabilityInfo>;
+
template <>
struct DOTGraphTraits<MachineBlockFrequencyInfo *>
: public MBFIDOTGraphTraitsBase {
- explicit DOTGraphTraits(bool isSimple = false)
- : MBFIDOTGraphTraitsBase(isSimple), CurFunc(nullptr), LayoutOrderMap() {}
-
- const MachineFunction *CurFunc;
+ const MachineFunction *CurFunc = nullptr;
DenseMap<const MachineBasicBlock *, int> LayoutOrderMap;
+ explicit DOTGraphTraits(bool isSimple = false)
+ : MBFIDOTGraphTraitsBase(isSimple) {}
+
std::string getNodeLabel(const MachineBasicBlock *Node,
const MachineBlockFrequencyInfo *Graph) {
-
int layout_order = -1;
// Attach additional ordering information if 'isSimple' is false.
if (!isSimple()) {
@@ -163,7 +173,7 @@ MachineBlockFrequencyInfo::MachineBlockFrequencyInfo()
initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
}
-MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {}
+MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default;
void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfo>();
@@ -183,6 +193,11 @@ void MachineBlockFrequencyInfo::calculate(
F.getName().equals(ViewBlockFreqFuncName))) {
view("MachineBlockFrequencyDAGS." + F.getName());
}
+ if (PrintMachineBlockFreq &&
+ (PrintBlockFreqFuncName.empty() ||
+ F.getName().equals(PrintBlockFreqFuncName))) {
+ MBFI->print(dbgs());
+ }
}
bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
@@ -209,14 +224,20 @@ MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const {
Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
const MachineBasicBlock *MBB) const {
- const Function *F = MBFI->getFunction()->getFunction();
- return MBFI ? MBFI->getBlockProfileCount(*F, MBB) : None;
+  if (!MBFI)
+    return None;
+  const Function &F = MBFI->getFunction()->getFunction();
+  return MBFI->getBlockProfileCount(F, MBB);
}
Optional<uint64_t>
MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
- const Function *F = MBFI->getFunction()->getFunction();
- return MBFI ? MBFI->getProfileCountFromFreq(*F, Freq) : None;
+  if (!MBFI)
+    return None;
+  const Function &F = MBFI->getFunction()->getFunction();
+  return MBFI->getProfileCountFromFreq(F, Freq);
+}
+
+bool
+MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) {
+ assert(MBFI && "Expected analysis to be available");
+ return MBFI->isIrrLoopHeader(MBB);
}
const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 447ad629885b..4ce689607730 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1,4 +1,4 @@
-//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
+//===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -26,7 +26,10 @@
//===----------------------------------------------------------------------===//
#include "BranchFolding.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -39,19 +42,33 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
-#include <functional>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <tuple>
#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "block-placement"
@@ -91,11 +108,17 @@ static cl::opt<unsigned> LoopToColdBlockRatio(
"(frequency of block) is greater than this ratio"),
cl::init(5), cl::Hidden);
+static cl::opt<bool> ForceLoopColdBlock(
+ "force-loop-cold-block",
+ cl::desc("Force outlining cold blocks from loops."),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool>
PreciseRotationCost("precise-rotation-cost",
cl::desc("Model the cost of loop rotation more "
"precisely by using profile data."),
cl::init(false), cl::Hidden);
+
static cl::opt<bool>
ForcePreciseRotationCost("force-precise-rotation-cost",
cl::desc("Force the use of precise cost "
@@ -138,7 +161,7 @@ static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
"tail-dup-placement-aggressive-threshold",
cl::desc("Instruction cutoff for aggressive tail duplication during "
"layout. Used at -O3. Tail merging during layout is forced to "
- "have a threshold that won't conflict."), cl::init(3),
+ "have a threshold that won't conflict."), cl::init(4),
cl::Hidden);
// Heuristic for tail duplication.
@@ -172,12 +195,12 @@ extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
extern cl::opt<std::string> ViewBlockFreqFuncName;
namespace {
+
class BlockChain;
+
/// \brief Type for our function-wide basic block -> block chain mapping.
-typedef DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChainMapType;
-}
+using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>;
-namespace {
/// \brief A chain of blocks which will be laid out contiguously.
///
/// This is the datastructure representing a chain of consecutive blocks that
@@ -211,14 +234,14 @@ public:
/// function. It also registers itself as the chain that block participates
/// in with the BlockToChain mapping.
BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
- : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) {
+ : Blocks(1, BB), BlockToChain(BlockToChain) {
assert(BB && "Cannot create a chain with a null basic block");
BlockToChain[BB] = this;
}
/// \brief Iterator over blocks within the chain.
- typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
- typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator const_iterator;
+ using iterator = SmallVectorImpl<MachineBasicBlock *>::iterator;
+ using const_iterator = SmallVectorImpl<MachineBasicBlock *>::const_iterator;
/// \brief Beginning of blocks within the chain.
iterator begin() { return Blocks.begin(); }
@@ -286,14 +309,12 @@ public:
///
/// Note: This field is reinitialized multiple times - once for each loop,
/// and then once for the function as a whole.
- unsigned UnscheduledPredecessors;
+ unsigned UnscheduledPredecessors = 0;
};
-}
-namespace {
class MachineBlockPlacement : public MachineFunctionPass {
- /// \brief A typedef for a block filter set.
- typedef SmallSetVector<const MachineBasicBlock *, 16> BlockFilterSet;
+ /// \brief A type for a block filter set.
+ using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
  /// Pair struct containing basic block and taildup profitability
struct BlockAndTailDupResult {
@@ -428,6 +449,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
void fillWorkLists(const MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter);
+
void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(
@@ -454,31 +476,37 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
BranchProbability AdjustedSumProb,
const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
/// Check for a trellis layout.
bool isTrellis(const MachineBasicBlock *BB,
const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
/// Get the best successor given a trellis layout.
BlockAndTailDupResult getBestTrellisSuccessor(
const MachineBasicBlock *BB,
const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
BranchProbability AdjustedSumProb, const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
+
/// Get the best pair of non-conflicting edges.
static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
const MachineBasicBlock *BB,
MutableArrayRef<SmallVector<WeightedEdge, 8>> Edges);
+
/// Returns true if a block can tail duplicate into all unplaced
/// predecessors. Filters based on loop.
bool canTailDuplicateUnplacedPreds(
const MachineBasicBlock *BB, MachineBasicBlock *Succ,
const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
/// Find chains of triangles to tail-duplicate where a global analysis works,
/// but a local analysis would not find them.
void precomputeTriangleChains();
public:
static char ID; // Pass identification, replacement for typeid
+
MachineBlockPlacement() : MachineFunctionPass(ID) {
initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
}
@@ -495,10 +523,13 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+
+} // end anonymous namespace
char MachineBlockPlacement::ID = 0;
+
char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+
INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
@@ -515,7 +546,7 @@ INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
static std::string getBlockName(const MachineBasicBlock *BB) {
std::string Result;
raw_string_ostream OS(Result);
- OS << "BB#" << BB->getNumber();
+ OS << printMBBReference(*BB);
OS << " ('" << BB->getName() << "')";
OS.flush();
return Result;
@@ -1094,6 +1125,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
void MachineBlockPlacement::precomputeTriangleChains() {
struct TriangleChain {
std::vector<MachineBasicBlock *> Edges;
+
TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
: Edges({src, dst}) {}
@@ -1203,7 +1235,7 @@ void MachineBlockPlacement::precomputeTriangleChains() {
// When profile is available, we need to handle the triangle-shape CFG.
static BranchProbability getLayoutSuccessorProbThreshold(
const MachineBasicBlock *BB) {
- if (!BB->getParent()->getFunction()->getEntryCount())
+ if (!BB->getParent()->getFunction().getEntryCount())
return BranchProbability(StaticLikelyProb, 100);
if (BB->succ_size() == 2) {
const MachineBasicBlock *Succ1 = *BB->succ_begin();
@@ -1534,10 +1566,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
// some code complexity) into the loop below.
- WorkList.erase(remove_if(WorkList,
- [&](MachineBasicBlock *BB) {
- return BlockToChain.lookup(BB) == &Chain;
- }),
+ WorkList.erase(llvm::remove_if(WorkList,
+ [&](MachineBasicBlock *BB) {
+ return BlockToChain.lookup(BB) == &Chain;
+ }),
WorkList.end());
if (WorkList.empty())
@@ -1659,7 +1691,7 @@ void MachineBlockPlacement::buildChain(
const MachineBasicBlock *LoopHeaderBB = HeadBB;
markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
MachineBasicBlock *BB = *std::prev(Chain.end());
- for (;;) {
+ while (true) {
assert(BB && "null block found at end of chain in loop.");
assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
@@ -1737,7 +1769,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
// i.e. when the layout predecessor does not fallthrough to the loop header.
// In practice this never happens though: there always seems to be a preheader
// that can fallthrough and that is also placed before the header.
- if (F->getFunction()->optForSize())
+ if (F->getFunction().optForSize())
return L.getHeader();
// Check that the header hasn't been fused with a preheader block due to
@@ -1945,7 +1977,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
}
}
- BlockChain::iterator ExitIt = find(LoopChain, ExitingBB);
+ BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
if (ExitIt == LoopChain.end())
return;
@@ -1999,7 +2031,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
auto HeaderBB = L.getHeader();
- auto HeaderIter = find(LoopChain, HeaderBB);
+ auto HeaderIter = llvm::find(LoopChain, HeaderBB);
auto RotationPos = LoopChain.end();
BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2146,7 +2178,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
// will be merged into the first outer loop chain for which this block is not
// cold anymore. This needs precise profile data and we only do this when
// profile data is available.
- if (F->getFunction()->getEntryCount()) {
+ if (F->getFunction().getEntryCount() || ForceLoopColdBlock) {
BlockFrequency LoopFreq(0);
for (auto LoopPred : L.getHeader()->predecessors())
if (!L.contains(LoopPred))
@@ -2188,7 +2220,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// for better layout.
bool RotateLoopWithProfile =
ForcePreciseRotationCost ||
- (PreciseRotationCost && F->getFunction()->getEntryCount());
+ (PreciseRotationCost && F->getFunction().getEntryCount());
// First check to see if there is an obviously preferable top block for the
// loop. This will default to the header, but may end up as one of the
@@ -2201,6 +2233,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
// branches by placing an exit edge at the bottom.
+ //
+  // Loops are processed innermost to outermost; make sure we clear
+ // PreferredLoopExit before processing a new loop.
+ PreferredLoopExit = nullptr;
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
@@ -2272,7 +2308,7 @@ void MachineBlockPlacement::buildCFGChains() {
new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
// Also, merge any blocks which we cannot reason about and must preserve
// the exact fallthrough behavior for.
- for (;;) {
+ while (true) {
Cond.clear();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
@@ -2313,7 +2349,7 @@ void MachineBlockPlacement::buildCFGChains() {
buildChain(&F->front(), FunctionChain);
#ifndef NDEBUG
- typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
+ using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
#endif
DEBUG({
// Crash at the end so we get all of the debugging output first.
@@ -2449,7 +2485,7 @@ void MachineBlockPlacement::alignBlocks() {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F->getFunction()->optForSize())
+ if (F->getFunction().optForSize())
return;
BlockChain &FunctionChain = *BlockToChain[&F->front()];
if (FunctionChain.begin() == FunctionChain.end())
@@ -2545,8 +2581,8 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
// duplicated from here on are already scheduled.
// Note that DuplicatedToLPred always implies Removed.
while (DuplicatedToLPred) {
- assert (Removed && "Block must have been removed to be duplicated into its "
- "layout predecessor.");
+ assert(Removed && "Block must have been removed to be duplicated into its "
+ "layout predecessor.");
MachineBasicBlock *DupBB, *DupPred;
// The removal callback causes Chain.end() to be updated when a block is
// removed. On the first pass through the loop, the chain end should be the
@@ -2629,8 +2665,10 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
if (RemBB->isEHPad())
RemoveList = EHPadWorkList;
RemoveList.erase(
- remove_if(RemoveList,
- [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}),
+ llvm::remove_if(RemoveList,
+ [RemBB](MachineBasicBlock *BB) {
+ return BB == RemBB;
+ }),
RemoveList.end());
}
@@ -2648,7 +2686,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
<< getBlockName(RemBB) << "\n");
};
auto RemovalCallbackRef =
- llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
+ function_ref<void(MachineBasicBlock*)>(RemovalCallback);
SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
bool IsSimple = TailDup.isSimpleBB(BB);
@@ -2677,7 +2715,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
}
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
// Check for single-block functions and skip them.
@@ -2722,9 +2760,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (TailDupPlacement) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
- if (MF.getFunction()->optForSize())
+ if (MF.getFunction().optForSize())
TailDupSize = 1;
- TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
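+    // Block placement runs after register allocation, so initialize the
+    // tail duplicator in post-RA mode.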
+ bool PreRegAlloc = false;
+ TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize);
precomputeTriangleChains();
}
@@ -2778,7 +2817,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}
if (ViewBlockLayoutWithBFI != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
- F->getFunction()->getName().equals(ViewBlockFreqFuncName))) {
+ F->getFunction().getName().equals(ViewBlockFreqFuncName))) {
MBFI->view("MBP." + MF.getName(), false);
}
@@ -2789,6 +2828,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}
namespace {
+
/// \brief A pass to compute block placement statistics.
///
/// A separate pass to compute interesting statistics for evaluating block
@@ -2804,6 +2844,7 @@ class MachineBlockPlacementStats : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
+
MachineBlockPlacementStats() : MachineFunctionPass(ID) {
initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
}
@@ -2817,10 +2858,13 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+
+} // end anonymous namespace
char MachineBlockPlacementStats::ID = 0;
+
char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
+
INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
"Basic Block Placement Stats", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 21eff9dfff9c..e4952aaaba06 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -84,7 +84,7 @@ raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability(
const MachineBasicBlock *Dst) const {
const BranchProbability Prob = getEdgeProbability(Src, Dst);
- OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber()
+ OS << "edge " << printMBBReference(*Src) << " -> " << printMBBReference(*Dst)
<< " probability is " << Prob
<< (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 582ff139f886..53c0d840ac84 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -1,4 +1,4 @@
-//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
+//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,18 +15,35 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "machine-cse"
@@ -40,15 +57,18 @@ STATISTIC(NumCrossBBCSEs,
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
namespace {
+
class MachineCSE : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
AliasAnalysis *AA;
MachineDominatorTree *DT;
MachineRegisterInfo *MRI;
+
public:
static char ID; // Pass identification
- MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(0), CurrVN(0) {
+
+ MachineCSE() : MachineFunctionPass(ID) {
initializeMachineCSEPass(*PassRegistry::getPassRegistry());
}
@@ -69,16 +89,18 @@ namespace {
}
private:
- unsigned LookAheadLimit;
- typedef RecyclingAllocator<BumpPtrAllocator,
- ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
- typedef ScopedHashTable<MachineInstr*, unsigned,
- MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
- typedef ScopedHTType::ScopeTy ScopeType;
- DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
+ using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<MachineInstr *, unsigned>>;
+ using ScopedHTType =
+ ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
+ AllocatorTy>;
+ using ScopeType = ScopedHTType::ScopeTy;
+
+ unsigned LookAheadLimit = 0;
+ DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
ScopedHTType VNT;
- SmallVector<MachineInstr*, 64> Exps;
- unsigned CurrVN;
+ SmallVector<MachineInstr *, 64> Exps;
+ unsigned CurrVN = 0;
bool PerformTrivialCopyPropagation(MachineInstr *MI,
MachineBasicBlock *MBB);
@@ -104,10 +126,13 @@ namespace {
DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
bool PerformCSE(MachineDomTreeNode *Node);
};
+
} // end anonymous namespace
char MachineCSE::ID = 0;
+
char &llvm::MachineCSEID = MachineCSE::ID;
+
INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
"Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -225,8 +250,8 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- // Reading constant physregs is ok.
- if (!MRI->isConstantPhysReg(Reg))
+ // Reading either caller preserved or constant physregs is ok.
+ if (!MRI->isCallerPreservedOrConstPhysReg(Reg))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
}
@@ -598,12 +623,12 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Go through implicit defs of CSMI and MI, and clear the kill flags on
// their uses in all the instructions between CSMI and MI.
// We might have made some of the kill flags redundant, consider:
- // subs ... %NZCV<imp-def> <- CSMI
- // csinc ... %NZCV<imp-use,kill> <- this kill flag isn't valid anymore
- // subs ... %NZCV<imp-def> <- MI, to be eliminated
- // csinc ... %NZCV<imp-use,kill>
+ // subs ... implicit-def %nzcv <- CSMI
+ // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
+ // subs ... implicit-def %nzcv <- MI, to be eliminated
+ // csinc ... implicit killed %nzcv
// Since we eliminated MI, and reused a register imp-def'd by CSMI
- // (here %NZCV), that register, if it was killed before MI, should have
+ // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
if (CSMI->getParent() == MI->getParent()) {
for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
@@ -702,7 +727,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
}
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
TII = MF.getSubtarget().getInstrInfo();
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index e6f80dbb8630..702d21228477 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -16,17 +16,17 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -34,6 +34,11 @@ using namespace llvm;
 STATISTIC(NumInstCombined, "Number of machine instructions combined");
+static cl::opt<unsigned>
+inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
+              cl::desc("Incremental depth computation will be used for basic "
+                       "blocks with more instructions than this threshold."),
+              cl::init(500));
+
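
Since the threshold is an ordinary cl::opt, it can be tuned when experimenting with very large blocks; hidden options are still accepted on the command line, so an invocation along the lines of llc -machine-combiner-inc-threshold=1000 input.ll should work, assuming the usual llc codegen pipeline.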
namespace {
class MachineCombiner : public MachineFunctionPass {
const TargetInstrInfo *TII;
@@ -73,7 +78,7 @@ private:
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern);
+ MachineCombinerPattern Pattern, bool SlackIsAccurate);
bool preservesResourceLen(MachineBasicBlock *MBB,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -155,9 +160,10 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
assert(DefInstr &&
"There must be a definition for a new virtual register");
DepthOp = InstrDepth[II->second];
- LatencyOp = TSchedModel.computeOperandLatency(
- DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
- InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
+ int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg());
+ LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
+ InstrPtr, UseIdx);
} else {
MachineInstr *DefInstr = getOperandDef(MO);
if (DefInstr) {
@@ -247,7 +253,8 @@ bool MachineCombiner::improvesCriticalPathLen(
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern) {
+ MachineCombinerPattern Pattern,
+ bool SlackIsAccurate) {
assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
"Missing machine model\n");
// NewRoot is the last instruction in the \p InsInstrs vector.
@@ -258,7 +265,7 @@ bool MachineCombiner::improvesCriticalPathLen(
unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
- DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
+ DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n";
dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
dbgs() << " RootDepth: " << RootDepth << "\n");
@@ -274,24 +281,32 @@ bool MachineCombiner::improvesCriticalPathLen(
// of the original code sequence. This may allow the transform to proceed
// even if the instruction depths (data dependency cycles) become worse.
- unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
- unsigned RootLatency = 0;
+ // Account for the latency of the inserted and deleted instructions by
+ // adding up their latencies. This assumes that the inserted and deleted
+ // instructions are dependent instruction chains, which might not hold
+ // in all cases.
+ unsigned NewRootLatency = 0;
+ for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
+ NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
+ NewRootLatency += getLatency(Root, NewRoot, BlockTrace);
+ unsigned RootLatency = 0;
for (auto I : DelInstrs)
RootLatency += TSchedModel.computeInstrLatency(I);
unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
-
+ unsigned NewCycleCount = NewRootDepth + NewRootLatency;
+ unsigned OldCycleCount = RootDepth + RootLatency +
+ (SlackIsAccurate ? RootSlack : 0);
DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
dbgs() << " RootLatency: " << RootLatency << "\n";
- dbgs() << " RootSlack: " << RootSlack << "\n";
+ dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate="
+ << SlackIsAccurate << "\n";
dbgs() << " NewRootDepth + NewRootLatency = "
- << NewRootDepth + NewRootLatency << "\n";
+ << NewCycleCount << "\n";
dbgs() << " RootDepth + RootLatency + RootSlack = "
- << RootDepth + RootLatency + RootSlack << "\n";);
-
- unsigned NewCycleCount = NewRootDepth + NewRootLatency;
- unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
+ << OldCycleCount << "\n";
+ );
return NewCycleCount <= OldCycleCount;
}
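
Worked numbers for the test above: with NewRootDepth = 6, NewRootLatency = 3, RootDepth = 4, RootLatency = 4 and RootSlack = 2, the new cost is 9 against an old cost of 10 when the slack is accurate (combine accepted), but only 8 under incremental updates where the stale slack is dropped (rejected); ignoring untrustworthy slack therefore errs on the conservative side. The accept test, isolated as a self-contained helper (a sketch, not part of the patch):

bool combineIsProfitable(unsigned NewRootDepth, unsigned NewRootLatency,
                         unsigned RootDepth, unsigned RootLatency,
                         unsigned RootSlack, bool SlackIsAccurate) {
  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
  unsigned OldCycleCount =
      RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
  return NewCycleCount <= OldCycleCount;
}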
@@ -354,17 +369,44 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
return false;
}
+/// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
+/// depths if requested.
+///
+/// \param MBB basic block to insert instructions in
+/// \param MI current machine instruction
+/// \param InsInstrs new instructions to insert in \p MBB
+/// \param DelInstrs instructions to delete from \p MBB
+/// \param MinInstr is a pointer to the machine trace information
+/// \param RegUnits set of live registers, needed to compute instruction depths
+/// \param IncrementalUpdate if true, compute instruction depths incrementally,
+/// otherwise invalidate the trace
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
SmallVector<MachineInstr *, 16> InsInstrs,
SmallVector<MachineInstr *, 16> DelInstrs,
- MachineTraceMetrics *Traces) {
+ MachineTraceMetrics::Ensemble *MinInstr,
+ SparseSet<LiveRegUnit> &RegUnits,
+ bool IncrementalUpdate) {
for (auto *InstrPtr : InsInstrs)
MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
- for (auto *InstrPtr : DelInstrs)
+
+ for (auto *InstrPtr : DelInstrs) {
InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
- ++NumInstCombined;
- Traces->invalidate(MBB);
- Traces->verifyAnalysis();
+ // Erase all LiveRegs defined by the removed instruction
+ for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
+ if (I->MI == InstrPtr)
+ I = RegUnits.erase(I);
+ else
+ I++;
+ }
+ }
+
+ if (IncrementalUpdate)
+ for (auto *InstrPtr : InsInstrs)
+ MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
+ else
+ MinInstr->invalidate(MBB);
+
+ NumInstCombined++;
}
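
The RegUnits loop above is the erase-while-iterating idiom: advance via the iterator returned by erase() rather than incrementing, since SparseSet::erase moves another element into the erased slot. The general shape, for any set whose erase returns the continuation iterator (sketch):

template <typename Set, typename Pred>
void eraseIf(Set &S, Pred ShouldErase) {
  for (auto I = S.begin(); I != S.end();) {
    if (ShouldErase(*I))
      I = S.erase(I); // continue from the element now occupying this slot
    else
      ++I;
  }
}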
/// Substitute a slow code sequence with a faster one by
@@ -378,9 +420,16 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
bool Changed = false;
DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
+ bool IncrementalUpdate = false;
auto BlockIter = MBB->begin();
+ decltype(BlockIter) LastUpdate;
// Check if the block is in a loop.
const MachineLoop *ML = MLI->getLoopFor(MBB);
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(TRI->getNumRegUnits());
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
@@ -419,9 +468,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
SmallVector<MachineInstr *, 16> InsInstrs;
SmallVector<MachineInstr *, 16> DelInstrs;
DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
- if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
- Traces->verifyAnalysis();
TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
InstrIdxForVirtReg);
unsigned NewInstCount = InsInstrs.size();
@@ -436,23 +482,43 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (ML && TII->isThroughputPattern(P))
SubstituteAlways = true;
+ if (IncrementalUpdate) {
+ // Update depths since the last incremental update.
+ MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
+ LastUpdate = BlockIter;
+ }
+
// Substitute when we optimize for codesize and the new sequence has
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ RegUnits, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
Changed = true;
break;
} else {
- // Calculating the trace metrics may be expensive,
- // so only do this when necessary.
+ // For big basic blocks, we only compute the full trace the first time
+ // we hit this. We do not invalidate the trace, but instead update the
+ // instruction depths incrementally.
+ // NOTE: Only the instruction depths up to MI are accurate. All other
+ // trace information is not updated.
MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ Traces->verifyAnalysis();
if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
- InstrIdxForVirtReg, P) &&
+ InstrIdxForVirtReg, P,
+ !IncrementalUpdate) &&
preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
+ if (MBB->size() > inc_threshold) {
+              // Use incremental depth updates for basic blocks above the threshold.
+ IncrementalUpdate = true;
+ LastUpdate = BlockIter;
+ }
+
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ RegUnits, IncrementalUpdate);
+
// Eagerly stop after the first pattern fires.
Changed = true;
break;
@@ -467,6 +533,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
}
+ if (Changed && IncrementalUpdate)
+ Traces->invalidate(MBB);
return Changed;
}
@@ -480,7 +548,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- OptSize = MF.getFunction()->optForSize();
+ OptSize = MF.getFunction().optForSize();
DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
if (!TII->useMachineCombiner()) {
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 7d5a68192e6b..fcec05adc732 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -12,19 +12,26 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <iterator>
+
using namespace llvm;
#define DEBUG_TYPE "machine-cp"
@@ -32,9 +39,10 @@ using namespace llvm;
STATISTIC(NumDeletes, "Number of dead copies deleted");
namespace {
- typedef SmallVector<unsigned, 4> RegList;
- typedef DenseMap<unsigned, RegList> SourceMap;
- typedef DenseMap<unsigned, MachineInstr*> Reg2MIMap;
+
+using RegList = SmallVector<unsigned, 4>;
+using SourceMap = DenseMap<unsigned, RegList>;
+using Reg2MIMap = DenseMap<unsigned, MachineInstr *>;
class MachineCopyPropagation : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
@@ -43,6 +51,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
+
MachineCopyPropagation() : MachineFunctionPass(ID) {
initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
}
@@ -67,16 +76,23 @@ namespace {
/// Candidates for deletion.
SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;
+
/// Def -> available copies map.
Reg2MIMap AvailCopyMap;
+
/// Def -> copies map.
Reg2MIMap CopyMap;
+
/// Src -> Def map
SourceMap SrcMap;
+
bool Changed;
};
-}
+
+} // end anonymous namespace
+
char MachineCopyPropagation::ID = 0;
+
char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
@@ -170,6 +186,8 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
// Check that the existing copy uses the correct sub registers.
MachineInstr &PrevCopy = *CI->second;
+ if (PrevCopy.getOperand(0).isDead())
+ return false;
if (!isNopCopy(PrevCopy, Src, Def, TRI))
return false;
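
The early-out above declines to reuse an earlier copy whose def carries the dead flag: the liveness flags say that value is never read, so deleting a later copy on the strength of it would extend a lifetime the flags claim ended. One possible shape of the situation (hypothetical MIR, in the notation of the comments below):

// dead %ecx = COPY %eax   <- PrevCopy: flags claim %ecx is never read
// ...
// %eax = COPY %ecx        <- looks like a nop of PrevCopy, but erasing it
//                            would require %ecx to still be live here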
@@ -207,19 +225,19 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
- // %ECX<def> = COPY %EAX
- // ... nothing clobbered EAX.
- // %EAX<def> = COPY %ECX
+ // %ecx = COPY %eax
+ // ... nothing clobbered eax.
+ // %eax = COPY %ecx
// =>
- // %ECX<def> = COPY %EAX
+ // %ecx = COPY %eax
//
// or
//
- // %ECX<def> = COPY %EAX
- // ... nothing clobbered EAX.
- // %ECX<def> = COPY %EAX
+ // %ecx = COPY %eax
+ // ... nothing clobbered eax.
+ // %ecx = COPY %eax
// =>
- // %ECX<def> = COPY %EAX
+ // %ecx = COPY %eax
if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
continue;
@@ -243,11 +261,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// If 'Def' is previously source of another copy, then this earlier copy's
// source is no longer available. e.g.
- // %xmm9<def> = copy %xmm2
+ // %xmm9 = copy %xmm2
// ...
- // %xmm2<def> = copy %xmm0
+ // %xmm2 = copy %xmm0
// ...
- // %xmm2<def> = copy %xmm9
+ // %xmm2 = copy %xmm9
ClobberRegister(Def);
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.isDef())
@@ -269,7 +287,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// it's no longer available for copy propagation.
RegList &DestList = SrcMap[Src];
if (!is_contained(DestList, Def))
- DestList.push_back(Def);
+ DestList.push_back(Def);
continue;
}
@@ -360,7 +378,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
}
bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
Changed = false;
@@ -374,4 +392,3 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 845e8232477c..517ac29b6450 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -26,7 +26,7 @@ static bool VerifyMachineDomInfo = true;
static bool VerifyMachineDomInfo = false;
#endif
static cl::opt<bool, true> VerifyMachineDomInfoX(
- "verify-machine-dom-info", cl::location(VerifyMachineDomInfo),
+ "verify-machine-dom-info", cl::location(VerifyMachineDomInfo), cl::Hidden,
cl::desc("Verify machine dominator info (time consuming)"));
namespace llvm {
@@ -148,7 +148,8 @@ void MachineDominatorTree::verifyDomTree() const {
OtherDT.recalculate(F);
if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() ||
DT->compare(OtherDT)) {
- errs() << "MachineDominatorTree is not up to date!\nComputed:\n";
+ errs() << "MachineDominatorTree for function " << F.getName()
+ << " is not up to date!\nComputed:\n";
DT->print(errs());
errs() << "\nActual:\n";
OtherDT.print(errs());
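
The flag is now hidden from plain -help output but can still be passed explicitly when chasing a stale dominator tree, e.g. llc -verify-machine-dom-info -O2 input.ll, and the expanded message names the offending function rather than leaving it to be inferred from the dumped trees.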
diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp
index 73d778ff3023..2aa9d6b816c8 100644
--- a/lib/CodeGen/MachineFrameInfo.cpp
+++ b/lib/CodeGen/MachineFrameInfo.cpp
@@ -16,12 +16,12 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#define DEBUG_TYPE "codegen"
@@ -47,11 +47,13 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
}
int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
- bool isSS, const AllocaInst *Alloca) {
+ bool IsSpillSlot,
+ const AllocaInst *Alloca,
+ uint8_t StackID) {
assert(Size != 0 && "Cannot allocate zero size stack objects!");
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
- Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca,
- !isSS));
+ Objects.push_back(StackObject(Size, Alignment, 0, false, IsSpillSlot, Alloca,
+ !IsSpillSlot, StackID));
int Index = (int)Objects.size() - NumFixedObjects - 1;
assert(Index >= 0 && "Bad frame index!");
ensureMaxAlignment(Alignment);
@@ -77,7 +79,7 @@ int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
}
int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
- bool Immutable, bool isAliased) {
+ bool IsImmutable, bool IsAliased) {
assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
// The alignment of the frame index can be determined from its offset from
// the incoming frame position. If the frame object is at offset 32 and
@@ -85,23 +87,24 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
// object is 16-byte aligned. Note that unlike the non-fixed case, if the
// stack needs realignment, we can't assume that the stack will in fact be
// aligned.
- unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
- Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
- Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
- /*isSS*/ false,
- /*Alloca*/ nullptr, isAliased));
+ unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ Objects.insert(Objects.begin(),
+ StackObject(Size, Alignment, SPOffset, IsImmutable,
+ /*isSpillSlot=*/false, /*Alloca=*/nullptr,
+ IsAliased));
return -++NumFixedObjects;
}
int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
int64_t SPOffset,
- bool Immutable) {
- unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
- Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
- Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
- /*isSS*/ true,
- /*Alloca*/ nullptr,
- /*isAliased*/ false));
+ bool IsImmutable) {
+ unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ Objects.insert(Objects.begin(),
+ StackObject(Size, Alignment, SPOffset, IsImmutable,
+ /*IsSpillSlot=*/true, /*Alloca=*/nullptr,
+ /*IsAliased=*/false));
return -++NumFixedObjects;
}
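
Both fixed-object paths derive the alignment from the SP offset with MinAlign, the largest power of two dividing both arguments, which is exactly the reasoning in the comment above CreateFixedObject. A few worked values (sketch, not part of the patch):

#include "llvm/Support/MathExtras.h"
#include <cassert>

void minAlignExamples() {
  assert(llvm::MinAlign(32, 16) == 16); // 32 bytes past a 16-byte aligned SP
  assert(llvm::MinAlign(36, 16) == 4);  // offset 36 only guarantees 4 bytes
  assert(llvm::MinAlign(0, 16) == 16);  // offset 0 keeps the full alignment
}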
@@ -212,6 +215,10 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
const StackObject &SO = Objects[i];
OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
+
+ if (SO.StackID != 0)
+      OS << "id=" << static_cast<unsigned>(SO.StackID) << ' ';
+
if (SO.Size == ~0ULL) {
OS << "dead\n";
continue;
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 742b095d955e..bc8eb1429d92 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -1,4 +1,4 @@
-//===-- MachineFunction.cpp -----------------------------------------------===//
+//===- MachineFunction.cpp ------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,45 +14,76 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "codegen"
static cl::opt<unsigned>
- AlignAllFunctions("align-all-functions",
- cl::desc("Force the alignment of all functions."),
- cl::init(0), cl::Hidden);
+AlignAllFunctions("align-all-functions",
+ cl::desc("Force the alignment of all functions."),
+ cl::init(0), cl::Hidden);
static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
- typedef MachineFunctionProperties::Property P;
+ using P = MachineFunctionProperties::Property;
+
switch(Prop) {
case P::FailedISel: return "FailedISel";
case P::IsSSA: return "IsSSA";
@@ -81,23 +112,23 @@ void MachineFunctionProperties::print(raw_ostream &OS) const {
//===----------------------------------------------------------------------===//
// Out-of-line virtual method.
-MachineFunctionInfo::~MachineFunctionInfo() {}
+MachineFunctionInfo::~MachineFunctionInfo() = default;
void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MBB->getParent()->DeleteMachineBasicBlock(MBB);
}
static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
- const Function *Fn) {
- if (Fn->hasFnAttribute(Attribute::StackAlignment))
- return Fn->getFnStackAlignment();
+ const Function &F) {
+ if (F.hasFnAttribute(Attribute::StackAlignment))
+ return F.getFnStackAlignment();
return STI->getFrameLowering()->getStackAlignment();
}
-MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
+MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target,
+ const TargetSubtargetInfo &STI,
unsigned FunctionNum, MachineModuleInfo &mmi)
- : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()),
- MMI(mmi) {
+ : F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) {
FunctionNumber = FunctionNum;
init();
}
@@ -115,21 +146,21 @@ void MachineFunction::init() {
// We can realign the stack if the target supports it and the user hasn't
// explicitly asked us not to.
bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() &&
- !Fn->hasFnAttribute("no-realign-stack");
+ !F.hasFnAttribute("no-realign-stack");
FrameInfo = new (Allocator) MachineFrameInfo(
- getFnStackAlignment(STI, Fn), /*StackRealignable=*/CanRealignSP,
+ getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP,
/*ForceRealign=*/CanRealignSP &&
- Fn->hasFnAttribute(Attribute::StackAlignment));
+ F.hasFnAttribute(Attribute::StackAlignment));
- if (Fn->hasFnAttribute(Attribute::StackAlignment))
- FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment());
+ if (F.hasFnAttribute(Attribute::StackAlignment))
+ FrameInfo->ensureMaxAlignment(F.getFnStackAlignment());
ConstantPool = new (Allocator) MachineConstantPool(getDataLayout());
Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
- // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
+ // FIXME: Shouldn't use pref alignment if explicit alignment is set on F.
// FIXME: Use Function::optForSize().
- if (!Fn->hasFnAttribute(Attribute::OptimizeForSize))
+ if (!F.hasFnAttribute(Attribute::OptimizeForSize))
Alignment = std::max(Alignment,
STI->getTargetLowering()->getPrefFunctionAlignment());
@@ -139,7 +170,7 @@ void MachineFunction::init() {
JumpTableInfo = nullptr;
if (isFuncletEHPersonality(classifyEHPersonality(
- Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr))) {
+ F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr))) {
WinEHInfo = new (Allocator) WinEHFuncInfo();
}
@@ -147,7 +178,9 @@ void MachineFunction::init() {
"Can't create a MachineFunction using a Module with a "
"Target-incompatible DataLayout attached\n");
- PSVManager = llvm::make_unique<PseudoSourceValueManager>();
+  PSVManager = llvm::make_unique<PseudoSourceValueManager>(
+      *getSubtarget().getInstrInfo());
}
MachineFunction::~MachineFunction() {
@@ -166,6 +199,7 @@ void MachineFunction::clear() {
InstructionRecycler.clear(Allocator);
OperandRecycler.clear(Allocator);
BasicBlockRecycler.clear(Allocator);
+ CodeViewAnnotations.clear();
VariableDbgInfos.clear();
if (RegInfo) {
RegInfo->~MachineRegisterInfo();
@@ -194,7 +228,7 @@ void MachineFunction::clear() {
}
const DataLayout &MachineFunction::getDataLayout() const {
- return Fn->getParent()->getDataLayout();
+ return F.getParent()->getDataLayout();
}
/// Get the JumpTableInfo for this function.
@@ -210,7 +244,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
/// Should we be emitting segmented stack stuff for the function
bool MachineFunction::shouldSplitStack() const {
- return getFunction()->hasFnAttribute("split-stack");
+ return getFunction().hasFnAttribute("split-stack");
}
/// This discards all of the MachineBasicBlock numbers and recomputes them.
@@ -270,6 +304,26 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
MachineInstr(*this, *Orig);
}
+MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) {
+ MachineInstr *FirstClone = nullptr;
+ MachineBasicBlock::const_instr_iterator I = Orig.getIterator();
+ while (true) {
+ MachineInstr *Cloned = CloneMachineInstr(&*I);
+ MBB.insert(InsertBefore, Cloned);
+ if (FirstClone == nullptr) {
+ FirstClone = Cloned;
+ } else {
+ Cloned->bundleWithPred();
+ }
+
+ if (!I->isBundledWithSucc())
+ break;
+ ++I;
+ }
+ return *FirstClone;
+}
+
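
A hedged usage sketch of the new helper (the wrapper name is illustrative): clone a lone instruction directly, but widen to the whole bundle when MI heads one, since CloneMachineInstr alone would drop the bundled successors.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"

llvm::MachineInstr &cloneMaybeBundle(llvm::MachineFunction &MF,
                                     llvm::MachineBasicBlock &MBB,
                                     llvm::MachineBasicBlock::iterator InsertPt,
                                     const llvm::MachineInstr &MI) {
  if (MI.isBundledWithSucc()) // MI is the first instruction of a bundle
    return MF.CloneMachineInstrBundle(MBB, InsertPt, MI);
  llvm::MachineInstr *Clone = MF.CloneMachineInstr(&MI);
  MBB.insert(InsertPt, Clone);
  return *Clone;
}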
/// Delete the given MachineInstr.
///
/// This function also serves as the MachineInstr destructor - the real
@@ -431,8 +485,7 @@ LLVM_DUMP_METHOD void MachineFunction::dump() const {
#endif
StringRef MachineFunction::getName() const {
- assert(getFunction() && "No function!");
- return getFunction()->getName();
+ return getFunction().getName();
}
void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
@@ -456,17 +509,17 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
OS << "Function Live Ins: ";
for (MachineRegisterInfo::livein_iterator
I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
- OS << PrintReg(I->first, TRI);
+ OS << printReg(I->first, TRI);
if (I->second)
- OS << " in " << PrintReg(I->second, TRI);
+ OS << " in " << printReg(I->second, TRI);
if (std::next(I) != E)
OS << ", ";
}
OS << '\n';
}
- ModuleSlotTracker MST(getFunction()->getParent());
- MST.incorporateFunction(*getFunction());
+ ModuleSlotTracker MST(getFunction().getParent());
+ MST.incorporateFunction(getFunction());
for (const auto &BB : *this) {
OS << '\n';
BB.print(OS, MST, Indexes);
@@ -476,10 +529,10 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
}
namespace llvm {
+
template<>
struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
-
- DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const MachineFunction *F) {
return ("CFG for '" + F->getName() + "' function").str();
@@ -492,7 +545,7 @@ namespace llvm {
raw_string_ostream OSS(OutStr);
if (isSimple()) {
- OSS << "BB#" << Node->getNumber();
+ OSS << printMBBReference(*Node);
if (const BasicBlock *BB = Node->getBasicBlock())
OSS << ": " << BB->getName();
} else
@@ -510,7 +563,8 @@ namespace llvm {
return OutStr;
}
};
-}
+
+} // end namespace llvm
void MachineFunction::viewCFG() const
{
@@ -797,7 +851,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
// alignment.
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
- return TD.getPointerABIAlignment();
+ return TD.getPointerABIAlignment(0);
case MachineJumpTableInfo::EK_GPRel64BlockAddress:
return TD.getABIIntegerTypeAlignment(64);
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
@@ -851,9 +905,9 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
OS << "Jump Tables:\n";
for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
- OS << " jt#" << i << ": ";
+ OS << printJumpTableEntryReference(i) << ": ";
for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
- OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
+ OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]);
}
OS << '\n';
@@ -863,12 +917,15 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
LLVM_DUMP_METHOD void MachineJumpTableInfo::dump() const { print(dbgs()); }
#endif
+Printable llvm::printJumpTableEntryReference(unsigned Idx) {
+ return Printable([Idx](raw_ostream &OS) { OS << "%jump-table." << Idx; });
+}
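
Like printReg and printMBBReference above, the helper returns a Printable, so a jump-table reference streams directly without building a temporary string (sketch; the helper is declared in llvm/CodeGen/MachineFunction.h):

#include "llvm/Support/raw_ostream.h"

// Prints "%jump-table.3" for Idx == 3.
void dumpJumpTableRef(llvm::raw_ostream &OS, unsigned Idx) {
  OS << llvm::printJumpTableEntryReference(Idx) << '\n';
}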
//===----------------------------------------------------------------------===//
// MachineConstantPool implementation
//===----------------------------------------------------------------------===//
-void MachineConstantPoolValue::anchor() { }
+void MachineConstantPoolValue::anchor() {}
Type *MachineConstantPoolEntry::getType() const {
if (isMachineConstantPoolEntry())
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 535757ed87c1..14655c6eb700 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -33,6 +34,9 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
@@ -58,11 +62,8 @@
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -73,695 +74,6 @@
using namespace llvm;
-static cl::opt<bool> PrintWholeRegMask(
- "print-whole-regmask",
- cl::desc("Print the full contents of regmask operands in IR dumps"),
- cl::init(true), cl::Hidden);
-
-//===----------------------------------------------------------------------===//
-// MachineOperand Implementation
-//===----------------------------------------------------------------------===//
-
-void MachineOperand::setReg(unsigned Reg) {
- if (getReg() == Reg) return; // No change.
-
- // Otherwise, we have to change the register. If this operand is embedded
- // into a machine function, we need to update the old and new register's
- // use/def lists.
- if (MachineInstr *MI = getParent())
- if (MachineBasicBlock *MBB = MI->getParent())
- if (MachineFunction *MF = MBB->getParent()) {
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MRI.removeRegOperandFromUseList(this);
- SmallContents.RegNo = Reg;
- MRI.addRegOperandToUseList(this);
- return;
- }
-
- // Otherwise, just change the register, no problem. :)
- SmallContents.RegNo = Reg;
-}
-
-void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
- const TargetRegisterInfo &TRI) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
- if (SubIdx && getSubReg())
- SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
- setReg(Reg);
- if (SubIdx)
- setSubReg(SubIdx);
-}
-
-void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
- if (getSubReg()) {
- Reg = TRI.getSubReg(Reg, getSubReg());
- // Note that getSubReg() may return 0 if the sub-register doesn't exist.
- // That won't happen in legal code.
- setSubReg(0);
- if (isDef())
- setIsUndef(false);
- }
- setReg(Reg);
-}
-
-/// Change a def to a use, or a use to a def.
-void MachineOperand::setIsDef(bool Val) {
- assert(isReg() && "Wrong MachineOperand accessor");
- assert((!Val || !isDebug()) && "Marking a debug operation as def");
- if (IsDef == Val)
- return;
- // MRI may keep uses and defs in different list positions.
- if (MachineInstr *MI = getParent())
- if (MachineBasicBlock *MBB = MI->getParent())
- if (MachineFunction *MF = MBB->getParent()) {
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MRI.removeRegOperandFromUseList(this);
- IsDef = Val;
- MRI.addRegOperandToUseList(this);
- return;
- }
- IsDef = Val;
-}
-
-// If this operand is currently a register operand, and if this is in a
-// function, deregister the operand from the register's use/def list.
-void MachineOperand::removeRegFromUses() {
- if (!isReg() || !isOnRegUseList())
- return;
-
- if (MachineInstr *MI = getParent()) {
- if (MachineBasicBlock *MBB = MI->getParent()) {
- if (MachineFunction *MF = MBB->getParent())
- MF->getRegInfo().removeRegOperandFromUseList(this);
- }
- }
-}
-
-/// ChangeToImmediate - Replace this operand with a new immediate operand of
-/// the specified value. If an operand is known to be an immediate already,
-/// the setImm method should be used.
-void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
- assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
-
- removeRegFromUses();
-
- OpKind = MO_Immediate;
- Contents.ImmVal = ImmVal;
-}
-
-void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
- assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
-
- removeRegFromUses();
-
- OpKind = MO_FPImmediate;
- Contents.CFP = FPImm;
-}
-
-void MachineOperand::ChangeToES(const char *SymName, unsigned char TargetFlags) {
- assert((!isReg() || !isTied()) &&
- "Cannot change a tied operand into an external symbol");
-
- removeRegFromUses();
-
- OpKind = MO_ExternalSymbol;
- Contents.OffsetedInfo.Val.SymbolName = SymName;
- setOffset(0); // Offset is always 0.
- setTargetFlags(TargetFlags);
-}
-
-void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
- assert((!isReg() || !isTied()) &&
- "Cannot change a tied operand into an MCSymbol");
-
- removeRegFromUses();
-
- OpKind = MO_MCSymbol;
- Contents.Sym = Sym;
-}
-
-void MachineOperand::ChangeToFrameIndex(int Idx) {
- assert((!isReg() || !isTied()) &&
- "Cannot change a tied operand into a FrameIndex");
-
- removeRegFromUses();
-
- OpKind = MO_FrameIndex;
- setIndex(Idx);
-}
-
-/// ChangeToRegister - Replace this operand with a new register operand of
-/// the specified value. If an operand is known to be an register already,
-/// the setReg method should be used.
-void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
- bool isKill, bool isDead, bool isUndef,
- bool isDebug) {
- MachineRegisterInfo *RegInfo = nullptr;
- if (MachineInstr *MI = getParent())
- if (MachineBasicBlock *MBB = MI->getParent())
- if (MachineFunction *MF = MBB->getParent())
- RegInfo = &MF->getRegInfo();
- // If this operand is already a register operand, remove it from the
- // register's use/def lists.
- bool WasReg = isReg();
- if (RegInfo && WasReg)
- RegInfo->removeRegOperandFromUseList(this);
-
- // Change this to a register and set the reg#.
- OpKind = MO_Register;
- SmallContents.RegNo = Reg;
- SubReg_TargetFlags = 0;
- IsDef = isDef;
- IsImp = isImp;
- IsKill = isKill;
- IsDead = isDead;
- IsUndef = isUndef;
- IsInternalRead = false;
- IsEarlyClobber = false;
- IsDebug = isDebug;
- // Ensure isOnRegUseList() returns false.
- Contents.Reg.Prev = nullptr;
- // Preserve the tie when the operand was already a register.
- if (!WasReg)
- TiedTo = 0;
-
- // If this operand is embedded in a function, add the operand to the
- // register's use/def list.
- if (RegInfo)
- RegInfo->addRegOperandToUseList(this);
-}
-
-/// isIdenticalTo - Return true if this operand is identical to the specified
-/// operand. Note that this should stay in sync with the hash_value overload
-/// below.
-bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
- if (getType() != Other.getType() ||
- getTargetFlags() != Other.getTargetFlags())
- return false;
-
- switch (getType()) {
- case MachineOperand::MO_Register:
- return getReg() == Other.getReg() && isDef() == Other.isDef() &&
- getSubReg() == Other.getSubReg();
- case MachineOperand::MO_Immediate:
- return getImm() == Other.getImm();
- case MachineOperand::MO_CImmediate:
- return getCImm() == Other.getCImm();
- case MachineOperand::MO_FPImmediate:
- return getFPImm() == Other.getFPImm();
- case MachineOperand::MO_MachineBasicBlock:
- return getMBB() == Other.getMBB();
- case MachineOperand::MO_FrameIndex:
- return getIndex() == Other.getIndex();
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_TargetIndex:
- return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
- case MachineOperand::MO_JumpTableIndex:
- return getIndex() == Other.getIndex();
- case MachineOperand::MO_GlobalAddress:
- return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
- case MachineOperand::MO_ExternalSymbol:
- return strcmp(getSymbolName(), Other.getSymbolName()) == 0 &&
- getOffset() == Other.getOffset();
- case MachineOperand::MO_BlockAddress:
- return getBlockAddress() == Other.getBlockAddress() &&
- getOffset() == Other.getOffset();
- case MachineOperand::MO_RegisterMask:
- case MachineOperand::MO_RegisterLiveOut: {
- // Shallow compare of the two RegMasks
- const uint32_t *RegMask = getRegMask();
- const uint32_t *OtherRegMask = Other.getRegMask();
- if (RegMask == OtherRegMask)
- return true;
-
- // Calculate the size of the RegMask
- const MachineFunction *MF = getParent()->getParent()->getParent();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
-
- // Deep compare of the two RegMasks
- return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask);
- }
- case MachineOperand::MO_MCSymbol:
- return getMCSymbol() == Other.getMCSymbol();
- case MachineOperand::MO_CFIIndex:
- return getCFIIndex() == Other.getCFIIndex();
- case MachineOperand::MO_Metadata:
- return getMetadata() == Other.getMetadata();
- case MachineOperand::MO_IntrinsicID:
- return getIntrinsicID() == Other.getIntrinsicID();
- case MachineOperand::MO_Predicate:
- return getPredicate() == Other.getPredicate();
- }
- llvm_unreachable("Invalid machine operand type");
-}
-
-// Note: this must stay exactly in sync with isIdenticalTo above.
-hash_code llvm::hash_value(const MachineOperand &MO) {
- switch (MO.getType()) {
- case MachineOperand::MO_Register:
- // Register operands don't have target flags.
- return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
- case MachineOperand::MO_Immediate:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
- case MachineOperand::MO_CImmediate:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm());
- case MachineOperand::MO_FPImmediate:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm());
- case MachineOperand::MO_MachineBasicBlock:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB());
- case MachineOperand::MO_FrameIndex:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_TargetIndex:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
- MO.getOffset());
- case MachineOperand::MO_JumpTableIndex:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
- case MachineOperand::MO_ExternalSymbol:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
- MO.getSymbolName());
- case MachineOperand::MO_GlobalAddress:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
- MO.getOffset());
- case MachineOperand::MO_BlockAddress:
- return hash_combine(MO.getType(), MO.getTargetFlags(),
- MO.getBlockAddress(), MO.getOffset());
- case MachineOperand::MO_RegisterMask:
- case MachineOperand::MO_RegisterLiveOut:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
- case MachineOperand::MO_Metadata:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
- case MachineOperand::MO_MCSymbol:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
- case MachineOperand::MO_CFIIndex:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex());
- case MachineOperand::MO_IntrinsicID:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
- case MachineOperand::MO_Predicate:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
- }
- llvm_unreachable("Invalid machine operand type");
-}
-
-void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI,
- const TargetIntrinsicInfo *IntrinsicInfo) const {
- ModuleSlotTracker DummyMST(nullptr);
- print(OS, DummyMST, TRI, IntrinsicInfo);
-}
-
-void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
- const TargetRegisterInfo *TRI,
- const TargetIntrinsicInfo *IntrinsicInfo) const {
- switch (getType()) {
- case MachineOperand::MO_Register:
- OS << PrintReg(getReg(), TRI, getSubReg());
-
- if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
- isInternalRead() || isEarlyClobber() || isTied()) {
- OS << '<';
- bool NeedComma = false;
- if (isDef()) {
- if (NeedComma) OS << ',';
- if (isEarlyClobber())
- OS << "earlyclobber,";
- if (isImplicit())
- OS << "imp-";
- OS << "def";
- NeedComma = true;
- // <def,read-undef> only makes sense when getSubReg() is set.
- // Don't clutter the output otherwise.
- if (isUndef() && getSubReg())
- OS << ",read-undef";
- } else if (isImplicit()) {
- OS << "imp-use";
- NeedComma = true;
- }
-
- if (isKill()) {
- if (NeedComma) OS << ',';
- OS << "kill";
- NeedComma = true;
- }
- if (isDead()) {
- if (NeedComma) OS << ',';
- OS << "dead";
- NeedComma = true;
- }
- if (isUndef() && isUse()) {
- if (NeedComma) OS << ',';
- OS << "undef";
- NeedComma = true;
- }
- if (isInternalRead()) {
- if (NeedComma) OS << ',';
- OS << "internal";
- NeedComma = true;
- }
- if (isTied()) {
- if (NeedComma) OS << ',';
- OS << "tied";
- if (TiedTo != 15)
- OS << unsigned(TiedTo - 1);
- }
- OS << '>';
- }
- break;
- case MachineOperand::MO_Immediate:
- OS << getImm();
- break;
- case MachineOperand::MO_CImmediate:
- getCImm()->getValue().print(OS, false);
- break;
- case MachineOperand::MO_FPImmediate:
- if (getFPImm()->getType()->isFloatTy()) {
- OS << getFPImm()->getValueAPF().convertToFloat();
- } else if (getFPImm()->getType()->isHalfTy()) {
- APFloat APF = getFPImm()->getValueAPF();
- bool Unused;
- APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused);
- OS << "half " << APF.convertToFloat();
- } else if (getFPImm()->getType()->isFP128Ty()) {
- APFloat APF = getFPImm()->getValueAPF();
- SmallString<16> Str;
- getFPImm()->getValueAPF().toString(Str);
- OS << "quad " << Str;
- } else if (getFPImm()->getType()->isX86_FP80Ty()) {
- APFloat APF = getFPImm()->getValueAPF();
- OS << "x86_fp80 0xK";
- APInt API = APF.bitcastToAPInt();
- OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
- /*Upper=*/true);
- OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
- /*Upper=*/true);
- } else {
- OS << getFPImm()->getValueAPF().convertToDouble();
- }
- break;
- case MachineOperand::MO_MachineBasicBlock:
- OS << "<BB#" << getMBB()->getNumber() << ">";
- break;
- case MachineOperand::MO_FrameIndex:
- OS << "<fi#" << getIndex() << '>';
- break;
- case MachineOperand::MO_ConstantPoolIndex:
- OS << "<cp#" << getIndex();
- if (getOffset()) OS << "+" << getOffset();
- OS << '>';
- break;
- case MachineOperand::MO_TargetIndex:
- OS << "<ti#" << getIndex();
- if (getOffset()) OS << "+" << getOffset();
- OS << '>';
- break;
- case MachineOperand::MO_JumpTableIndex:
- OS << "<jt#" << getIndex() << '>';
- break;
- case MachineOperand::MO_GlobalAddress:
- OS << "<ga:";
- getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
- if (getOffset()) OS << "+" << getOffset();
- OS << '>';
- break;
- case MachineOperand::MO_ExternalSymbol:
- OS << "<es:" << getSymbolName();
- if (getOffset()) OS << "+" << getOffset();
- OS << '>';
- break;
- case MachineOperand::MO_BlockAddress:
- OS << '<';
- getBlockAddress()->printAsOperand(OS, /*PrintType=*/false, MST);
- if (getOffset()) OS << "+" << getOffset();
- OS << '>';
- break;
- case MachineOperand::MO_RegisterMask: {
- unsigned NumRegsInMask = 0;
- unsigned NumRegsEmitted = 0;
- OS << "<regmask";
- for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
- unsigned MaskWord = i / 32;
- unsigned MaskBit = i % 32;
- if (getRegMask()[MaskWord] & (1 << MaskBit)) {
- if (PrintWholeRegMask || NumRegsEmitted <= 10) {
- OS << " " << PrintReg(i, TRI);
- NumRegsEmitted++;
- }
- NumRegsInMask++;
- }
- }
- if (NumRegsEmitted != NumRegsInMask)
- OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more...";
- OS << ">";
- break;
- }
- case MachineOperand::MO_RegisterLiveOut:
- OS << "<regliveout>";
- break;
- case MachineOperand::MO_Metadata:
- OS << '<';
- getMetadata()->printAsOperand(OS, MST);
- OS << '>';
- break;
- case MachineOperand::MO_MCSymbol:
- OS << "<MCSym=" << *getMCSymbol() << '>';
- break;
- case MachineOperand::MO_CFIIndex:
- OS << "<call frame instruction>";
- break;
- case MachineOperand::MO_IntrinsicID: {
- Intrinsic::ID ID = getIntrinsicID();
- if (ID < Intrinsic::num_intrinsics)
- OS << "<intrinsic:@" << Intrinsic::getName(ID, None) << '>';
- else if (IntrinsicInfo)
- OS << "<intrinsic:@" << IntrinsicInfo->getName(ID) << '>';
- else
- OS << "<intrinsic:" << ID << '>';
- break;
- }
- case MachineOperand::MO_Predicate: {
- auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
- OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred")
- << CmpInst::getPredicateName(Pred) << '>';
- break;
- }
- }
- if (unsigned TF = getTargetFlags())
- OS << "[TF=" << TF << ']';
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void MachineOperand::dump() const {
- dbgs() << *this << '\n';
-}
-#endif
-
-//===----------------------------------------------------------------------===//
-// MachineMemOperand Implementation
-//===----------------------------------------------------------------------===//
-
-/// getAddrSpace - Return the LLVM IR address space number that this pointer
-/// points into.
-unsigned MachinePointerInfo::getAddrSpace() const {
- if (V.isNull() || V.is<const PseudoSourceValue*>()) return 0;
- return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
-}
-
-/// isDereferenceable - Return true if V is always dereferenceable for
-/// Offset + Size byte.
-bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
- const DataLayout &DL) const {
- if (!V.is<const Value*>())
- return false;
-
- const Value *BasePtr = V.get<const Value*>();
- if (BasePtr == nullptr)
- return false;
-
- return isDereferenceableAndAlignedPointer(
- BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL);
-}
-
-/// getConstantPool - Return a MachinePointerInfo record that refers to the
-/// constant pool.
-MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
- return MachinePointerInfo(MF.getPSVManager().getConstantPool());
-}
-
-/// getFixedStack - Return a MachinePointerInfo record that refers to the
-/// the specified FrameIndex.
-MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF,
- int FI, int64_t Offset) {
- return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset);
-}
-
-MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) {
- return MachinePointerInfo(MF.getPSVManager().getJumpTable());
-}
-
-MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) {
- return MachinePointerInfo(MF.getPSVManager().getGOT());
-}
-
-MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
- int64_t Offset) {
- return MachinePointerInfo(MF.getPSVManager().getStack(), Offset);
-}
-
-MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
- uint64_t s, unsigned int a,
- const AAMDNodes &AAInfo,
- const MDNode *Ranges,
- SyncScope::ID SSID,
- AtomicOrdering Ordering,
- AtomicOrdering FailureOrdering)
- : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1),
- AAInfo(AAInfo), Ranges(Ranges) {
- assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() ||
- isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) &&
- "invalid pointer value");
- assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
- assert((isLoad() || isStore()) && "Not a load/store!");
-
- AtomicInfo.SSID = static_cast<unsigned>(SSID);
- assert(getSyncScopeID() == SSID && "Value truncated");
- AtomicInfo.Ordering = static_cast<unsigned>(Ordering);
- assert(getOrdering() == Ordering && "Value truncated");
- AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering);
- assert(getFailureOrdering() == FailureOrdering && "Value truncated");
-}
-
-/// Profile - Gather unique data for the object.
-///
-void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(getOffset());
- ID.AddInteger(Size);
- ID.AddPointer(getOpaqueValue());
- ID.AddInteger(getFlags());
- ID.AddInteger(getBaseAlignment());
-}
-
-void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
- // The Value and Offset may differ due to CSE. But the flags and size
- // should be the same.
- assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
- assert(MMO->getSize() == getSize() && "Size mismatch!");
-
- if (MMO->getBaseAlignment() >= getBaseAlignment()) {
- // Update the alignment value.
- BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1;
- // Also update the base and offset, because the new alignment may
- // not be applicable with the old ones.
- PtrInfo = MMO->PtrInfo;
- }
-}
-
-/// getAlignment - Return the minimum known alignment in bytes of the
-/// actual memory reference.
-uint64_t MachineMemOperand::getAlignment() const {
- return MinAlign(getBaseAlignment(), getOffset());
-}
-
-void MachineMemOperand::print(raw_ostream &OS) const {
- ModuleSlotTracker DummyMST(nullptr);
- print(OS, DummyMST);
-}
-void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
- assert((isLoad() || isStore()) &&
- "SV has to be a load, store or both.");
-
- if (isVolatile())
- OS << "Volatile ";
-
- if (isLoad())
- OS << "LD";
- if (isStore())
- OS << "ST";
- OS << getSize();
-
- // Print the address information.
- OS << "[";
- if (const Value *V = getValue())
- V->printAsOperand(OS, /*PrintType=*/false, MST);
- else if (const PseudoSourceValue *PSV = getPseudoValue())
- PSV->printCustom(OS);
- else
- OS << "<unknown>";
-
- unsigned AS = getAddrSpace();
- if (AS != 0)
- OS << "(addrspace=" << AS << ')';
-
- // If the alignment of the memory reference itself differs from the alignment
- // of the base pointer, print the base alignment explicitly, next to the base
- // pointer.
- if (getBaseAlignment() != getAlignment())
- OS << "(align=" << getBaseAlignment() << ")";
-
- if (getOffset() != 0)
- OS << "+" << getOffset();
- OS << "]";
-
- // Print the alignment of the reference.
- if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize())
- OS << "(align=" << getAlignment() << ")";
-
- // Print TBAA info.
- if (const MDNode *TBAAInfo = getAAInfo().TBAA) {
- OS << "(tbaa=";
- if (TBAAInfo->getNumOperands() > 0)
- TBAAInfo->getOperand(0)->printAsOperand(OS, MST);
- else
- OS << "<unknown>";
- OS << ")";
- }
-
- // Print AA scope info.
- if (const MDNode *ScopeInfo = getAAInfo().Scope) {
- OS << "(alias.scope=";
- if (ScopeInfo->getNumOperands() > 0)
- for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) {
- ScopeInfo->getOperand(i)->printAsOperand(OS, MST);
- if (i != ie-1)
- OS << ",";
- }
- else
- OS << "<unknown>";
- OS << ")";
- }
-
- // Print AA noalias scope info.
- if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) {
- OS << "(noalias=";
- if (NoAliasInfo->getNumOperands() > 0)
- for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) {
- NoAliasInfo->getOperand(i)->printAsOperand(OS, MST);
- if (i != ie-1)
- OS << ",";
- }
- else
- OS << "<unknown>";
- OS << ")";
- }
-
- if (isNonTemporal())
- OS << "(nontemporal)";
- if (isDereferenceable())
- OS << "(dereferenceable)";
- if (isInvariant())
- OS << "(invariant)";
- if (getFlags() & MOTargetFlag1)
- OS << "(flag1)";
- if (getFlags() & MOTargetFlag2)
- OS << "(flag2)";
- if (getFlags() & MOTargetFlag3)
- OS << "(flag3)";
-}
-
-//===----------------------------------------------------------------------===//
-// MachineInstr Implementation
-//===----------------------------------------------------------------------===//
-
void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
if (MCID->ImplicitDefs)
for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs;
@@ -1034,7 +346,7 @@ MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs))
return std::make_pair(nullptr, 0);
- MachineFunction *MF = getParent()->getParent();
+ MachineFunction *MF = getMF();
mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(),
MemBegin);
@@ -1108,9 +420,9 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
if (Check == IgnoreDefs)
continue;
else if (Check == IgnoreVRegDefs) {
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
- TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
- if (MO.getReg() != OMO.getReg())
+ if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()) ||
+ !TargetRegisterInfo::isVirtualRegister(OMO.getReg()))
+ if (!MO.isIdenticalTo(OMO))
return false;
} else {
if (!MO.isIdenticalTo(OMO))
@@ -1133,6 +445,10 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
return true;
}
+const MachineFunction *MachineInstr::getMF() const {
+ return getParent()->getParent();
+}
+
MachineInstr *MachineInstr::removeFromParent() {
assert(getParent() && "Not embedded in a basic block!");
return getParent()->remove(this);
@@ -1282,8 +598,8 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) const {
assert(getParent() && "Can't have an MBB reference here!");
- assert(getParent()->getParent() && "Can't have an MF reference here!");
- const MachineFunction &MF = *getParent()->getParent();
+ assert(getMF() && "Can't have an MF reference here!");
+ const MachineFunction &MF = *getMF();
// Most opcodes have fixed constraints in their MCInstrDesc.
if (!isInlineAsm())
@@ -1427,7 +743,7 @@ MachineInstr::readsWritesVirtualRegister(unsigned Reg,
if (MO.isUse())
Use |= !MO.isUndef();
else if (MO.getSubReg() && !MO.isUndef())
- // A partial <def,undef> doesn't count as reading the register.
+      // A partial def that is marked undef doesn't count as reading the register.
PartDef = true;
else
FullDef = true;
@@ -1619,7 +935,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
// Treat volatile loads as stores. This is not strictly necessary for
// volatiles, but it is required for atomic loads. It is not allowed to move
// a load across an atomic load with Ordering > Monotonic.
- if (mayStore() || isCall() ||
+ if (mayStore() || isCall() || isPHI() ||
(mayLoad() && hasOrderedMemoryRef())) {
SawStore = true;
return false;
@@ -1644,8 +960,9 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
bool UseTBAA) {
- const MachineFunction *MF = getParent()->getParent();
+ const MachineFunction *MF = getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
// If neither instruction stores to memory, they can't alias in any
// meaningful way, even if they read from the same address.
@@ -1656,9 +973,6 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA))
return false;
- if (!AA)
- return true;
-
// FIXME: Need to handle multiple memory operands to support all targets.
if (!hasOneMemOperand() || !Other.hasOneMemOperand())
return true;
@@ -1666,9 +980,6 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
MachineMemOperand *MMOa = *memoperands_begin();
MachineMemOperand *MMOb = *Other.memoperands_begin();
- if (!MMOa->getValue() || !MMOb->getValue())
- return true;
-
// The following interface to AA is fashioned after DAGCombiner::isAlias
// and operates with MachineMemOperand offset with some important
// assumptions:
@@ -1681,22 +992,53 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
// - There should never be any negative offsets here.
//
// FIXME: Modify API to hide this math from "user"
- // FIXME: Even before we go to AA we can reason locally about some
+ // Even before we go to AA we can reason locally about some
// memory objects. It can save compile time, and possibly catch some
// corner cases not currently covered.
- assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
- assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+ int64_t OffsetA = MMOa->getOffset();
+ int64_t OffsetB = MMOb->getOffset();
+
+ int64_t MinOffset = std::min(OffsetA, OffsetB);
+ int64_t WidthA = MMOa->getSize();
+ int64_t WidthB = MMOb->getSize();
+ const Value *ValA = MMOa->getValue();
+ const Value *ValB = MMOb->getValue();
+ bool SameVal = (ValA && ValB && (ValA == ValB));
+ if (!SameVal) {
+ const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
+ const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
+ if (PSVa && ValB && !PSVa->mayAlias(&MFI))
+ return false;
+ if (PSVb && ValA && !PSVb->mayAlias(&MFI))
+ return false;
+ if (PSVa && PSVb && (PSVa == PSVb))
+ SameVal = true;
+ }
- int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
- int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
- int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
+ if (SameVal) {
+ int64_t MaxOffset = std::max(OffsetA, OffsetB);
+ int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
+ return (MinOffset + LowWidth > MaxOffset);
+ }
+
+ if (!AA)
+ return true;
+
+ if (!ValA || !ValB)
+ return true;
- AliasResult AAResult =
- AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
- UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
- MemoryLocation(MMOb->getValue(), Overlapb,
- UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+ assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
+ assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
+
+ int64_t Overlapa = WidthA + OffsetA - MinOffset;
+ int64_t Overlapb = WidthB + OffsetB - MinOffset;
+
+ AliasResult AAResult = AA->alias(
+ MemoryLocation(ValA, Overlapa,
+ UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(ValB, Overlapb,
+ UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
return (AAResult != NoAlias);
}
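When both operands are based on the same underlying value, the check above reduces to a one-dimensional interval-overlap test on (offset, width) pairs. A minimal sketch of that test outside any LLVM API (names are illustrative):

    #include <algorithm>
    #include <cstdint>

    // True iff [OffsetA, OffsetA+WidthA) and [OffsetB, OffsetB+WidthB) overlap.
    static bool accessesOverlap(int64_t OffsetA, int64_t WidthA,
                                int64_t OffsetB, int64_t WidthB) {
      int64_t MinOffset = std::min(OffsetA, OffsetB);
      int64_t MaxOffset = std::max(OffsetA, OffsetB);
      // Width of whichever access starts lower.
      int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
      // The lower access must extend past the start of the higher one.
      return MinOffset + LowWidth > MaxOffset;
    }
    // accessesOverlap(0, 4, 4, 4) == false  (adjacent accesses don't alias)
    // accessesOverlap(0, 8, 4, 4) == true   (the first covers the second)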
@@ -1822,6 +1164,41 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF,
}
}
+bool MachineInstr::hasComplexRegisterTies() const {
+ const MCInstrDesc &MCID = getDesc();
+ for (unsigned I = 0, E = getNumOperands(); I < E; ++I) {
+ const auto &Operand = getOperand(I);
+ if (!Operand.isReg() || Operand.isDef())
+ // Ignore the defined registers as MCID marks only the uses as tied.
+ continue;
+ int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
+ int TiedIdx = Operand.isTied() ? int(findTiedOperandIdx(I)) : -1;
+ if (ExpectedTiedIdx != TiedIdx)
+ return true;
+ }
+ return false;
+}
+
+LLT MachineInstr::getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes,
+ const MachineRegisterInfo &MRI) const {
+ const MachineOperand &Op = getOperand(OpIdx);
+ if (!Op.isReg())
+ return LLT{};
+
+ if (isVariadic() || OpIdx >= getNumExplicitOperands())
+ return MRI.getType(Op.getReg());
+
+ auto &OpInfo = getDesc().OpInfo[OpIdx];
+ if (!OpInfo.isGenericType())
+ return MRI.getType(Op.getReg());
+
+ if (PrintedTypes[OpInfo.getGenericTypeIndex()])
+ return LLT{};
+
+ PrintedTypes.set(OpInfo.getGenericTypeIndex());
+ return MRI.getType(Op.getReg());
+}
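A hedged sketch of how a printer is expected to drive getTypeToPrint: the caller threads one SmallBitVector through all operands so each generic type index is printed at most once per instruction, and an invalid LLT means "nothing to print here" (MI, MRI and OS are assumed to be in scope):

    SmallBitVector PrintedTypes(8);
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
      LLT Ty = MI.getTypeToPrint(I, PrintedTypes, MRI);
      if (Ty.isValid())
        OS << '(' << Ty << ')'; // e.g. "(s32)" on the first operand of that type
    }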
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineInstr::dump() const {
dbgs() << " ";
@@ -1834,7 +1211,7 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers, bool SkipDebugLoc,
const Module *M = nullptr;
if (const MachineBasicBlock *MBB = getParent())
if (const MachineFunction *MF = MBB->getParent())
- M = MF->getFunction()->getParent();
+ M = MF->getFunction().getParent();
ModuleSlotTracker MST(M);
print(OS, MST, SkipOpers, SkipDebugLoc, TII);
@@ -1863,21 +1240,31 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Save a list of virtual registers.
SmallVector<unsigned, 8> VirtRegs;
+ SmallBitVector PrintedTypes(8);
+ bool ShouldPrintRegisterTies = hasComplexRegisterTies();
+ auto getTiedOperandIdx = [&](unsigned OpIdx) {
+ if (!ShouldPrintRegisterTies)
+ return 0U;
+ const MachineOperand &MO = getOperand(OpIdx);
+ if (MO.isReg() && MO.isTied() && !MO.isDef())
+ return findTiedOperandIdx(OpIdx);
+ return 0U;
+ };
// Print explicitly defined operands on the left of an assignment syntax.
unsigned StartOp = 0, e = getNumOperands();
for (; StartOp < e && getOperand(StartOp).isReg() &&
- getOperand(StartOp).isDef() &&
- !getOperand(StartOp).isImplicit();
+ getOperand(StartOp).isDef() && !getOperand(StartOp).isImplicit();
++StartOp) {
- if (StartOp != 0) OS << ", ";
- getOperand(StartOp).print(OS, MST, TRI, IntrinsicInfo);
+ if (StartOp != 0)
+ OS << ", ";
+ LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{};
+ unsigned TiedOperandIdx = getTiedOperandIdx(StartOp);
+ getOperand(StartOp).print(OS, MST, TypeToPrint, /*PrintDef=*/false,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI,
+ IntrinsicInfo);
unsigned Reg = getOperand(StartOp).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
VirtRegs.push_back(Reg);
- LLT Ty = MRI ? MRI->getType(Reg) : LLT{};
- if (Ty.isValid())
- OS << '(' << Ty << ')';
- }
}
if (StartOp != 0)
@@ -1900,7 +1287,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
// Print asm string.
OS << " ";
- getOperand(InlineAsm::MIOp_AsmString).print(OS, MST, TRI);
+ const unsigned OpIdx = InlineAsm::MIOp_AsmString;
+ LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{};
+ unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx);
+ getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI,
+ IntrinsicInfo);
// Print HasSideEffects, MayLoad, MayStore, IsAlignStack
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -1943,8 +1335,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata());
if (DIV && !DIV->getName().empty())
OS << "!\"" << DIV->getName() << '\"';
- else
- MO.print(OS, MST, TRI);
+ else {
+ LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
+ unsigned TiedOperandIdx = getTiedOperandIdx(i);
+ MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
+ }
} else if (TRI && (isInsertSubreg() || isRegSequence() ||
(isSubregToReg() && i == 3)) && MO.isImm()) {
OS << TRI->getSubRegIndexName(MO.getImm());
@@ -2006,8 +1402,15 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Compute the index of the next operand descriptor.
AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
- } else
- MO.print(OS, MST, TRI);
+ } else {
+ LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
+ unsigned TiedOperandIdx = getTiedOperandIdx(i);
+ if (MO.isImm() && isOperandSubregIdx(i))
+ MachineOperand::printSubregIdx(OS, MO.getImm(), TRI);
+ else
+ MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
+ }
}
bool HaveSemi = false;
@@ -2057,14 +1460,14 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
else
OS << " "
<< TRI->getRegClassName(RC.get<const TargetRegisterClass *>());
- OS << ':' << PrintReg(VirtRegs[i]);
+ OS << ':' << printReg(VirtRegs[i]);
for (unsigned j = i+1; j != VirtRegs.size();) {
if (MRI->getRegClassOrRegBank(VirtRegs[j]) != RC) {
++j;
continue;
}
if (VirtRegs[i] != VirtRegs[j])
- OS << "," << PrintReg(VirtRegs[j]);
+ OS << "," << printReg(VirtRegs[j]);
VirtRegs.erase(VirtRegs.begin()+j);
}
}
@@ -2328,8 +1731,8 @@ void MachineInstr::emitError(StringRef Msg) const {
MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
const MCInstrDesc &MCID, bool IsIndirect,
- unsigned Reg, unsigned Offset,
- const MDNode *Variable, const MDNode *Expr) {
+ unsigned Reg, const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
@@ -2337,53 +1740,60 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
if (IsIndirect)
return BuildMI(MF, DL, MCID)
.addReg(Reg, RegState::Debug)
- .addImm(Offset)
+ .addImm(0U)
.addMetadata(Variable)
.addMetadata(Expr);
- else {
- assert(Offset == 0 && "A direct address cannot have an offset.");
+ else
return BuildMI(MF, DL, MCID)
.addReg(Reg, RegState::Debug)
.addReg(0U, RegState::Debug)
.addMetadata(Variable)
.addMetadata(Expr);
- }
}
MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, const MCInstrDesc &MCID,
bool IsIndirect, unsigned Reg,
- unsigned Offset, const MDNode *Variable,
- const MDNode *Expr) {
+ const MDNode *Variable, const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
MachineFunction &MF = *BB.getParent();
- MachineInstr *MI =
- BuildMI(MF, DL, MCID, IsIndirect, Reg, Offset, Variable, Expr);
+ MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr);
BB.insert(I, MI);
return MachineInstrBuilder(MF, MI);
}
+/// Compute the new DIExpression to use with a DBG_VALUE for a spill slot.
+/// This prepends DW_OP_deref when spilling an indirect DBG_VALUE.
+static const DIExpression *computeExprForSpill(const MachineInstr &MI) {
+ assert(MI.getOperand(0).isReg() && "can't spill non-register");
+ assert(MI.getDebugVariable()->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+
+ const DIExpression *Expr = MI.getDebugExpression();
+ if (MI.isIndirectDebugValue()) {
+ assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
+ Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
+ }
+ return Expr;
+}
+
MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const MachineInstr &Orig,
int FrameIndex) {
- const MDNode *Var = Orig.getDebugVariable();
- const auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression());
- bool IsIndirect = Orig.isIndirectDebugValue();
- uint64_t Offset = IsIndirect ? Orig.getOperand(1).getImm() : 0;
- DebugLoc DL = Orig.getDebugLoc();
- assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
- "Expected inlined-at fields to agree");
- // If the DBG_VALUE already was a memory location, add an extra
- // DW_OP_deref. Otherwise just turning this from a register into a
- // memory/indirect location is sufficient.
- if (IsIndirect)
- Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
- return BuildMI(BB, I, DL, Orig.getDesc())
+ const DIExpression *Expr = computeExprForSpill(Orig);
+ return BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc())
.addFrameIndex(FrameIndex)
- .addImm(Offset)
- .addMetadata(Var)
+ .addImm(0U)
+ .addMetadata(Orig.getDebugVariable())
.addMetadata(Expr);
}
+
+void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex) {
+ const DIExpression *Expr = computeExprForSpill(Orig);
+ Orig.getOperand(0).ChangeToFrameIndex(FrameIndex);
+ Orig.getOperand(1).ChangeToImmediate(0U);
+ Orig.getOperand(3).setMetadata(Expr);
+}
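A hedged usage sketch for the pair of spill helpers above, as a spiller might call them once a register named by a DBG_VALUE has been assigned a stack slot (SpilledReg, FI and MI are assumed names, not part of this patch):

    if (MI.isDebugValue() && MI.getOperand(0).isReg() &&
        MI.getOperand(0).getReg() == SpilledReg) {
      // Rewrite the DBG_VALUE in place to point at the stack slot...
      updateDbgValueForSpill(MI, FI);
      // ...or, to keep the original intact, build a new one next to it:
      // buildDbgValueForSpill(*MI.getParent(), MI.getIterator(), MI, FI);
    }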
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index b5621a09c6ff..ed16a2b6084c 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -13,10 +13,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <utility>
using namespace llvm;
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index c7113f1fdc47..75d449c7ac6f 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -1,4 +1,4 @@
-//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
+//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,26 +16,42 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "machinelicm"
@@ -68,6 +84,7 @@ STATISTIC(NumPostRAHoisted,
"Number of machine instructions hoisted out of loops post regalloc");
namespace {
+
class MachineLICM : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetLoweringBase *TLI;
@@ -75,7 +92,7 @@ namespace {
const MachineFrameInfo *MFI;
MachineRegisterInfo *MRI;
TargetSchedModel SchedModel;
- bool PreRegAlloc;
+ bool PreRegAlloc = true;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
@@ -89,7 +106,7 @@ namespace {
MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
// Exit blocks for CurLoop.
- SmallVector<MachineBasicBlock*, 8> ExitBlocks;
+ SmallVector<MachineBasicBlock *, 8> ExitBlocks;
bool isExitBlock(const MachineBasicBlock *MBB) const {
return is_contained(ExitBlocks, MBB);
@@ -107,7 +124,7 @@ namespace {
SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
// For each opcode, keep a list of potential CSE instructions.
- DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+ DenseMap<unsigned, std::vector<const MachineInstr *>> CSEMap;
enum {
SpeculateFalse = 0,
@@ -122,15 +139,15 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- MachineLICM() :
- MachineFunctionPass(ID), PreRegAlloc(true) {
- initializeMachineLICMPass(*PassRegistry::getPassRegistry());
- }
- explicit MachineLICM(bool PreRA) :
- MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+ MachineLICM() : MachineFunctionPass(ID) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit MachineLICM(bool PreRA)
+ : MachineFunctionPass(ID), PreRegAlloc(PreRA) {
initializeMachineLICMPass(*PassRegistry::getPassRegistry());
- }
+ }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -157,6 +174,7 @@ namespace {
MachineInstr *MI;
unsigned Def;
int FI;
+
CandidateInfo(MachineInstr *mi, unsigned def, int fi)
: MI(mi), Def(def), FI(fi) {}
};
@@ -233,10 +251,13 @@ namespace {
MachineBasicBlock *getCurPreheader();
};
+
} // end anonymous namespace
char MachineLICM::ID = 0;
+
char &llvm::MachineLICMID = MachineLICM::ID;
+
INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
"Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
@@ -259,7 +280,7 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
}
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
Changed = FirstInLoop = false;
@@ -425,7 +446,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
// Only consider reloads for now and remats which do not have register
// operands. FIXME: Consider unfold load folding instructions.
if (Def && !RuledOut) {
- int FI = INT_MIN;
+ int FI = std::numeric_limits<int>::min();
if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
(TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
Candidates.push_back(CandidateInfo(MI, Def, FI));
@@ -492,7 +513,7 @@ void MachineLICM::HoistRegionPostRA() {
// registers read by the terminator. Similarly its def should not be
// clobbered by the terminator.
for (CandidateInfo &Candidate : Candidates) {
- if (Candidate.FI != INT_MIN &&
+ if (Candidate.FI != std::numeric_limits<int>::min() &&
StoredFIs.count(Candidate.FI))
continue;
@@ -542,8 +563,8 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
- DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
- << MI->getParent()->getNumber() << ": " << *MI);
+ DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader) << " from "
+ << printMBBReference(*MI->getParent()) << ": " << *MI);
// Splice the instruction to the preheader.
MachineBasicBlock *MBB = MI->getParent();
@@ -580,14 +601,14 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
}
void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Entering BB#" << MBB->getNumber() << '\n');
+ DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
// Remember livein register pressure.
BackTrace.push_back(RegPressure);
}
void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Exiting BB#" << MBB->getNumber() << '\n');
+ DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
BackTrace.pop_back();
}
@@ -617,7 +638,6 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
/// specified header block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
-///
void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
MachineBasicBlock *Preheader = getCurPreheader();
if (!Preheader)
@@ -836,7 +856,7 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
/// Return true if this machine instruction loads from global offset table or
/// constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
- assert (MI.mayLoad() && "Expected MI that loads!");
+ assert(MI.mayLoad() && "Expected MI that loads!");
// If we lost memory operands, conservatively assume that the instruction
  // reads from everything.
@@ -876,7 +896,6 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
/// I.e., all virtual register operands are defined outside of the loop,
/// physical registers aren't accessed explicitly, and there are no side
/// effects that aren't captured by the operands or other flags.
-///
bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
if (!IsLICMCandidate(I))
return false;
@@ -898,8 +917,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// However, if the physreg is known to always be caller saved/restored
// then this use is safe to hoist.
if (!MRI->isConstantPhysReg(Reg) &&
- !(TRI->isCallerPreservedPhysReg(Reg, *I.getParent()->getParent())))
- return false;
+ !(TRI->isCallerPreservedPhysReg(Reg, *I.getMF())))
+ return false;
// Otherwise it's safe to move.
continue;
} else if (!MO.isDead()) {
@@ -928,7 +947,6 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
return true;
}
-
/// Return true if the specified instruction is used by a phi node and hoisting
/// it could cause a copy to be inserted.
bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
@@ -1173,7 +1191,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
&LoadRegIndex);
if (NewOpc == 0) return nullptr;
const MCInstrDesc &MID = TII->get(NewOpc);
- MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFunction &MF = *MI->getMF();
const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
// Ok, we're unfolding. Create a temporary register and do the unfold.
unsigned Reg = MRI->createVirtualRegister(RC);
@@ -1233,7 +1251,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
/// the existing instruction rather than hoisting the instruction to the
/// preheader.
bool MachineLICM::EliminateCSE(MachineInstr *MI,
- DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
+ DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) {
// Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
// the undef property onto uses.
if (CI == CSEMap.end() || MI->isImplicitDef())
@@ -1292,7 +1310,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
/// the loop.
bool MachineLICM::MayCSE(MachineInstr *MI) {
unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
CI = CSEMap.find(Opcode);
// Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
// the undef property onto uses.
@@ -1318,9 +1336,9 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
DEBUG({
dbgs() << "Hoisting " << *MI;
if (MI->getParent()->getBasicBlock())
- dbgs() << " from BB#" << MI->getParent()->getNumber();
+ dbgs() << " from " << printMBBReference(*MI->getParent());
if (Preheader->getBasicBlock())
- dbgs() << " to BB#" << Preheader->getNumber();
+ dbgs() << " to " << printMBBReference(*Preheader);
dbgs() << "\n";
});
@@ -1333,7 +1351,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Look for opportunity to CSE the hoisted instruction.
unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
CI = CSEMap.find(Opcode);
if (!EliminateCSE(MI, CI)) {
// Otherwise, splice the instruction to the preheader.
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 825290a438a6..8f0b89657d02 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
@@ -26,7 +27,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
@@ -207,9 +207,9 @@ bool MachineModuleInfo::doInitialization(Module &M) {
ObjFileMMI = nullptr;
CurCallSite = 0;
DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
+ HasSplitStack = HasNosplitStack = false;
AddrLabelSymbols = nullptr;
TheModule = &M;
-
return false;
}
@@ -276,7 +276,8 @@ MachineModuleInfo::getOrCreateMachineFunction(const Function &F) {
MachineFunction *MF;
if (I.second) {
// No pre-existing machine function, create a new one.
- MF = new MachineFunction(&F, TM, NextFnNum++, *this);
+ const TargetSubtargetInfo &STI = *TM.getSubtargetImpl(F);
+ MF = new MachineFunction(F, TM, STI, NextFnNum++, *this);
// Update the set entry.
I.first->second.reset(MF);
} else {
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 22d519e5d88f..07b173bc94f8 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//===- llvm/CodeGen/MachineModuleInfoImpls.cpp ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,7 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCSymbol.h"
+
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -24,21 +26,17 @@ using namespace llvm;
void MachineModuleInfoMachO::anchor() {}
void MachineModuleInfoELF::anchor() {}
-static int SortSymbolPair(const void *LHS, const void *RHS) {
- typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
- const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
- const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
- return LHSS->getName().compare(RHSS->getName());
+using PairTy = std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy>;
+static int SortSymbolPair(const PairTy *LHS, const PairTy *RHS) {
+ return LHS->first->getName().compare(RHS->first->getName());
}
MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs(
DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) {
MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
- if (!List.empty())
- qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+ array_pod_sort(List.begin(), List.end(), SortSymbolPair);
Map.clear();
return List;
}
-
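A minimal sketch of the array_pod_sort pattern adopted above (assuming llvm/ADT/STLExtras.h): the comparator takes typed pointers and returns the usual negative/zero/positive ordering, replacing the void*-based qsort callback:

    static int compareByFirst(const std::pair<int, int> *LHS,
                              const std::pair<int, int> *RHS) {
      if (LHS->first != RHS->first)
        return LHS->first < RHS->first ? -1 : 1;
      return 0;
    }

    void sortPairs(std::vector<std::pair<int, int>> &V) {
      llvm::array_pod_sort(V.begin(), V.end(), compareByFirst);
    }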
diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp
new file mode 100644
index 000000000000..d17c481862a1
--- /dev/null
+++ b/lib/CodeGen/MachineOperand.cpp
@@ -0,0 +1,936 @@
+//===- lib/CodeGen/MachineOperand.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Methods common to all machine operands.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<int>
+ PrintRegMaskNumRegs("print-regmask-num-regs",
+ cl::desc("Number of registers to limit to when "
+ "printing regmask operands in IR dumps. "
+ "unlimited = -1"),
+ cl::init(32), cl::Hidden);
+
+static const MachineFunction *getMFIfAvailable(const MachineOperand &MO) {
+ if (const MachineInstr *MI = MO.getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF;
+ return nullptr;
+}
+static MachineFunction *getMFIfAvailable(MachineOperand &MO) {
+ return const_cast<MachineFunction *>(
+ getMFIfAvailable(const_cast<const MachineOperand &>(MO)));
+}
+
+void MachineOperand::setReg(unsigned Reg) {
+ if (getReg() == Reg)
+ return; // No change.
+
+ // Otherwise, we have to change the register. If this operand is embedded
+ // into a machine function, we need to update the old and new register's
+ // use/def lists.
+ if (MachineFunction *MF = getMFIfAvailable(*this)) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ SmallContents.RegNo = Reg;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+
+ // Otherwise, just change the register, no problem. :)
+ SmallContents.RegNo = Reg;
+}
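setReg illustrates the discipline every mutator in this file follows when the operand is reachable from a function: unlink from the use/def list while the old value is still valid, mutate, then relink. A generic sketch of that remove/mutate/re-add pattern (the Index and Node shapes are assumptions, not LLVM types):

    template <typename Index, typename Node>
    void rekey(Index &Idx, Node &N, unsigned NewKey) {
      Idx.remove(N);   // unlink while the index can still find the old key
      N.Key = NewKey;  // mutate
      Idx.insert(N);   // relink under the new key
    }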
+
+void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ if (SubIdx && getSubReg())
+ SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+ setReg(Reg);
+ if (SubIdx)
+ setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (getSubReg()) {
+ Reg = TRI.getSubReg(Reg, getSubReg());
+ // Note that getSubReg() may return 0 if the sub-register doesn't exist.
+ // That won't happen in legal code.
+ setSubReg(0);
+ if (isDef())
+ setIsUndef(false);
+ }
+ setReg(Reg);
+}
+
+/// Change a def to a use, or a use to a def.
+void MachineOperand::setIsDef(bool Val) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert((!Val || !isDebug()) && "Marking a debug operation as def");
+ if (IsDef == Val)
+ return;
+ assert(!IsDeadOrKill && "Changing def/use with dead/kill set not supported");
+ // MRI may keep uses and defs in different list positions.
+ if (MachineFunction *MF = getMFIfAvailable(*this)) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ IsDef = Val;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+ IsDef = Val;
+}
+
+bool MachineOperand::isRenamable() const {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&
+ "isRenamable should only be checked on physical registers");
+ return IsRenamable;
+}
+
+void MachineOperand::setIsRenamable(bool Val) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&
+ "setIsRenamable should only be called on physical registers");
+ if (const MachineInstr *MI = getParent())
+ if ((isDef() && MI->hasExtraDefRegAllocReq()) ||
+ (isUse() && MI->hasExtraSrcRegAllocReq()))
+ assert(!Val && "isRenamable should be false for "
+ "hasExtraDefRegAllocReq/hasExtraSrcRegAllocReq opcodes");
+ IsRenamable = Val;
+}
+
+void MachineOperand::setIsRenamableIfNoExtraRegAllocReq() {
+ if (const MachineInstr *MI = getParent())
+ if ((isDef() && MI->hasExtraDefRegAllocReq()) ||
+ (isUse() && MI->hasExtraSrcRegAllocReq()))
+ return;
+
+ setIsRenamable(true);
+}
+
+// If this operand is currently a register operand, and if this is in a
+// function, deregister the operand from the register's use/def list.
+void MachineOperand::removeRegFromUses() {
+ if (!isReg() || !isOnRegUseList())
+ return;
+
+ if (MachineFunction *MF = getMFIfAvailable(*this))
+ MF->getRegInfo().removeRegOperandFromUseList(this);
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value. If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+ assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+
+ removeRegFromUses();
+
+ OpKind = MO_Immediate;
+ Contents.ImmVal = ImmVal;
+}
+
+void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
+ assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+
+ removeRegFromUses();
+
+ OpKind = MO_FPImmediate;
+ Contents.CFP = FPImm;
+}
+
+void MachineOperand::ChangeToES(const char *SymName,
+ unsigned char TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into an external symbol");
+
+ removeRegFromUses();
+
+ OpKind = MO_ExternalSymbol;
+ Contents.OffsetedInfo.Val.SymbolName = SymName;
+ setOffset(0); // Offset is always 0.
+ setTargetFlags(TargetFlags);
+}
+
+void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into an MCSymbol");
+
+ removeRegFromUses();
+
+ OpKind = MO_MCSymbol;
+ Contents.Sym = Sym;
+}
+
+void MachineOperand::ChangeToFrameIndex(int Idx) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into a FrameIndex");
+
+ removeRegFromUses();
+
+ OpKind = MO_FrameIndex;
+ setIndex(Idx);
+}
+
+void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset,
+ unsigned char TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into a FrameIndex");
+
+ removeRegFromUses();
+
+ OpKind = MO_TargetIndex;
+ setIndex(Idx);
+ setOffset(Offset);
+ setTargetFlags(TargetFlags);
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value. If an operand is known to be an register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+ bool isKill, bool isDead, bool isUndef,
+ bool isDebug) {
+ MachineRegisterInfo *RegInfo = nullptr;
+ if (MachineFunction *MF = getMFIfAvailable(*this))
+ RegInfo = &MF->getRegInfo();
+ // If this operand is already a register operand, remove it from the
+ // register's use/def lists.
+ bool WasReg = isReg();
+ if (RegInfo && WasReg)
+ RegInfo->removeRegOperandFromUseList(this);
+
+ // Change this to a register and set the reg#.
+ assert(!(isDead && !isDef) && "Dead flag on non-def");
+ assert(!(isKill && isDef) && "Kill flag on def");
+ OpKind = MO_Register;
+ SmallContents.RegNo = Reg;
+ SubReg_TargetFlags = 0;
+ IsDef = isDef;
+ IsImp = isImp;
+ IsDeadOrKill = isKill | isDead;
+ IsRenamable = false;
+ IsUndef = isUndef;
+ IsInternalRead = false;
+ IsEarlyClobber = false;
+ IsDebug = isDebug;
+ // Ensure isOnRegUseList() returns false.
+ Contents.Reg.Prev = nullptr;
+ // Preserve the tie when the operand was already a register.
+ if (!WasReg)
+ TiedTo = 0;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(this);
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand. Note that this should stay in sync with the hash_value overload
+/// below.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+ if (getType() != Other.getType() ||
+ getTargetFlags() != Other.getTargetFlags())
+ return false;
+
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+ getSubReg() == Other.getSubReg();
+ case MachineOperand::MO_Immediate:
+ return getImm() == Other.getImm();
+ case MachineOperand::MO_CImmediate:
+ return getCImm() == Other.getCImm();
+ case MachineOperand::MO_FPImmediate:
+ return getFPImm() == Other.getFPImm();
+ case MachineOperand::MO_MachineBasicBlock:
+ return getMBB() == Other.getMBB();
+ case MachineOperand::MO_FrameIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_JumpTableIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_GlobalAddress:
+ return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_ExternalSymbol:
+ return strcmp(getSymbolName(), Other.getSymbolName()) == 0 &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_BlockAddress:
+ return getBlockAddress() == Other.getBlockAddress() &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut: {
+ // Shallow compare of the two RegMasks
+ const uint32_t *RegMask = getRegMask();
+ const uint32_t *OtherRegMask = Other.getRegMask();
+ if (RegMask == OtherRegMask)
+ return true;
+
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ // Calculate the size of the RegMask
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+
+ // Deep compare of the two RegMasks
+ return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask);
+ }
+ // We don't know the size of the RegMask, so we can't deep compare the two
+ // reg masks.
+ return false;
+ }
+ case MachineOperand::MO_MCSymbol:
+ return getMCSymbol() == Other.getMCSymbol();
+ case MachineOperand::MO_CFIIndex:
+ return getCFIIndex() == Other.getCFIIndex();
+ case MachineOperand::MO_Metadata:
+ return getMetadata() == Other.getMetadata();
+ case MachineOperand::MO_IntrinsicID:
+ return getIntrinsicID() == Other.getIntrinsicID();
+ case MachineOperand::MO_Predicate:
+ return getPredicate() == Other.getPredicate();
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+// Note: this must stay exactly in sync with isIdenticalTo above.
+hash_code llvm::hash_value(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ // Register operands don't have target flags.
+ return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
+ case MachineOperand::MO_Immediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+ case MachineOperand::MO_CImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm());
+ case MachineOperand::MO_FPImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB());
+ case MachineOperand::MO_FrameIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
+ MO.getOffset());
+ case MachineOperand::MO_JumpTableIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ExternalSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+ MO.getSymbolName());
+ case MachineOperand::MO_GlobalAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
+ MO.getOffset());
+ case MachineOperand::MO_BlockAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getBlockAddress(),
+ MO.getOffset());
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+ case MachineOperand::MO_Metadata:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
+ case MachineOperand::MO_MCSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
+ case MachineOperand::MO_CFIIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex());
+ case MachineOperand::MO_IntrinsicID:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
+ case MachineOperand::MO_Predicate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
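A hedged sketch of the invariant the note above guards: any consumer that buckets operands by hash_value (a DenseMap-style CSE table, say) silently assumes identical operands never hash differently:

    void checkOperandHashInvariant(const MachineOperand &A,
                                   const MachineOperand &B) {
      if (A.isIdenticalTo(B))
        assert(hash_value(A) == hash_value(B) &&
               "isIdenticalTo and hash_value drifted out of sync");
    }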
+
+// Try to crawl up to the machine function and get TRI and IntrinsicInfo from
+// it.
+static void tryToGetTargetInfo(const MachineOperand &MO,
+ const TargetRegisterInfo *&TRI,
+ const TargetIntrinsicInfo *&IntrinsicInfo) {
+ if (const MachineFunction *MF = getMFIfAvailable(MO)) {
+ TRI = MF->getSubtarget().getRegisterInfo();
+ IntrinsicInfo = MF->getTarget().getIntrinsicInfo();
+ }
+}
+
+static void printOffset(raw_ostream &OS, int64_t Offset) {
+ if (Offset == 0)
+ return;
+ if (Offset < 0) {
+ OS << " - " << -Offset;
+ return;
+ }
+ OS << " + " << Offset;
+}
+
+static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ auto Indices = TII->getSerializableTargetIndices();
+ auto Found = find_if(Indices, [&](const std::pair<int, const char *> &I) {
+ return I.first == Index;
+ });
+ if (Found != Indices.end())
+ return Found->second;
+ return nullptr;
+}
+
+static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
+ auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+ for (const auto &I : Flags) {
+ if (I.first == TF) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+void MachineOperand::printSubregIdx(raw_ostream &OS, uint64_t Index,
+ const TargetRegisterInfo *TRI) {
+ OS << "%subreg.";
+ if (TRI)
+ OS << TRI->getSubRegIndexName(Index);
+ else
+ OS << Index;
+}
+
+void MachineOperand::printTargetFlags(raw_ostream &OS,
+ const MachineOperand &Op) {
+ if (!Op.getTargetFlags())
+ return;
+ const MachineFunction *MF = getMFIfAvailable(Op);
+ if (!MF)
+ return;
+
+ const auto *TII = MF->getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags());
+ OS << "target-flags(";
+ const bool HasDirectFlags = Flags.first;
+ const bool HasBitmaskFlags = Flags.second;
+ if (!HasDirectFlags && !HasBitmaskFlags) {
+ OS << "<unknown>) ";
+ return;
+ }
+ if (HasDirectFlags) {
+ if (const auto *Name = getTargetFlagName(TII, Flags.first))
+ OS << Name;
+ else
+ OS << "<unknown target flag>";
+ }
+ if (!HasBitmaskFlags) {
+ OS << ") ";
+ return;
+ }
+ bool IsCommaNeeded = HasDirectFlags;
+ unsigned BitMask = Flags.second;
+ auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags();
+ for (const auto &Mask : BitMasks) {
+ // Check if the flag's bitmask has the bits of the current mask set.
+ if ((BitMask & Mask.first) == Mask.first) {
+ if (IsCommaNeeded)
+ OS << ", ";
+ IsCommaNeeded = true;
+ OS << Mask.second;
+ // Clear the bits which were serialized from the flag's bitmask.
+ BitMask &= ~(Mask.first);
+ }
+ }
+ if (BitMask) {
+ // When the resulting flag's bitmask isn't zero, we know that we didn't
+ // serialize all of the bit flags.
+ if (IsCommaNeeded)
+ OS << ", ";
+ OS << "<unknown bitmask target flag>";
+ }
+ OS << ") ";
+}
+
+void MachineOperand::printSymbol(raw_ostream &OS, MCSymbol &Sym) {
+ OS << "<mcsymbol " << Sym << ">";
+}
+
+void MachineOperand::printStackObjectReference(raw_ostream &OS,
+ unsigned FrameIndex,
+ bool IsFixed, StringRef Name) {
+ if (IsFixed) {
+ OS << "%fixed-stack." << FrameIndex;
+ return;
+ }
+
+ OS << "%stack." << FrameIndex;
+ if (!Name.empty())
+ OS << '.' << Name;
+}
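For reference, the two spellings the helper above produces in MIR output (the index and object name are illustrative):

    %fixed-stack.0     - a fixed object, e.g. an incoming argument slot
    %stack.1.myalloca  - an ordinary object, suffixed with its alloca's name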
+
+void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI,
+ const TargetIntrinsicInfo *IntrinsicInfo) const {
+ tryToGetTargetInfo(*this, TRI, IntrinsicInfo);
+ ModuleSlotTracker DummyMST(nullptr);
+ print(OS, DummyMST, LLT{}, /*PrintDef=*/false,
+ /*ShouldPrintRegisterTies=*/true,
+ /*TiedOperandIdx=*/0, TRI, IntrinsicInfo);
+}
+
+void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
+ LLT TypeToPrint, bool PrintDef,
+ bool ShouldPrintRegisterTies,
+ unsigned TiedOperandIdx,
+ const TargetRegisterInfo *TRI,
+ const TargetIntrinsicInfo *IntrinsicInfo) const {
+ printTargetFlags(OS, *this);
+ switch (getType()) {
+ case MachineOperand::MO_Register: {
+ unsigned Reg = getReg();
+ if (isImplicit())
+ OS << (isDef() ? "implicit-def " : "implicit ");
+ else if (PrintDef && isDef())
+ // Print the 'def' flag only when the operand is defined after '='.
+ OS << "def ";
+ if (isInternalRead())
+ OS << "internal ";
+ if (isDead())
+ OS << "dead ";
+ if (isKill())
+ OS << "killed ";
+ if (isUndef())
+ OS << "undef ";
+ if (isEarlyClobber())
+ OS << "early-clobber ";
+ if (isDebug())
+ OS << "debug-use ";
+ if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable())
+ OS << "renamable ";
+ OS << printReg(Reg, TRI);
+ // Print the sub register.
+ if (unsigned SubReg = getSubReg()) {
+ if (TRI)
+ OS << '.' << TRI->getSubRegIndexName(SubReg);
+ else
+ OS << ".subreg" << SubReg;
+ }
+ // Print the register class / bank.
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (!PrintDef || MRI.def_empty(Reg)) {
+ OS << ':';
+ OS << printRegClassOrBank(Reg, MRI, TRI);
+ }
+ }
+ }
+ // Print ties.
+ if (ShouldPrintRegisterTies && isTied() && !isDef())
+ OS << "(tied-def " << TiedOperandIdx << ")";
+ // Print types.
+ if (TypeToPrint.isValid())
+ OS << '(' << TypeToPrint << ')';
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ OS << getImm();
+ break;
+ case MachineOperand::MO_CImmediate:
+ getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+ break;
+ case MachineOperand::MO_FPImmediate:
+ if (getFPImm()->getType()->isFloatTy()) {
+ OS << getFPImm()->getValueAPF().convertToFloat();
+ } else if (getFPImm()->getType()->isHalfTy()) {
+ APFloat APF = getFPImm()->getValueAPF();
+ bool Unused;
+ APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused);
+ OS << "half " << APF.convertToFloat();
+ } else if (getFPImm()->getType()->isFP128Ty()) {
+ APFloat APF = getFPImm()->getValueAPF();
+ SmallString<16> Str;
+ getFPImm()->getValueAPF().toString(Str);
+ OS << "quad " << Str;
+ } else if (getFPImm()->getType()->isX86_FP80Ty()) {
+ APFloat APF = getFPImm()->getValueAPF();
+ OS << "x86_fp80 0xK";
+ APInt API = APF.bitcastToAPInt();
+ OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
+ /*Upper=*/true);
+ OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+ /*Upper=*/true);
+ } else {
+ OS << getFPImm()->getValueAPF().convertToDouble();
+ }
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << printMBBReference(*getMBB());
+ break;
+ case MachineOperand::MO_FrameIndex: {
+ int FrameIndex = getIndex();
+ bool IsFixed = false;
+ StringRef Name;
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ IsFixed = MFI.isFixedObjectIndex(FrameIndex);
+ if (const AllocaInst *Alloca = MFI.getObjectAllocation(FrameIndex))
+ if (Alloca->hasName())
+ Name = Alloca->getName();
+ if (IsFixed)
+ FrameIndex -= MFI.getObjectIndexBegin();
+ }
+ printStackObjectReference(OS, FrameIndex, IsFixed, Name);
+ break;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "%const." << getIndex();
+ printOffset(OS, getOffset());
+ break;
+ case MachineOperand::MO_TargetIndex: {
+ OS << "target-index(";
+ const char *Name = "<unknown>";
+ if (const MachineFunction *MF = getMFIfAvailable(*this))
+ if (const auto *TargetIndexName = getTargetIndexName(*MF, getIndex()))
+ Name = TargetIndexName;
+ OS << Name << ')';
+ printOffset(OS, getOffset());
+ break;
+ }
+ case MachineOperand::MO_JumpTableIndex:
+ OS << printJumpTableEntryReference(getIndex());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
+ printOffset(OS, getOffset());
+ break;
+ case MachineOperand::MO_ExternalSymbol: {
+ StringRef Name = getSymbolName();
+ OS << '$';
+ if (Name.empty()) {
+ OS << "\"\"";
+ } else {
+ printLLVMNameWithoutPrefix(OS, Name);
+ }
+ printOffset(OS, getOffset());
+ break;
+ }
+ case MachineOperand::MO_BlockAddress:
+ OS << '<';
+ getBlockAddress()->printAsOperand(OS, /*PrintType=*/false, MST);
+ if (getOffset())
+ OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_RegisterMask: {
+ OS << "<regmask";
+ if (TRI) {
+ unsigned NumRegsInMask = 0;
+ unsigned NumRegsEmitted = 0;
+ for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
+ unsigned MaskWord = i / 32;
+ unsigned MaskBit = i % 32;
+ if (getRegMask()[MaskWord] & (1 << MaskBit)) {
+ if (PrintRegMaskNumRegs < 0 ||
+ NumRegsEmitted <= static_cast<unsigned>(PrintRegMaskNumRegs)) {
+ OS << " " << printReg(i, TRI);
+ NumRegsEmitted++;
+ }
+ NumRegsInMask++;
+ }
+ }
+ if (NumRegsEmitted != NumRegsInMask)
+ OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more...";
+ } else {
+ OS << " ...";
+ }
+ OS << ">";
+ break;
+ }
+ case MachineOperand::MO_RegisterLiveOut: {
+ const uint32_t *RegMask = getRegLiveOut();
+ OS << "liveout(";
+ if (!TRI) {
+ OS << "<unknown>";
+ } else {
+ bool IsCommaNeeded = false;
+ for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) {
+ if (RegMask[Reg / 32] & (1U << (Reg % 32))) {
+ if (IsCommaNeeded)
+ OS << ", ";
+ OS << printReg(Reg, TRI);
+ IsCommaNeeded = true;
+ }
+ }
+ }
+ OS << ")";
+ break;
+ }
+ case MachineOperand::MO_Metadata:
+ getMetadata()->printAsOperand(OS, MST);
+ break;
+ case MachineOperand::MO_MCSymbol:
+ printSymbol(OS, *getMCSymbol());
+ break;
+ case MachineOperand::MO_CFIIndex:
+ OS << "<call frame instruction>";
+ break;
+ case MachineOperand::MO_IntrinsicID: {
+ Intrinsic::ID ID = getIntrinsicID();
+ if (ID < Intrinsic::num_intrinsics)
+ OS << "<intrinsic:@" << Intrinsic::getName(ID, None) << '>';
+ else if (IntrinsicInfo)
+ OS << "<intrinsic:@" << IntrinsicInfo->getName(ID) << '>';
+ else
+ OS << "<intrinsic:" << ID << '>';
+ break;
+ }
+ case MachineOperand::MO_Predicate: {
+ auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
+ OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred")
+ << CmpInst::getPredicateName(Pred) << '>';
+ break;
+ }
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineOperand::dump() const { dbgs() << *this << '\n'; }
+#endif
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const { return AddrSpace; }
+
+/// isDereferenceable - Return true if V is always dereferenceable for
+/// Offset + Size bytes.
+bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
+ const DataLayout &DL) const {
+ if (!V.is<const Value *>())
+ return false;
+
+ const Value *BasePtr = V.get<const Value *>();
+ if (BasePtr == nullptr)
+ return false;
+
+ return isDereferenceableAndAlignedPointer(
+ BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL);
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to
+/// the specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF,
+ int FI, int64_t Offset) {
+ return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
+ int64_t Offset, uint8_t ID) {
+ return MachinePointerInfo(MF.getPSVManager().getStack(), Offset, ID);
+}
+
+MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getDataLayout().getAllocaAddrSpace());
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
+ uint64_t s, unsigned int a,
+ const AAMDNodes &AAInfo,
+ const MDNode *Ranges, SyncScope::ID SSID,
+ AtomicOrdering Ordering,
+ AtomicOrdering FailureOrdering)
+ : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1),
+ AAInfo(AAInfo), Ranges(Ranges) {
+ assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() ||
+ isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) &&
+ "invalid pointer value");
+ assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+ assert((isLoad() || isStore()) && "Not a load/store!");
+
+ AtomicInfo.SSID = static_cast<unsigned>(SSID);
+ assert(getSyncScopeID() == SSID && "Value truncated");
+ AtomicInfo.Ordering = static_cast<unsigned>(Ordering);
+ assert(getOrdering() == Ordering && "Value truncated");
+ AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering);
+ assert(getFailureOrdering() == FailureOrdering && "Value truncated");
+}
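A worked example of the alignment encoding above, spelled out as an assumption from BaseAlignLog2(Log2_32(a) + 1): zero is reserved for "no alignment", so the stored field is log2 of the alignment plus one (Log2_32 is from llvm/Support/MathExtras.h):

    // encode: alignment 16 -> Log2_32(16) + 1 == 5
    // decode: 5 -> 1 << (5 - 1) == 16
    uint64_t encodeAlign(uint64_t A) { return Log2_32(A) + 1; }
    uint64_t decodeAlign(uint64_t E) { return UINT64_C(1) << (E - 1); }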
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(getOffset());
+ ID.AddInteger(Size);
+ ID.AddPointer(getOpaqueValue());
+ ID.AddInteger(getFlags());
+ ID.AddInteger(getBaseAlignment());
+}
+
+void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
+ // The Value and Offset may differ due to CSE. But the flags and size
+ // should be the same.
+ assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
+ assert(MMO->getSize() == getSize() && "Size mismatch!");
+
+ if (MMO->getBaseAlignment() >= getBaseAlignment()) {
+ // Update the alignment value.
+ BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1;
+ // Also update the base and offset, because the new alignment may
+ // not be applicable with the old ones.
+ PtrInfo = MMO->PtrInfo;
+ }
+}
+
+/// getAlignment - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+uint64_t MachineMemOperand::getAlignment() const {
+ return MinAlign(getBaseAlignment(), getOffset());
+}
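Hedged spot-checks of the MinAlign computation above (MinAlign lives in llvm/Support/MathExtras.h and returns the largest power of two dividing both arguments):

    assert(MinAlign(8, 0) == 8);   // a zero offset keeps the base alignment
    assert(MinAlign(16, 6) == 2);  // base align 16 at offset 6 is only 2-aligned
    assert(MinAlign(16, 8) == 8);  // ...and at offset 8, still 8-aligned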
+
+void MachineMemOperand::print(raw_ostream &OS) const {
+ ModuleSlotTracker DummyMST(nullptr);
+ print(OS, DummyMST);
+}
+void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
+ assert((isLoad() || isStore()) && "SV has to be a load, store or both.");
+
+ if (isVolatile())
+ OS << "Volatile ";
+
+ if (isLoad())
+ OS << "LD";
+ if (isStore())
+ OS << "ST";
+ OS << getSize();
+
+ // Print the address information.
+ OS << "[";
+ if (const Value *V = getValue())
+ V->printAsOperand(OS, /*PrintType=*/false, MST);
+ else if (const PseudoSourceValue *PSV = getPseudoValue())
+ PSV->printCustom(OS);
+ else
+ OS << "<unknown>";
+
+ unsigned AS = getAddrSpace();
+ if (AS != 0)
+ OS << "(addrspace=" << AS << ')';
+
+ // If the alignment of the memory reference itself differs from the alignment
+ // of the base pointer, print the base alignment explicitly, next to the base
+ // pointer.
+ if (getBaseAlignment() != getAlignment())
+ OS << "(align=" << getBaseAlignment() << ")";
+
+ if (getOffset() != 0)
+ OS << "+" << getOffset();
+ OS << "]";
+
+ // Print the alignment of the reference.
+ if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize())
+ OS << "(align=" << getAlignment() << ")";
+
+ // Print TBAA info.
+ if (const MDNode *TBAAInfo = getAAInfo().TBAA) {
+ OS << "(tbaa=";
+ if (TBAAInfo->getNumOperands() > 0)
+ TBAAInfo->getOperand(0)->printAsOperand(OS, MST);
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print AA scope info.
+ if (const MDNode *ScopeInfo = getAAInfo().Scope) {
+ OS << "(alias.scope=";
+ if (ScopeInfo->getNumOperands() > 0)
+ for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) {
+ ScopeInfo->getOperand(i)->printAsOperand(OS, MST);
+ if (i != ie - 1)
+ OS << ",";
+ }
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print AA noalias scope info.
+ if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) {
+ OS << "(noalias=";
+ if (NoAliasInfo->getNumOperands() > 0)
+ for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) {
+ NoAliasInfo->getOperand(i)->printAsOperand(OS, MST);
+ if (i != ie - 1)
+ OS << ",";
+ }
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ if (const MDNode *Ranges = getRanges()) {
+ unsigned NumRanges = Ranges->getNumOperands();
+ if (NumRanges != 0) {
+ OS << "(ranges=";
+
+ for (unsigned I = 0; I != NumRanges; ++I) {
+ Ranges->getOperand(I)->printAsOperand(OS, MST);
+ if (I != NumRanges - 1)
+ OS << ',';
+ }
+
+ OS << ')';
+ }
+ }
+
+ if (isNonTemporal())
+ OS << "(nontemporal)";
+ if (isDereferenceable())
+ OS << "(dereferenceable)";
+ if (isInvariant())
+ OS << "(invariant)";
+ if (getFlags() & MOTargetFlag1)
+ OS << "(flag1)";
+ if (getFlags() & MOTargetFlag2)
+ OS << "(flag2)";
+ if (getFlags() & MOTargetFlag3)
+ OS << "(flag3)";
+}
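
For reference, a memory operand printed by the routine above comes out roughly as follows (illustrative; the exact pointer spelling depends on the IR value, and the parenthesized alignments appear only when they differ as described in the comments):

Volatile LD4[%p(addrspace=1)(align=16)+8](align=8)(nontemporal)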
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 73c3428a6e53..ca4452218da1 100644
--- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -16,7 +16,6 @@
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LLVMContext.h"
@@ -51,24 +50,15 @@ void MachineOptimizationRemarkEmitter::emit(
auto &OptDiag = cast<DiagnosticInfoMIROptimization>(OptDiagCommon);
computeHotness(OptDiag);
- LLVMContext &Ctx = MF.getFunction()->getContext();
+ LLVMContext &Ctx = MF.getFunction().getContext();
- // If a diagnostic has a hotness value, then only emit it if its hotness
- // meets the threshold.
- if (OptDiag.getHotness() &&
- *OptDiag.getHotness() < Ctx.getDiagnosticsHotnessThreshold()) {
+ // Only emit the diagnostic if its hotness meets the threshold; a
+ // diagnostic with no hotness value is treated as hotness 0.
+ if (OptDiag.getHotness().getValueOr(0) <
+ Ctx.getDiagnosticsHotnessThreshold()) {
return;
}
- yaml::Output *Out = Ctx.getDiagnosticsOutputFile();
- if (Out) {
- auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiagCommon);
- *Out << P;
- }
- // FIXME: now that IsVerbose is part of DI, filtering for this will be moved
- // from here to clang.
- if (!OptDiag.isVerbose() || shouldEmitVerbose())
- Ctx.diagnose(OptDiag);
+ Ctx.diagnose(OptDiag);
}
MachineOptimizationRemarkEmitterPass::MachineOptimizationRemarkEmitterPass()
@@ -81,7 +71,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
MachineFunction &MF) {
MachineBlockFrequencyInfo *MBFI;
- if (MF.getFunction()->getContext().getDiagnosticsHotnessRequested())
+ if (MF.getFunction().getContext().getDiagnosticsHotnessRequested())
MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
else
MBFI = nullptr;
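
The getValueOr(0) form above changes behavior for remarks that carry no hotness value: the old code always emitted them, while the new code drops them whenever the threshold is positive. A small sketch of the two filters, assuming llvm::Optional:

// Illustrative comparison of the old and new drop conditions.
Optional<uint64_t> Hotness;  // remark carries no hotness information
uint64_t Threshold = 100;    // threshold taken from the LLVMContext
bool OldDropped = Hotness && *Hotness < Threshold;   // false: was emitted
bool NewDropped = Hotness.getValueOr(0) < Threshold; // true: now filtered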
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index fd6b2427891d..e4eb8802ac66 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -15,6 +15,23 @@
/// instructions. If a sequence of instructions appears often, then it ought
/// to be beneficial to pull it out into a function.
///
+/// The MachineOutliner communicates with a given target using hooks defined in
+/// TargetInstrInfo.h. The target supplies the outliner with information on how
+/// a specific sequence of instructions should be outlined. This information
+/// is used to deduce the number of instructions necessary to
+///
+/// * Create an outlined function
+/// * Call that outlined function
+///
+/// Targets must implement
+/// * getOutliningCandidateInfo
+/// * insertOutlinerEpilogue
+/// * insertOutlinedCall
+/// * insertOutlinerPrologue
+/// * isFunctionSafeToOutlineFrom
+///
+/// in order to make use of the MachineOutliner.
+///
/// This was originally presented at the 2016 LLVM Developers' Meeting in the
/// talk "Reducing Code Size Using Outlining". For a high-level overview of
/// how this pass works, the talk is available on YouTube at
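
A hypothetical target-side stub, with signatures inferred from the call sites later in this patch; a real target must match the declarations in TargetInstrInfo.h exactly, and "MyTarget" and the Naked-attribute check are illustrative only:

TargetInstrInfo::MachineOutlinerInfo
MyTargetInstrInfo::getOutliningCandidateInfo(
    std::vector<std::pair<MachineBasicBlock::iterator,
                          MachineBasicBlock::iterator>> &RepeatedSequenceLocs)
    const {
  TargetInstrInfo::MachineOutlinerInfo Info;
  Info.CallOverhead = 1;  // one instruction to call the outlined function
  Info.FrameOverhead = 1; // one instruction (e.g. a return) for the frame
  return Info;
}

bool MyTargetInstrInfo::isFunctionSafeToOutlineFrom(
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
  // Conservatively refuse functions the target cannot reason about.
  return !MF.getFunction().hasFnAttribute(Attribute::Naked);
}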
@@ -42,19 +59,17 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <functional>
#include <map>
#include <sstream>
@@ -64,6 +79,7 @@
#define DEBUG_TYPE "machine-outliner"
using namespace llvm;
+using namespace ore;
STATISTIC(NumOutlined, "Number of candidates outlined");
STATISTIC(FunctionsCreated, "Number of functions created");
@@ -73,19 +89,32 @@ namespace {
/// \brief An individual sequence of instructions to be replaced with a call to
/// an outlined function.
struct Candidate {
+private:
+ /// The start index of this \p Candidate in the instruction list.
+ unsigned StartIdx;
+ /// The number of instructions in this \p Candidate.
+ unsigned Len;
+
+public:
/// Set to false if the candidate overlapped with another candidate.
bool InCandidateList = true;
- /// The start index of this \p Candidate.
- size_t StartIdx;
+ /// \brief The index of this \p Candidate's \p OutlinedFunction in the list of
+ /// \p OutlinedFunctions.
+ unsigned FunctionIdx;
- /// The number of instructions in this \p Candidate.
- size_t Len;
+ /// Contains all target-specific information for this \p Candidate.
+ TargetInstrInfo::MachineOutlinerInfo MInfo;
- /// The index of this \p Candidate's \p OutlinedFunction in the list of
- /// \p OutlinedFunctions.
- size_t FunctionIdx;
+ /// Return the number of instructions in this Candidate.
+ unsigned getLength() const { return Len; }
+
+ /// Return the start index of this candidate.
+ unsigned getStartIdx() const { return StartIdx; }
+
+ /// Return the end index of this candidate.
+ unsigned getEndIdx() const { return StartIdx + Len - 1; }
/// \brief The number of instructions that would be saved by outlining every
/// candidate of this type.
@@ -96,51 +125,79 @@ struct Candidate {
/// for some given candidate.
unsigned Benefit = 0;
- Candidate(size_t StartIdx, size_t Len, size_t FunctionIdx)
+ Candidate(unsigned StartIdx, unsigned Len, unsigned FunctionIdx)
: StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {}
Candidate() {}
/// \brief Used to ensure that \p Candidates are outlined in an order that
/// preserves the start and end indices of other \p Candidates.
- bool operator<(const Candidate &RHS) const { return StartIdx > RHS.StartIdx; }
+ bool operator<(const Candidate &RHS) const {
+ return getStartIdx() > RHS.getStartIdx();
+ }
};
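
The inverted comparison is deliberate: sorting "ascending" with this operator< yields candidates in decreasing start-index order, so erasing a candidate's instructions never shifts the indices of candidates still to be processed. A minimal sketch:

// Illustrative, using the Candidate(StartIdx, Len, FunctionIdx) constructor
// declared above; requires <algorithm> and <vector>.
std::vector<Candidate> Cs = {Candidate(10, 2, 0), Candidate(40, 2, 1),
                             Candidate(25, 2, 2)};
std::stable_sort(Cs.begin(), Cs.end());
// Start indices are now 40, 25, 10: the rightmost sequence is cut out first.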
/// \brief The information necessary to create an outlined function for some
/// class of candidate.
struct OutlinedFunction {
+private:
+ /// The number of candidates for this \p OutlinedFunction.
+ unsigned OccurrenceCount = 0;
+
+public:
+ std::vector<std::shared_ptr<Candidate>> Candidates;
+
/// The actual outlined function created.
/// This is initialized after we go through and create the actual function.
MachineFunction *MF = nullptr;
/// A number assigned to this function which appears at the end of its name.
- size_t Name;
-
- /// The number of candidates for this OutlinedFunction.
- size_t OccurrenceCount = 0;
+ unsigned Name;
/// \brief The sequence of integers corresponding to the instructions in this
/// function.
std::vector<unsigned> Sequence;
- /// The number of instructions this function would save.
- unsigned Benefit = 0;
+ /// Contains all target-specific information for this \p OutlinedFunction.
+ TargetInstrInfo::MachineOutlinerInfo MInfo;
+
+ /// Return the number of candidates for this \p OutlinedFunction.
+ unsigned getOccurrenceCount() { return OccurrenceCount; }
+
+ /// Decrement the occurrence count of this OutlinedFunction and return the
+ /// new count.
+ unsigned decrement() {
+ assert(OccurrenceCount > 0 && "Can't decrement an empty function!");
+ OccurrenceCount--;
+ return getOccurrenceCount();
+ }
+
+ /// \brief Return the number of instructions it would take to outline this
+ /// function.
+ unsigned getOutliningCost() {
+ return (OccurrenceCount * MInfo.CallOverhead) + Sequence.size() +
+ MInfo.FrameOverhead;
+ }
- /// \brief Set to true if candidates for this outlined function should be
- /// replaced with tail calls to this OutlinedFunction.
- bool IsTailCall = false;
+ /// \brief Return the number of instructions that would be saved by outlining
+ /// this function.
+ unsigned getBenefit() {
+ unsigned NotOutlinedCost = OccurrenceCount * Sequence.size();
+ unsigned OutlinedCost = getOutliningCost();
+ return (NotOutlinedCost < OutlinedCost) ? 0
+ : NotOutlinedCost - OutlinedCost;
+ }
- OutlinedFunction(size_t Name, size_t OccurrenceCount,
+ OutlinedFunction(unsigned Name, unsigned OccurrenceCount,
const std::vector<unsigned> &Sequence,
- unsigned Benefit, bool IsTailCall)
- : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence),
- Benefit(Benefit), IsTailCall(IsTailCall)
- {}
+ TargetInstrInfo::MachineOutlinerInfo &MInfo)
+ : OccurrenceCount(OccurrenceCount), Name(Name), Sequence(Sequence),
+ MInfo(MInfo) {}
};
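
A worked instance of the cost model above, with illustrative numbers:

// Sequence.size() == 5, OccurrenceCount == 3,
// MInfo.CallOverhead == 1, MInfo.FrameOverhead == 1:
//   getOutliningCost() == 3*1 + 5 + 1 == 9   (calls + body + frame)
//   NotOutlinedCost    == 3*5         == 15  (three inline copies)
//   getBenefit()       == 15 - 9      == 6   instructions saved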
/// Represents an undefined index in the suffix tree.
-const size_t EmptyIdx = -1;
+const unsigned EmptyIdx = -1;
/// A node in a suffix tree which represents a substring or suffix.
///
@@ -170,7 +227,7 @@ struct SuffixTreeNode {
bool IsInTree = true;
/// The start index of this node's substring in the main string.
- size_t StartIdx = EmptyIdx;
+ unsigned StartIdx = EmptyIdx;
/// The end index of this node's substring in the main string.
///
@@ -178,24 +235,23 @@ struct SuffixTreeNode {
/// step in the construction algorithm. To avoid having to update O(N)
/// nodes individually at the end of every step, the end index is stored
/// as a pointer.
- size_t *EndIdx = nullptr;
+ unsigned *EndIdx = nullptr;
/// For leaves, the start index of the suffix represented by this node.
///
/// For all other nodes, this is ignored.
- size_t SuffixIdx = EmptyIdx;
+ unsigned SuffixIdx = EmptyIdx;
/// \brief For internal nodes, a pointer to the internal node representing
/// the same sequence with the first character chopped off.
///
- /// This has two major purposes in the suffix tree. The first is as a
- /// shortcut in Ukkonen's construction algorithm. One of the things that
+ /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
/// Ukkonen's algorithm does to achieve linear-time construction is
/// keep track of which node the next insert should be at. This makes each
/// insert O(1), and there are a total of O(N) inserts. The suffix link
/// helps with inserting children of internal nodes.
///
- /// Say we add a child to an internal node with associated mapping S. The
+ /// Say we add a child to an internal node with associated mapping S. The
/// next insertion must be at the node representing S - its first character.
/// This is given by the way that we iteratively build the tree in Ukkonen's
/// algorithm. The main idea is to look at the suffixes of each prefix in the
@@ -204,27 +260,6 @@ struct SuffixTreeNode {
/// move to the next insertion point in O(1) time. If we don't, then we'd
/// have to query from the root, which takes O(N) time. This would make the
/// construction algorithm O(N^2) rather than O(N).
- ///
- /// The suffix link is also used during the tree pruning process to let us
- /// quickly throw out a bunch of potential overlaps. Say we have a sequence
- /// S we want to outline. Then each of its suffixes contribute to at least
- /// one overlapping case. Therefore, we can follow the suffix links
- /// starting at the node associated with S to the root and "delete" those
- /// nodes, save for the root. For each candidate, this removes
- /// O(|candidate|) overlaps from the search space. We don't actually
- /// completely invalidate these nodes though; doing that is far too
- /// aggressive. Consider the following pathological string:
- ///
- /// 1 2 3 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3
- ///
- /// If we, for the sake of example, outlined 1 2 3, then we would throw
- /// out all instances of 2 3. This isn't desirable. To get around this,
- /// when we visit a link node, we decrement its occurrence count by the
- /// number of sequences we outlined in the current step. In the pathological
- /// example, the 2 3 node would have an occurrence count of 8, while the
- /// 1 2 3 node would have an occurrence count of 2. Thus, the 2 3 node
- /// would survive to the next round allowing us to outline the extra
- /// instances of 2 3.
SuffixTreeNode *Link = nullptr;
/// The parent of this node. Every node except for the root has a parent.
@@ -234,11 +269,11 @@ struct SuffixTreeNode {
///
/// This is equal to the number of leaf children of the string. It represents
/// the number of suffixes that the node's string is a prefix of.
- size_t OccurrenceCount = 0;
+ unsigned OccurrenceCount = 0;
/// The length of the string formed by concatenating the edge labels from the
/// root to this node.
- size_t ConcatLen = 0;
+ unsigned ConcatLen = 0;
/// Returns true if this node is a leaf.
bool isLeaf() const { return SuffixIdx != EmptyIdx; }
@@ -260,7 +295,7 @@ struct SuffixTreeNode {
return *EndIdx - StartIdx + 1;
}
- SuffixTreeNode(size_t StartIdx, size_t *EndIdx, SuffixTreeNode *Link,
+ SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link,
SuffixTreeNode *Parent)
: StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
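
The pointer-typed EndIdx above is what makes leaf extension O(1): every leaf aliases a single integer owned by the tree, so one store extends all of them. A simplified sketch:

// Simplified: two leaves sharing the tree's end index (parents omitted).
unsigned LeafEndIdx = 0;
SuffixTreeNode Leaf1(0, &LeafEndIdx, nullptr, /*Parent=*/nullptr);
SuffixTreeNode Leaf2(3, &LeafEndIdx, nullptr, /*Parent=*/nullptr);
LeafEndIdx = 7; // one write: both leaves now end at index 7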
@@ -290,10 +325,16 @@ struct SuffixTreeNode {
///
/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
class SuffixTree {
-private:
+public:
+ /// Stores each leaf node in the tree.
+ ///
+ /// This is used for finding outlining candidates.
+ std::vector<SuffixTreeNode *> LeafVector;
+
/// Each element is an integer representing an instruction in the module.
ArrayRef<unsigned> Str;
+private:
/// Maintains each node in the tree.
SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
@@ -303,11 +344,6 @@ private:
/// \p NodeAllocator like every other node in the tree.
SuffixTreeNode *Root = nullptr;
- /// Stores each leaf node in the tree.
- ///
- /// This is used for finding outlining candidates.
- std::vector<SuffixTreeNode *> LeafVector;
-
/// Maintains the end indices of the internal nodes in the tree.
///
/// Each internal node is guaranteed to never have its end index change
@@ -318,7 +354,7 @@ private:
BumpPtrAllocator InternalEndIdxAllocator;
/// The end index of each leaf in the tree.
- size_t LeafEndIdx = -1;
+ unsigned LeafEndIdx = -1;
/// \brief Helper struct which keeps track of the next insertion point in
/// Ukkonen's algorithm.
@@ -327,10 +363,10 @@ private:
SuffixTreeNode *Node;
/// The index of the first character in the substring currently being added.
- size_t Idx = EmptyIdx;
+ unsigned Idx = EmptyIdx;
/// The length of the substring we have to add at the current step.
- size_t Len = 0;
+ unsigned Len = 0;
};
/// \brief The point the next insertion will take place at in the
@@ -344,15 +380,13 @@ private:
/// \param Edge The label on the edge leaving \p Parent to this node.
///
/// \returns A pointer to the allocated leaf node.
- SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, size_t StartIdx,
+ SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
unsigned Edge) {
assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
- SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx,
- &LeafEndIdx,
- nullptr,
- &Parent);
+ SuffixTreeNode *N = new (NodeAllocator.Allocate())
+ SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent);
Parent.Children[Edge] = N;
return N;
@@ -366,18 +400,16 @@ private:
/// \param Edge The label on the edge leaving \p Parent to this node.
///
/// \returns A pointer to the allocated internal node.
- SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, size_t StartIdx,
- size_t EndIdx, unsigned Edge) {
+ SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
+ unsigned EndIdx, unsigned Edge) {
assert(StartIdx <= EndIdx && "String can't start after it ends!");
assert(!(!Parent && StartIdx != EmptyIdx) &&
- "Non-root internal nodes must have parents!");
+ "Non-root internal nodes must have parents!");
- size_t *E = new (InternalEndIdxAllocator) size_t(EndIdx);
- SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx,
- E,
- Root,
- Parent);
+ unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
+ SuffixTreeNode *N = new (NodeAllocator.Allocate())
+ SuffixTreeNode(StartIdx, E, Root, Parent);
if (Parent)
Parent->Children[Edge] = N;
@@ -390,7 +422,7 @@ private:
///
/// \param[in] CurrNode The node currently being visited.
/// \param CurrIdx The current index of the string being visited.
- void setSuffixIndices(SuffixTreeNode &CurrNode, size_t CurrIdx) {
+ void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) {
bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
@@ -401,14 +433,13 @@ private:
CurrNode.ConcatLen = CurrNode.size();
if (CurrNode.Parent)
- CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
+ CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
}
// Traverse the tree depth-first.
for (auto &ChildPair : CurrNode.Children) {
assert(ChildPair.second && "Node had a null child!");
- setSuffixIndices(*ChildPair.second,
- CurrIdx + ChildPair.second->size());
+ setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size());
}
// Is this node a leaf?
@@ -437,11 +468,11 @@ private:
///
/// \returns The number of suffixes that have not been added at the end of
/// this step.
- unsigned extend(size_t EndIdx, size_t SuffixesToAdd) {
+ unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) {
SuffixTreeNode *NeedsLink = nullptr;
while (SuffixesToAdd > 0) {
-
+
// Are we waiting to add anything other than just the last character?
if (Active.Len == 0) {
// If not, then say the active index is the end index.
@@ -469,7 +500,7 @@ private:
// insert a new node.
SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
- size_t SubstringLen = NextNode->size();
+ unsigned SubstringLen = NextNode->size();
// Is the current suffix we're trying to insert longer than the size of
// the child we want to move to?
@@ -515,10 +546,8 @@ private:
// The node s from the diagram
SuffixTreeNode *SplitNode =
- insertInternalNode(Active.Node,
- NextNode->StartIdx,
- NextNode->StartIdx + Active.Len - 1,
- FirstChar);
+ insertInternalNode(Active.Node, NextNode->StartIdx,
+ NextNode->StartIdx + Active.Len - 1, FirstChar);
// Insert the new node representing the new substring into the tree as
// a child of the split node. This is the node l from the diagram.
@@ -556,87 +585,6 @@ private:
}
public:
-
- /// Find all repeated substrings that satisfy \p BenefitFn.
- ///
- /// If a substring appears at least twice, then it must be represented by
- /// an internal node which appears in at least two suffixes. Each suffix is
- /// represented by a leaf node. To do this, we visit each internal node in
- /// the tree, using the leaf children of each internal node. If an internal
- /// node represents a beneficial substring, then we use each of its leaf
- /// children to find the locations of its substring.
- ///
- /// \param[out] CandidateList Filled with candidates representing each
- /// beneficial substring.
- /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each
- /// type of candidate.
- /// \param BenefitFn The function to satisfy.
- ///
- /// \returns The length of the longest candidate found.
- size_t findCandidates(std::vector<Candidate> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
- const std::function<unsigned(SuffixTreeNode &, size_t, unsigned)>
- &BenefitFn) {
-
- CandidateList.clear();
- FunctionList.clear();
- size_t FnIdx = 0;
- size_t MaxLen = 0;
-
- for (SuffixTreeNode* Leaf : LeafVector) {
- assert(Leaf && "Leaves in LeafVector cannot be null!");
- if (!Leaf->IsInTree)
- continue;
-
- assert(Leaf->Parent && "All leaves must have parents!");
- SuffixTreeNode &Parent = *(Leaf->Parent);
-
- // If it doesn't appear enough, or we already outlined from it, skip it.
- if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
- continue;
-
- size_t StringLen = Leaf->ConcatLen - Leaf->size();
-
- // How many instructions would outlining this string save?
- unsigned Benefit = BenefitFn(Parent,
- StringLen, Str[Leaf->SuffixIdx + StringLen - 1]);
-
- // If it's not beneficial, skip it.
- if (Benefit < 1)
- continue;
-
- if (StringLen > MaxLen)
- MaxLen = StringLen;
-
- unsigned OccurrenceCount = 0;
- for (auto &ChildPair : Parent.Children) {
- SuffixTreeNode *M = ChildPair.second;
-
- // Is it a leaf? If so, we have an occurrence of this candidate.
- if (M && M->IsInTree && M->isLeaf()) {
- OccurrenceCount++;
- CandidateList.emplace_back(M->SuffixIdx, StringLen, FnIdx);
- CandidateList.back().Benefit = Benefit;
- M->IsInTree = false;
- }
- }
-
- // Save the function for the new candidate sequence.
- std::vector<unsigned> CandidateSequence;
- for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
- CandidateSequence.push_back(Str[i]);
-
- FunctionList.emplace_back(FnIdx, OccurrenceCount, CandidateSequence,
- Benefit, false);
-
- // Move to the next function.
- FnIdx++;
- Parent.IsInTree = false;
- }
-
- return MaxLen;
- }
-
/// Construct a suffix tree from a sequence of unsigned integers.
///
/// \param Str The string to construct the suffix tree for.
@@ -644,17 +592,18 @@ public:
Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
Root->IsInTree = true;
Active.Node = Root;
- LeafVector = std::vector<SuffixTreeNode*>(Str.size());
+ LeafVector = std::vector<SuffixTreeNode *>(Str.size());
// Keep track of the number of suffixes we have to add of the current
// prefix.
- size_t SuffixesToAdd = 0;
+ unsigned SuffixesToAdd = 0;
Active.Node = Root;
// Construct the suffix tree iteratively on each prefix of the string.
// PfxEndIdx is the end index of the current prefix.
// End is one past the last element in the string.
- for (size_t PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
+ for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End;
+ PfxEndIdx++) {
SuffixesToAdd++;
LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
@@ -708,9 +657,9 @@ struct InstructionMapper {
MachineInstr &MI = *It;
bool WasInserted;
DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
- ResultIt;
+ ResultIt;
std::tie(ResultIt, WasInserted) =
- InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
+ InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
unsigned MINumber = ResultIt->second;
// There was an insertion.
@@ -725,10 +674,10 @@ struct InstructionMapper {
if (LegalInstrNumber >= IllegalInstrNumber)
report_fatal_error("Instruction mapping overflow!");
- assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey()
- && "Tried to assign DenseMap tombstone or empty key to instruction.");
- assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey()
- && "Tried to assign DenseMap tombstone or empty key to instruction.");
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
+ "Tried to assign DenseMap tombstone or empty key to instruction.");
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
+ "Tried to assign DenseMap tombstone or empty key to instruction.");
return MINumber;
}
@@ -748,13 +697,11 @@ struct InstructionMapper {
assert(LegalInstrNumber < IllegalInstrNumber &&
"Instruction mapping overflow!");
- assert(IllegalInstrNumber !=
- DenseMapInfo<unsigned>::getEmptyKey() &&
- "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+ assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
+ "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
- assert(IllegalInstrNumber !=
- DenseMapInfo<unsigned>::getTombstoneKey() &&
- "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+ assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
+ "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
return MINumber;
}
@@ -777,17 +724,17 @@ struct InstructionMapper {
It++) {
// Keep track of where this instruction is in the module.
- switch(TII.getOutliningType(*It)) {
- case TargetInstrInfo::MachineOutlinerInstrType::Illegal:
- mapToIllegalUnsigned(It);
- break;
+ switch (TII.getOutliningType(*It)) {
+ case TargetInstrInfo::MachineOutlinerInstrType::Illegal:
+ mapToIllegalUnsigned(It);
+ break;
- case TargetInstrInfo::MachineOutlinerInstrType::Legal:
- mapToLegalUnsigned(It);
- break;
+ case TargetInstrInfo::MachineOutlinerInstrType::Legal:
+ mapToLegalUnsigned(It);
+ break;
- case TargetInstrInfo::MachineOutlinerInstrType::Invisible:
- break;
+ case TargetInstrInfo::MachineOutlinerInstrType::Invisible:
+ break;
}
}
@@ -804,9 +751,9 @@ struct InstructionMapper {
// Make sure that the implementation of DenseMapInfo<unsigned> hasn't
// changed.
assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
- "DenseMapInfo<unsigned>'s empty key isn't -1!");
+ "DenseMapInfo<unsigned>'s empty key isn't -1!");
assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
- "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
+ "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
}
};
@@ -823,6 +770,10 @@ struct MachineOutliner : public ModulePass {
static char ID;
+ /// \brief Set to true if the outliner should consider functions with
+ /// linkonce_odr linkage.
+ bool OutlineFromLinkOnceODRs = false;
+
StringRef getPassName() const override { return "Machine Outliner"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -832,10 +783,35 @@ struct MachineOutliner : public ModulePass {
ModulePass::getAnalysisUsage(AU);
}
- MachineOutliner() : ModulePass(ID) {
+ MachineOutliner(bool OutlineFromLinkOnceODRs = false)
+ : ModulePass(ID), OutlineFromLinkOnceODRs(OutlineFromLinkOnceODRs) {
initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
}
+ /// Find all repeated substrings that satisfy the outlining cost model.
+ ///
+ /// If a substring appears at least twice, then it must be represented by
+ /// an internal node which appears in at least two suffixes. Each suffix is
+ /// represented by a leaf node. To do this, we visit each internal node in
+ /// the tree, using the leaf children of each internal node. If an internal
+ /// node represents a beneficial substring, then we use each of its leaf
+ /// children to find the locations of its substring.
+ ///
+ /// \param ST A suffix tree to query.
+ /// \param TII TargetInstrInfo for the target.
+ /// \param Mapper Contains outlining mapping information.
+ /// \param[out] CandidateList Filled with candidates representing each
+ /// beneficial substring.
+ /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions,
+ /// one for each type of candidate.
+ ///
+ /// \returns The length of the longest candidate found.
+ unsigned
+ findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
+ InstructionMapper &Mapper,
+ std::vector<std::shared_ptr<Candidate>> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList);
+
/// \brief Replace the sequences of instructions represented by the
/// \p Candidates in \p CandidateList with calls to \p MachineFunctions
/// described in \p FunctionList.
@@ -844,7 +820,8 @@ struct MachineOutliner : public ModulePass {
/// \param CandidateList A list of candidates to be outlined.
/// \param FunctionList A list of functions to be inserted into the module.
/// \param Mapper Contains the instruction mappings for the module.
- bool outline(Module &M, const ArrayRef<Candidate> &CandidateList,
+ bool outline(Module &M,
+ const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList,
InstructionMapper &Mapper);
@@ -865,11 +842,15 @@ struct MachineOutliner : public ModulePass {
/// \param TII TargetInstrInfo for the module.
///
/// \returns The length of the longest candidate found. 0 if there are none.
- unsigned buildCandidateList(std::vector<Candidate> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
- SuffixTree &ST,
- InstructionMapper &Mapper,
- const TargetInstrInfo &TII);
+ unsigned
+ buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ SuffixTree &ST, InstructionMapper &Mapper,
+ const TargetInstrInfo &TII);
+
+ /// Helper function for pruneOverlaps.
+ /// Removes \p C from the candidate list, and updates its \p OutlinedFunction.
+ void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList);
/// \brief Remove any overlapping candidates that weren't handled by the
/// suffix tree's pruning method.
@@ -881,11 +862,12 @@ struct MachineOutliner : public ModulePass {
///
/// \param[in,out] CandidateList A list of outlining candidates.
/// \param[in,out] FunctionList A list of functions to be outlined.
+ /// \param Mapper Contains instruction mapping info for outlining.
/// \param MaxCandidateLen The length of the longest candidate.
/// \param TII TargetInstrInfo for the module.
- void pruneOverlaps(std::vector<Candidate> &CandidateList,
+ void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList,
- unsigned MaxCandidateLen,
+ InstructionMapper &Mapper, unsigned MaxCandidateLen,
const TargetInstrInfo &TII);
/// Construct a suffix tree on the instructions in \p M and outline repeated
@@ -898,16 +880,223 @@ struct MachineOutliner : public ModulePass {
char MachineOutliner::ID = 0;
namespace llvm {
-ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); }
+ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs) {
+ return new MachineOutliner(OutlineFromLinkOnceODRs);
+}
+
+} // namespace llvm
+
+INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
+ false)
+
+unsigned MachineOutliner::findCandidates(
+ SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper,
+ std::vector<std::shared_ptr<Candidate>> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList) {
+ CandidateList.clear();
+ FunctionList.clear();
+ unsigned MaxLen = 0;
+
+ // FIXME: Visit internal nodes instead of leaves.
+ for (SuffixTreeNode *Leaf : ST.LeafVector) {
+ assert(Leaf && "Leaves in LeafVector cannot be null!");
+ if (!Leaf->IsInTree)
+ continue;
+
+ assert(Leaf->Parent && "All leaves must have parents!");
+ SuffixTreeNode &Parent = *(Leaf->Parent);
+
+ // If it doesn't appear enough, or we already outlined from it, skip it.
+ if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
+ continue;
+
+ // Figure out if this candidate is beneficial.
+ unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size();
+
+ // Too short to be beneficial; skip it.
+ // FIXME: This isn't necessarily true for, say, X86. If we factor in
+ // instruction lengths we need more information than this.
+ if (StringLen < 2)
+ continue;
+
+ // If this is a beneficial class of candidate, then every one is stored in
+ // this vector.
+ std::vector<Candidate> CandidatesForRepeatedSeq;
+
+ // Describes the start and end point of each candidate. This allows the
+ // target to infer some information about each occurrence of each repeated
+ // sequence.
+ // FIXME: CandidatesForRepeatedSeq and this should be combined.
+ std::vector<
+ std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
+ RepeatedSequenceLocs;
+
+ // Figure out the call overhead for each instance of the sequence.
+ for (auto &ChildPair : Parent.Children) {
+ SuffixTreeNode *M = ChildPair.second;
+
+ if (M && M->IsInTree && M->isLeaf()) {
+ // Never visit this leaf again.
+ M->IsInTree = false;
+ unsigned StartIdx = M->SuffixIdx;
+ unsigned EndIdx = StartIdx + StringLen - 1;
+
+ // Trick: Discard some candidates that would be incompatible with the
+ // ones we've already found for this sequence. This will save us some
+ // work in candidate selection.
+ //
+ // If two candidates overlap, then we can't outline them both. This
+ // happens when we have candidates that look like, say
+ //
+ // AA (where each "A" is an instruction).
+ //
+ // We might have some portion of the module that looks like this:
+ // AAAAAA (6 A's)
+ //
+ // In this case, there are 5 different copies of "AA" in this range, but
+ // at most 3 can be outlined. If outlining only 3 of them would not be
+ // beneficial, then we ought not to bother.
+ //
+ // Note that two things DON'T overlap when they look like this:
+ // start1...end1 .... start2...end2
+ // That is, one must either
+ // * End before the other starts
+ // * Start after the other ends
+ if (std::all_of(CandidatesForRepeatedSeq.begin(),
+ CandidatesForRepeatedSeq.end(),
+ [&StartIdx, &EndIdx](const Candidate &C) {
+ return (EndIdx < C.getStartIdx() ||
+ StartIdx > C.getEndIdx());
+ })) {
+ // It doesn't overlap with anything, so we can outline it.
+ // Each sequence is over [StartIt, EndIt].
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
+ MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
+
+ // Save the candidate and its location.
+ CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen,
+ FunctionList.size());
+ RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt));
+ }
+ }
+ }
+
+ // We've found something we might want to outline.
+ // Create an OutlinedFunction to store it and check if it'd be beneficial
+ // to outline.
+ TargetInstrInfo::MachineOutlinerInfo MInfo =
+ TII.getOutliningCandidateInfo(RepeatedSequenceLocs);
+ std::vector<unsigned> Seq;
+ for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
+ Seq.push_back(ST.Str[i]);
+ OutlinedFunction OF(FunctionList.size(), CandidatesForRepeatedSeq.size(),
+ Seq, MInfo);
+ unsigned Benefit = OF.getBenefit();
+
+ // Is it better to outline this candidate than not?
+ if (Benefit < 1) {
+ // Outlining this candidate would take more instructions than not
+ // outlining.
+ // Emit a remark explaining why we didn't outline this candidate.
+ std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator> C =
+ RepeatedSequenceLocs[0];
+ MachineOptimizationRemarkEmitter MORE(
+ *(C.first->getParent()->getParent()), nullptr);
+ MORE.emit([&]() {
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
+ C.first->getDebugLoc(),
+ C.first->getParent());
+ R << "Did not outline " << NV("Length", StringLen) << " instructions"
+ << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size())
+ << " locations."
+ << " Instructions from outlining all occurrences ("
+ << NV("OutliningCost", OF.getOutliningCost()) << ")"
+ << " >= Unoutlined instruction count ("
+ << NV("NotOutliningCost", StringLen * OF.getOccurrenceCount()) << ")"
+ << " (Also found at: ";
+
+ // Tell the user the other places the candidate was found.
+ for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) {
+ R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
+ RepeatedSequenceLocs[i].first->getDebugLoc());
+ if (i != e - 1)
+ R << ", ";
+ }
+
+ R << ")";
+ return R;
+ });
+
+ // Move to the next candidate.
+ continue;
+ }
+
+ if (StringLen > MaxLen)
+ MaxLen = StringLen;
+
+ // At this point, the candidate class is seen as beneficial. Set each
+ // candidate's benefit value and save it in the candidate list.
+ std::vector<std::shared_ptr<Candidate>> CandidatesForFn;
+ for (Candidate &C : CandidatesForRepeatedSeq) {
+ C.Benefit = Benefit;
+ C.MInfo = MInfo;
+ std::shared_ptr<Candidate> Cptr = std::make_shared<Candidate>(C);
+ CandidateList.push_back(Cptr);
+ CandidatesForFn.push_back(Cptr);
+ }
+
+ FunctionList.push_back(OF);
+ FunctionList.back().Candidates = CandidatesForFn;
+
+ // Move to the next function.
+ Parent.IsInTree = false;
+ }
+
+ return MaxLen;
+}
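
A worked run of the non-overlap check above on the "AAAAAA" case from the comment (indices 0 through 5, repeated sequence "AA", StringLen == 2), under one possible visit order, since Children is a map with no guaranteed ordering:

//   leaf at 4 -> kept, occupies [4,5]
//   leaf at 3 -> dropped: [3,4] overlaps [4,5]
//   leaf at 2 -> kept, occupies [2,3]
//   leaf at 1 -> dropped: [1,2] overlaps [2,3]
//   leaf at 0 -> kept, occupies [0,1]
// At most 6/2 == 3 non-overlapping candidates survive, as the comment says.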
+
+// Remove C from the candidate space, and update its OutlinedFunction.
+void MachineOutliner::prune(Candidate &C,
+ std::vector<OutlinedFunction> &FunctionList) {
+ // Get the OutlinedFunction associated with this Candidate.
+ OutlinedFunction &F = FunctionList[C.FunctionIdx];
+
+ // Update C's associated function's occurrence count.
+ F.decrement();
+
+ // Remove C from the CandidateList.
+ C.InCandidateList = false;
+
+ DEBUG(dbgs() << "- Removed a Candidate \n";
+ dbgs() << "--- Num fns left for candidate: " << F.getOccurrenceCount()
+ << "\n";
+ dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit()
+ << "\n";);
}
-INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE,
- "Machine Function Outliner", false, false)
+void MachineOutliner::pruneOverlaps(
+ std::vector<std::shared_ptr<Candidate>> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper,
+ unsigned MaxCandidateLen, const TargetInstrInfo &TII) {
+
+ // Return true if this candidate was already pruned, or if its function is
+ // no longer beneficial to outline (pruning the candidate in that case).
+ auto ShouldSkipCandidate = [&FunctionList, this](Candidate &C) {
+
+ // Check if the candidate was removed in a previous step.
+ if (!C.InCandidateList)
+ return true;
+
+ // C must be alive. Check if we should remove it.
+ if (FunctionList[C.FunctionIdx].getBenefit() < 1) {
+ prune(C, FunctionList);
+ return true;
+ }
+
+ // C is in the list, and F is still beneficial.
+ return false;
+ };
-void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
- unsigned MaxCandidateLen,
- const TargetInstrInfo &TII) {
// TODO: Experiment with interval trees or other interval-checking structures
// to lower the time complexity of this function.
// TODO: Can we do better than the simple greedy choice?
@@ -915,56 +1104,35 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
// This is O(MaxCandidateLen * CandidateList.size()).
for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
It++) {
- Candidate &C1 = *It;
- OutlinedFunction &F1 = FunctionList[C1.FunctionIdx];
+ Candidate &C1 = **It;
- // If we removed this candidate, skip it.
- if (!C1.InCandidateList)
+ // If C1 was already pruned, or its function is no longer beneficial for
+ // outlining, move to the next candidate.
+ if (ShouldSkipCandidate(C1))
continue;
- // Is it still worth it to outline C1?
- if (F1.Benefit < 1 || F1.OccurrenceCount < 2) {
- assert(F1.OccurrenceCount > 0 &&
- "Can't remove OutlinedFunction with no occurrences!");
- F1.OccurrenceCount--;
- C1.InCandidateList = false;
- continue;
- }
-
// The minimum start index of any candidate that could overlap with this
// one.
unsigned FarthestPossibleIdx = 0;
// Either the index is 0, or it's at most MaxCandidateLen indices away.
- if (C1.StartIdx > MaxCandidateLen)
- FarthestPossibleIdx = C1.StartIdx - MaxCandidateLen;
+ if (C1.getStartIdx() > MaxCandidateLen)
+ FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen;
// Compare against the candidates in the list that start at or after
// FarthestPossibleIdx; only those can possibly overlap C1. There are at
// most MaxCandidateLen of these.
for (auto Sit = It + 1; Sit != Et; Sit++) {
- Candidate &C2 = *Sit;
- OutlinedFunction &F2 = FunctionList[C2.FunctionIdx];
+ Candidate &C2 = **Sit;
// Is this candidate too far away to overlap?
- if (C2.StartIdx < FarthestPossibleIdx)
+ if (C2.getStartIdx() < FarthestPossibleIdx)
break;
- // Did we already remove this candidate in a previous step?
- if (!C2.InCandidateList)
- continue;
-
- // Is the function beneficial to outline?
- if (F2.OccurrenceCount < 2 || F2.Benefit < 1) {
- // If not, remove this candidate and move to the next one.
- assert(F2.OccurrenceCount > 0 &&
- "Can't remove OutlinedFunction with no occurrences!");
- F2.OccurrenceCount--;
- C2.InCandidateList = false;
+ // If C2 was already pruned, or its function is no longer beneficial for
+ // outlining, move to the next candidate.
+ if (ShouldSkipCandidate(C2))
continue;
- }
-
- size_t C2End = C2.StartIdx + C2.Len - 1;
// Do C1 and C2 overlap?
//
@@ -974,7 +1142,7 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
// We sorted our candidate list so C2Start <= C1Start. We know that
// C2End > C2Start since each candidate has length >= 2. Therefore, all we
// have to check is whether C2End < C1Start; if so, they don't overlap.
- if (C2End < C1.StartIdx)
+ if (C2.getEndIdx() < C1.getStartIdx())
continue;
// C1 and C2 overlap.
@@ -982,118 +1150,52 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
//
// Approximate this by picking the one which would have saved us the
// most instructions before any pruning.
- if (C1.Benefit >= C2.Benefit) {
-
- // C1 is better, so remove C2 and update C2's OutlinedFunction to
- // reflect the removal.
- assert(F2.OccurrenceCount > 0 &&
- "Can't remove OutlinedFunction with no occurrences!");
- F2.OccurrenceCount--;
- F2.Benefit = TII.getOutliningBenefit(F2.Sequence.size(),
- F2.OccurrenceCount,
- F2.IsTailCall
- );
-
- C2.InCandidateList = false;
-
- DEBUG (
- dbgs() << "- Removed C2. \n";
- dbgs() << "--- Num fns left for C2: " << F2.OccurrenceCount << "\n";
- dbgs() << "--- C2's benefit: " << F2.Benefit << "\n";
- );
- } else {
- // C2 is better, so remove C1 and update C1's OutlinedFunction to
- // reflect the removal.
- assert(F1.OccurrenceCount > 0 &&
- "Can't remove OutlinedFunction with no occurrences!");
- F1.OccurrenceCount--;
- F1.Benefit = TII.getOutliningBenefit(F1.Sequence.size(),
- F1.OccurrenceCount,
- F1.IsTailCall
- );
- C1.InCandidateList = false;
-
- DEBUG (
- dbgs() << "- Removed C1. \n";
- dbgs() << "--- Num fns left for C1: " << F1.OccurrenceCount << "\n";
- dbgs() << "--- C1's benefit: " << F1.Benefit << "\n";
- );
-
- // C1 is out, so we don't have to compare it against anyone else.
+ // Is C2 a better candidate?
+ if (C2.Benefit > C1.Benefit) {
+ // Yes, so prune C1. Since C1 is dead, we don't have to compare it
+ // against anything anymore, so break.
+ prune(C1, FunctionList);
break;
}
+
+ // Prune C2 and move on to the next candidate.
+ prune(C2, FunctionList);
}
}
}
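
A worked pass through the interval test above, with illustrative numbers (candidates sorted by decreasing start index, so later list entries start earlier):

// C1 occupies [20,23] (StartIdx 20, Len 4), MaxCandidateLen == 4:
//   FarthestPossibleIdx = 20 - 4 = 16.
// C2 occupies [18,21]: 18 >= 16, and 21 >= 20, so they overlap; the
//   lower-benefit one of the pair is pruned.
// Next candidate starts at 12: 12 < 16, so we break; nothing that starts
//   earlier can reach C1.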
-unsigned
-MachineOutliner::buildCandidateList(std::vector<Candidate> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
- SuffixTree &ST,
- InstructionMapper &Mapper,
- const TargetInstrInfo &TII) {
+unsigned MachineOutliner::buildCandidateList(
+ std::vector<std::shared_ptr<Candidate>> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST,
+ InstructionMapper &Mapper, const TargetInstrInfo &TII) {
std::vector<unsigned> CandidateSequence; // Current outlining candidate.
- size_t MaxCandidateLen = 0; // Length of the longest candidate.
-
- // Function for maximizing query in the suffix tree.
- // This allows us to define more fine-grained types of things to outline in
- // the target without putting target-specific info in the suffix tree.
- auto BenefitFn = [&TII, &Mapper](const SuffixTreeNode &Curr,
- size_t StringLen, unsigned EndVal) {
-
- // The root represents the empty string.
- if (Curr.isRoot())
- return 0u;
-
- // Is this long enough to outline?
- // TODO: Let the target decide how "long" a string is in terms of the sizes
- // of the instructions in the string. For example, if a call instruction
- // is smaller than a one instruction string, we should outline that string.
- if (StringLen < 2)
- return 0u;
-
- size_t Occurrences = Curr.OccurrenceCount;
+ unsigned MaxCandidateLen = 0; // Length of the longest candidate.
- // Anything we want to outline has to appear at least twice.
- if (Occurrences < 2)
- return 0u;
-
- // Check if the last instruction in the sequence is a return.
- MachineInstr *LastInstr =
- Mapper.IntegerInstructionMap[EndVal];
- assert(LastInstr && "Last instruction in sequence was unmapped!");
-
- // The only way a terminator could be mapped as legal is if it was safe to
- // tail call.
- bool IsTailCall = LastInstr->isTerminator();
- return TII.getOutliningBenefit(StringLen, Occurrences, IsTailCall);
- };
-
- MaxCandidateLen = ST.findCandidates(CandidateList, FunctionList, BenefitFn);
-
- for (auto &OF : FunctionList)
- OF.IsTailCall = Mapper.
- IntegerInstructionMap[OF.Sequence.back()]->isTerminator();
+ MaxCandidateLen =
+ findCandidates(ST, TII, Mapper, CandidateList, FunctionList);
// Sort the candidates in descending order. This will simplify the outlining
// process when we have to remove the candidates from the mapping by
// allowing us to cut them out without keeping track of an offset.
- std::stable_sort(CandidateList.begin(), CandidateList.end());
+ std::stable_sort(
+ CandidateList.begin(), CandidateList.end(),
+ [](const std::shared_ptr<Candidate> &LHS,
+ const std::shared_ptr<Candidate> &RHS) { return *LHS < *RHS; });
return MaxCandidateLen;
}
MachineFunction *
MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
- InstructionMapper &Mapper) {
+ InstructionMapper &Mapper) {
// Create the function name. This should be unique. For now, just number the
// outlined functions in the order in which they are created.
std::ostringstream NameStream;
- NameStream << "OUTLINED_FUNCTION" << "_" << OF.Name;
+ NameStream << "OUTLINED_FUNCTION_" << OF.Name;
// Create the function using an IR-level function.
LLVMContext &C = M.getContext();
@@ -1119,7 +1221,7 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
// Insert the new function into the module.
MF.insert(MF.begin(), &MBB);
- TII.insertOutlinerPrologue(MBB, MF, OF.IsTailCall);
+ TII.insertOutlinerPrologue(MBB, MF, OF.MInfo);
// Copy over the instructions for the function using the integer mappings in
// its sequence.
@@ -1134,21 +1236,19 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
MBB.insert(MBB.end(), NewMI);
}
- TII.insertOutlinerEpilogue(MBB, MF, OF.IsTailCall);
+ TII.insertOutlinerEpilogue(MBB, MF, OF.MInfo);
return &MF;
}
-bool MachineOutliner::outline(Module &M,
- const ArrayRef<Candidate> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper) {
+bool MachineOutliner::outline(
+ Module &M, const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper) {
bool OutlinedSomething = false;
-
// Replace the candidates with calls to their respective outlined functions.
- for (const Candidate &C : CandidateList) {
-
+ for (const std::shared_ptr<Candidate> &Cptr : CandidateList) {
+ Candidate &C = *Cptr;
// Was the candidate removed during pruneOverlaps?
if (!C.InCandidateList)
continue;
@@ -1157,14 +1257,15 @@ bool MachineOutliner::outline(Module &M,
OutlinedFunction &OF = FunctionList[C.FunctionIdx];
// Was its OutlinedFunction made unbeneficial during pruneOverlaps?
- if (OF.OccurrenceCount < 2 || OF.Benefit < 1)
+ if (OF.getBenefit() < 1)
continue;
// If not, then outline it.
- assert(C.StartIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
- MachineBasicBlock *MBB = (*Mapper.InstrList[C.StartIdx]).getParent();
- MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.StartIdx];
- unsigned EndIdx = C.StartIdx + C.Len - 1;
+ assert(C.getStartIdx() < Mapper.InstrList.size() &&
+ "Candidate out of bounds!");
+ MachineBasicBlock *MBB = (*Mapper.InstrList[C.getStartIdx()]).getParent();
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.getStartIdx()];
+ unsigned EndIdx = C.getEndIdx();
assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
@@ -1175,6 +1276,37 @@ bool MachineOutliner::outline(Module &M,
// Does this candidate have a function yet?
if (!OF.MF) {
OF.MF = createOutlinedFunction(M, OF, Mapper);
+ MachineBasicBlock *MBB = &*OF.MF->begin();
+
+ // Output a remark telling the user that an outlined function was created,
+ // and explaining where it came from.
+ MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
+ MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
+ MBB->findDebugLoc(MBB->begin()), MBB);
+ R << "Saved " << NV("OutliningBenefit", OF.getBenefit())
+ << " instructions by "
+ << "outlining " << NV("Length", OF.Sequence.size()) << " instructions "
+ << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
+ << " locations. "
+ << "(Found at: ";
+
+ // Tell the user the other places the candidate was found.
+ for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
+
+ // Skip over things that were pruned.
+ if (!OF.Candidates[i]->InCandidateList)
+ continue;
+
+ R << NV(
+ (Twine("StartLoc") + Twine(i)).str(),
+ Mapper.InstrList[OF.Candidates[i]->getStartIdx()]->getDebugLoc());
+ if (i != e - 1)
+ R << ", ";
+ }
+
+ R << ")";
+
+ MORE.emit(R);
FunctionsCreated++;
}
@@ -1183,8 +1315,8 @@ bool MachineOutliner::outline(Module &M,
const TargetInstrInfo &TII = *STI.getInstrInfo();
// Insert a call to the new function and erase the old sequence.
- TII.insertOutlinedCall(M, *MBB, StartIt, *MF, OF.IsTailCall);
- StartIt = Mapper.InstrList[C.StartIdx];
+ TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.MInfo);
+ StartIt = Mapper.InstrList[C.getStartIdx()];
MBB->erase(StartIt, EndIt);
OutlinedSomething = true;
@@ -1193,9 +1325,7 @@ bool MachineOutliner::outline(Module &M,
NumOutlined++;
}
- DEBUG (
- dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";
- );
+ DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
return OutlinedSomething;
}
@@ -1207,8 +1337,8 @@ bool MachineOutliner::runOnModule(Module &M) {
return false;
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
- const TargetSubtargetInfo &STI = MMI.getOrCreateMachineFunction(*M.begin())
- .getSubtarget();
+ const TargetSubtargetInfo &STI =
+ MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
const TargetInstrInfo *TII = STI.getInstrInfo();
@@ -1219,7 +1349,8 @@ bool MachineOutliner::runOnModule(Module &M) {
MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
// Is the function empty? Safe to outline from?
- if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF))
+ if (F.empty() ||
+ !TII->isFunctionSafeToOutlineFrom(MF, OutlineFromLinkOnceODRs))
continue;
// If it is, look at each MachineBasicBlock in the function.
@@ -1236,7 +1367,7 @@ bool MachineOutliner::runOnModule(Module &M) {
// Construct a suffix tree, use it to find candidates, and then outline them.
SuffixTree ST(Mapper.UnsignedVec);
- std::vector<Candidate> CandidateList;
+ std::vector<std::shared_ptr<Candidate>> CandidateList;
std::vector<OutlinedFunction> FunctionList;
// Find all of the outlining candidates.
@@ -1244,7 +1375,7 @@ bool MachineOutliner::runOnModule(Module &M) {
buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
// Remove candidates that overlap with other candidates.
- pruneOverlaps(CandidateList, FunctionList, MaxCandidateLen, *TII);
+ pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);
// Outline each of the candidates and return true if something was outlined.
return outline(M, CandidateList, FunctionList, Mapper);
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index 19e9a50e2c43..18cb9af499a6 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -1,4 +1,4 @@
-//===-- MachinePipeliner.cpp - Machine Software Pipeliner Pass ------------===//
+//===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -73,14 +73,13 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -90,19 +89,23 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/PassAnalysisSupport.h"
-#include "llvm/PassRegistry.h"
-#include "llvm/PassSupport.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <climits>
@@ -111,6 +114,7 @@
#include <functional>
#include <iterator>
#include <map>
+#include <memory>
#include <tuple>
#include <utility>
#include <vector>
@@ -169,7 +173,6 @@ namespace {
class NodeSet;
class SMSchedule;
-class SwingSchedulerDAG;
/// The main class in the implementation of the target independent
/// software pipeliner pass.
@@ -185,6 +188,7 @@ public:
#ifndef NDEBUG
static int NumTries;
#endif
+
/// Cache the target analysis information about the loop.
struct LoopInfo {
MachineBasicBlock *TBB = nullptr;
@@ -196,6 +200,7 @@ public:
LoopInfo LI;
static char ID;
+
MachinePipeliner() : MachineFunctionPass(ID) {
initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
}
@@ -222,9 +227,9 @@ private:
class SwingSchedulerDAG : public ScheduleDAGInstrs {
MachinePipeliner &Pass;
/// The minimum initiation interval between iterations for this schedule.
- unsigned MII;
+ unsigned MII = 0;
/// Set to true if a valid pipelined schedule is found for the loop.
- bool Scheduled;
+ bool Scheduled = false;
MachineLoop &Loop;
LiveIntervals &LIS;
const RegisterClassInfo &RegClassInfo;
@@ -234,9 +239,10 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
ScheduleDAGTopologicalSort Topo;
struct NodeInfo {
- int ASAP;
- int ALAP;
- NodeInfo() : ASAP(0), ALAP(0) {}
+ int ASAP = 0;
+ int ALAP = 0;
+
+ NodeInfo() = default;
};
/// Computed properties for each node in the graph.
std::vector<NodeInfo> ScheduleInfo;
@@ -245,10 +251,10 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
/// Computed node ordering for scheduling.
SetVector<SUnit *> NodeOrder;
- typedef SmallVector<NodeSet, 8> NodeSetType;
- typedef DenseMap<unsigned, unsigned> ValueMapTy;
- typedef SmallVectorImpl<MachineBasicBlock *> MBBVectorTy;
- typedef DenseMap<MachineInstr *, MachineInstr *> InstrMapTy;
+ using NodeSetType = SmallVector<NodeSet, 8>;
+ using ValueMapTy = DenseMap<unsigned, unsigned>;
+ using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
+ using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
/// Instructions to change when emitting the final schedule.
DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
@@ -272,8 +278,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
public:
Circuits(std::vector<SUnit> &SUs)
- : SUnits(SUs), Stack(), Blocked(SUs.size()), B(SUs.size()),
- AdjK(SUs.size()) {}
+ : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {}
+
/// Reset the data structures used in the circuit algorithm.
void reset() {
Stack.clear();
@@ -281,6 +287,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
NumPaths = 0;
}
+
void createAdjacencyStructure(SwingSchedulerDAG *DAG);
bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
void unblock(int U);
@@ -289,9 +296,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
public:
SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
const RegisterClassInfo &rci)
- : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), MII(0),
- Scheduled(false), Loop(L), LIS(lis), RegClassInfo(rci),
- Topo(SUnits, &ExitSU) {
+ : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
+ RegClassInfo(rci), Topo(SUnits, &ExitSU) {
P.MF->getSubtarget().getSMSMutations(Mutations);
}
@@ -363,8 +369,9 @@ public:
/// Set the Minimum Initiation Interval for this schedule attempt.
void setMII(unsigned mii) { MII = mii; }
- MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule,
- bool UpdateDAG = false);
+ void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
+
+ void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
/// Return the new base register that was stored away for the changed
/// instruction.
@@ -455,7 +462,7 @@ private:
/// that assigns a priority to the set.
class NodeSet {
SetVector<SUnit *> Nodes;
- bool HasRecurrence;
+ bool HasRecurrence = false;
unsigned RecMII = 0;
int MaxMOV = 0;
int MaxDepth = 0;
@@ -463,10 +470,9 @@ class NodeSet {
SUnit *ExceedPressure = nullptr;
public:
- typedef SetVector<SUnit *>::const_iterator iterator;
-
- NodeSet() : Nodes(), HasRecurrence(false) {}
+ using iterator = SetVector<SUnit *>::const_iterator;
+ NodeSet() = default;
NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {}
bool insert(SUnit *SU) { return Nodes.insert(SU); }
@@ -581,13 +587,13 @@ private:
/// Keep track of the first cycle value in the schedule. It starts
/// as zero, but the algorithm allows negative values.
- int FirstCycle;
+ int FirstCycle = 0;
/// Keep track of the last cycle value in the schedule.
- int LastCycle;
+ int LastCycle = 0;
/// The initiation interval (II) for the schedule.
- int InitiationInterval;
+ int InitiationInterval = 0;
/// Target machine information.
const TargetSubtargetInfo &ST;
@@ -600,11 +606,7 @@ private:
public:
SMSchedule(MachineFunction *mf)
: ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
- Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {
- FirstCycle = 0;
- LastCycle = 0;
- InitiationInterval = 0;
- }
+ Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
void reset() {
ScheduledInstrs.clear();
@@ -638,9 +640,9 @@ public:
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
/// Iterators for the cycle to instruction map.
- typedef DenseMap<int, std::deque<SUnit *>>::iterator sched_iterator;
- typedef DenseMap<int, std::deque<SUnit *>>::const_iterator
- const_sched_iterator;
+ using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
+ using const_sched_iterator =
+ DenseMap<int, std::deque<SUnit *>>::const_iterator;
/// Return true if the instruction is scheduled at the specified stage.
bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
@@ -715,6 +717,7 @@ char MachinePipeliner::ID = 0;
int MachinePipeliner::NumTries = 0;
#endif
char &llvm::MachinePipelinerID = MachinePipeliner::ID;
+
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
"Modulo Software Pipelining", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
@@ -726,13 +729,13 @@ INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
/// The "main" function for implementing Swing Modulo Scheduling.
bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
- if (skipFunction(*mf.getFunction()))
+ if (skipFunction(mf.getFunction()))
return false;
if (!EnableSWP)
return false;
- if (mf.getFunction()->getAttributes().hasAttribute(
+ if (mf.getFunction().getAttributes().hasAttribute(
AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
!EnableSWPOptSize.getPosition())
return false;
@@ -1256,6 +1259,8 @@ struct FuncUnitSorter {
const InstrItineraryData *InstrItins;
DenseMap<unsigned, unsigned> Resources;
+ FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
+
// Compute the number of functional unit alternatives needed
// at each stage, and take the minimum value. We prioritize the
// instructions by the least number of choices first.
@@ -1291,7 +1296,6 @@ struct FuncUnitSorter {
}
}
- FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
/// Return true if IS1 has less priority than IS2.
bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
unsigned F1 = 0, F2 = 0;
@@ -1384,7 +1388,7 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
unsigned RecMII = 0;
for (NodeSet &Nodes : NodeSets) {
- if (Nodes.size() == 0)
+ if (Nodes.empty())
continue;
unsigned Delay = Nodes.size() - 1;
@@ -1554,7 +1558,6 @@ static bool ignoreDependence(const SDep &D, bool isPred) {
/// D - Depth of each node.
/// H - Height of each node.
void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
-
ScheduleInfo.resize(SUnits.size());
DEBUG({
@@ -1651,7 +1654,7 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
Preds.insert(IS->getSUnit());
}
}
- return Preds.size() > 0;
+ return !Preds.empty();
}
/// Compute the Succ_L(O) set, as defined in the paper. The set is defined
@@ -1683,7 +1686,7 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder,
Succs.insert(PI->getSUnit());
}
}
- return Succs.size() > 0;
+ return !Succs.empty();
}
/// Return true if there is a path from the specified node to any of the nodes
@@ -1868,7 +1871,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
Visited.clear();
computePath(NI, Path, NodesAdded, I, Visited);
}
- if (Path.size() > 0)
+ if (!Path.empty())
I.insert(Path.begin(), Path.end());
}
// Add the nodes from the previous node set to the current node set.
@@ -1879,7 +1882,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
Visited.clear();
computePath(NI, Path, I, NodesAdded, Visited);
}
- if (Path.size() > 0)
+ if (!Path.empty())
I.insert(Path.begin(), Path.end());
}
NodesAdded.insert(I.begin(), I.end());
@@ -1892,7 +1895,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
if (succ_L(NodesAdded, N))
for (SUnit *I : N)
addConnectedNodes(I, NewSet, NodesAdded);
- if (NewSet.size() > 0)
+ if (!NewSet.empty())
NodeSets.push_back(NewSet);
// Create a new node set with the connected nodes of any predecessor of a node
@@ -1901,7 +1904,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
if (pred_L(NodesAdded, N))
for (SUnit *I : N)
addConnectedNodes(I, NewSet, NodesAdded);
- if (NewSet.size() > 0)
+ if (!NewSet.empty())
NodeSets.push_back(NewSet);
  // Create new node sets with the connected nodes of any remaining node that
@@ -1911,7 +1914,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
if (NodesAdded.count(SU) == 0) {
NewSet.clear();
addConnectedNodes(SU, NewSet, NodesAdded);
- if (NewSet.size() > 0)
+ if (!NewSet.empty())
NodeSets.push_back(NewSet);
}
}
@@ -1976,7 +1979,7 @@ void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
for (NodeSetType::iterator J = I + 1; J != E;) {
J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
- if (J->size() == 0) {
+ if (J->empty()) {
NodeSets.erase(J);
E = NodeSets.end();
} else {
@@ -2147,8 +2150,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
/// Process the nodes in the computed order and create the pipelined schedule
/// of the instructions, if possible. Return true if a schedule is found.
bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
-
- if (NodeOrder.size() == 0)
+ if (NodeOrder.empty())
return false;
bool scheduleFound = false;
@@ -2325,7 +2327,7 @@ void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
ValueMapTy *VRMap,
MBBVectorTy &PrologBBs) {
MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
- assert(PreheaderBB != NULL &&
+ assert(PreheaderBB != nullptr &&
"Need to add code to handle loops w/o preheader");
MachineBasicBlock *PredBB = PreheaderBB;
InstrMapTy InstrMap;
@@ -3352,7 +3354,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
// Look for the Phi instruction.
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
MachineInstr *Phi = MRI.getVRegDef(BaseReg);
if (!Phi || !Phi->isPHI())
return false;
@@ -3389,9 +3391,8 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
/// Apply changes to the instruction if needed. The changes are needed
/// to improve the scheduling and depend upon the final schedule.
-MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
- SMSchedule &Schedule,
- bool UpdateDAG) {
+void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
+ SMSchedule &Schedule) {
SUnit *SU = getSUnit(MI);
DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
InstrChanges.find(SU);
@@ -3399,7 +3400,7 @@ MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
std::pair<unsigned, int64_t> RegAndOffset = It->second;
unsigned BasePos, OffsetPos;
if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
- return nullptr;
+ return;
unsigned BaseReg = MI->getOperand(BasePos).getReg();
MachineInstr *LoopDef = findDefInLoop(BaseReg);
int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
@@ -3417,15 +3418,11 @@ MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
int64_t NewOffset =
MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
NewMI->getOperand(OffsetPos).setImm(NewOffset);
- if (UpdateDAG) {
- SU->setInstr(NewMI);
- MISUnitMap[NewMI] = SU;
- }
+ SU->setInstr(NewMI);
+ MISUnitMap[NewMI] = SU;
NewMIs.insert(NewMI);
- return NewMI;
}
}
- return nullptr;
}
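
A worked example of the offset arithmetic performed by applyInstrChange above,
with invented numbers; OffsetDiff is derived from the stage distance between
the base definition and this use (its computation falls between the hunks),
and none of the names below are LLVM API:

    #include <cstdint>

    int main() {
      int64_t OldImm      = 8; // immediate encoded in the original load
      int64_t StrideDelta = 4; // RegAndOffset.second: bytes added per stage
      int64_t OffsetDiff  = 2; // stages between the base def and this use
      // Mirrors: NewOffset = getImm() + RegAndOffset.second * OffsetDiff.
      int64_t NewImm = OldImm + StrideDelta * OffsetDiff; // 8 + 4*2 = 16
      return NewImm == 16 ? 0 : 1;
    }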
/// Return true for an order dependence that is loop carried potentially.
@@ -3871,6 +3868,58 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
return true;
}
+/// Attempt to fix the degenerate cases when the instruction serialization
+/// causes the register lifetimes to overlap. For example,
+/// p' = store_pi(p, b)
+/// = load p, offset
+/// In this case p and p' overlap, which means that two registers are needed.
+/// Instead, this function changes the load to use p' and updates the offset.
+void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
+ unsigned OverlapReg = 0;
+ unsigned NewBaseReg = 0;
+ for (SUnit *SU : Instrs) {
+ MachineInstr *MI = SU->getInstr();
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Look for an instruction that uses p. The instruction occurs in the
+ // same cycle but occurs later in the serialized order.
+ if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
+ // Check that the instruction appears in the InstrChanges structure,
+ // which contains instructions that can have the offset updated.
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(SU);
+ if (It != InstrChanges.end()) {
+ unsigned BasePos, OffsetPos;
+ // Update the base register and adjust the offset.
+ if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+ NewMI->getOperand(BasePos).setReg(NewBaseReg);
+ int64_t NewOffset =
+ MI->getOperand(OffsetPos).getImm() - It->second.second;
+ NewMI->getOperand(OffsetPos).setImm(NewOffset);
+ SU->setInstr(NewMI);
+ MISUnitMap[NewMI] = SU;
+ NewMIs.insert(NewMI);
+ }
+ }
+ OverlapReg = 0;
+ NewBaseReg = 0;
+ break;
+ }
+ // Look for an instruction of the form p' = op(p), which uses and defines
+ // two virtual registers that get allocated to the same physical register.
+ unsigned TiedUseIdx = 0;
+ if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
+ // OverlapReg is p in the example above.
+ OverlapReg = MI->getOperand(TiedUseIdx).getReg();
+ // NewBaseReg is p' in the example above.
+ NewBaseReg = MI->getOperand(i).getReg();
+ break;
+ }
+ }
+ }
+}
+
/// After the schedule has been formed, call this function to combine
/// the instructions from the different stages/cycles. That is, this
/// function creates a schedule that represents a single iteration.
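
To picture what the fixupRegisterOverlaps function added above does, recall
its doc comment's p/p' example: the use is rebased onto the post-incremented
pointer and its immediate is compensated by the amount the tied def already
advanced. A stand-alone sketch over plain structs (our names, not LLVM API):

    #include <cstdint>

    struct MemAccess { unsigned BaseReg; int64_t Offset; };

    // Rebase a use of OverlapReg (p) onto NewBaseReg (p'), subtracting the
    // advance recorded for the instruction, as InstrChanges does above.
    MemAccess rebase(MemAccess Use, unsigned OverlapReg, unsigned NewBaseReg,
                     int64_t Advance) {
      if (Use.BaseReg == OverlapReg) {
        Use.BaseReg = NewBaseReg; // read through p' instead of p
        Use.Offset -= Advance;    // mirrors: NewOffset = getImm() - It->second.second
      }
      return Use;
    }
    // e.g. rebase({/*p*/1, 16}, 1, /*p'*/2, 8) yields {2, 8}.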
@@ -3931,7 +3980,7 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
// map. We need to use the new registers to create the correct order.
for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
SUnit *SU = &SSD->SUnits[i];
- SSD->applyInstrChange(SU->getInstr(), *this, true);
+ SSD->applyInstrChange(SU->getInstr(), *this);
}
// Reorder the instructions in each cycle to fix and improve the
@@ -3955,11 +4004,13 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
// Replace the old order with the new order.
cycleInstrs.swap(newOrderZC);
cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
+ SSD->fixupRegisterOverlaps(cycleInstrs);
}
DEBUG(dump(););
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the schedule information to the given output.
void SMSchedule::print(raw_ostream &os) const {
// Iterate over each cycle.
@@ -3975,7 +4026,6 @@ void SMSchedule::print(raw_ostream &os) const {
}
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Utility function used for debugging to print the schedule.
LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
#endif
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index be06053f0040..b82ab02a6e6c 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -19,6 +19,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -28,9 +31,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
using namespace llvm;
@@ -183,7 +183,7 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
MachineOperand *MO = &M;
MachineInstr *MI = MO->getParent();
if (!MI) {
- errs() << PrintReg(Reg, getTargetRegisterInfo())
+ errs() << printReg(Reg, getTargetRegisterInfo())
<< " use list MachineOperand " << MO
<< " has no parent instruction.\n";
Valid = false;
@@ -192,19 +192,19 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
MachineOperand *MO0 = &MI->getOperand(0);
unsigned NumOps = MI->getNumOperands();
if (!(MO >= MO0 && MO < MO0+NumOps)) {
- errs() << PrintReg(Reg, getTargetRegisterInfo())
+ errs() << printReg(Reg, getTargetRegisterInfo())
<< " use list MachineOperand " << MO
<< " doesn't belong to parent MI: " << *MI;
Valid = false;
}
if (!MO->isReg()) {
- errs() << PrintReg(Reg, getTargetRegisterInfo())
+ errs() << printReg(Reg, getTargetRegisterInfo())
<< " MachineOperand " << MO << ": " << *MO
<< " is not a register\n";
Valid = false;
}
if (MO->getReg() != Reg) {
- errs() << PrintReg(Reg, getTargetRegisterInfo())
+ errs() << printReg(Reg, getTargetRegisterInfo())
<< " use-list MachineOperand " << MO << ": "
<< *MO << " is the wrong register\n";
Valid = false;
@@ -428,8 +428,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
// Emit the copies into the top of the block.
for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
if (LiveIns[i].second) {
- if (use_empty(LiveIns[i].second)) {
- // The livein has no uses. Drop it.
+ if (use_nodbg_empty(LiveIns[i].second)) {
+ // The livein has no non-dbg uses. Drop it.
//
// It would be preferable to have isel avoid creating live-in
// records for unused arguments in the first place, but it's
@@ -487,6 +487,13 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
return true;
}
+bool
+MachineRegisterInfo::isCallerPreservedOrConstPhysReg(unsigned PhysReg) const {
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ return isConstantPhysReg(PhysReg) ||
+ TRI->isCallerPreservedPhysReg(PhysReg, *MF);
+}
+
/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
/// specified register as undefined which causes the DBG_VALUE to be
/// deleted during LiveDebugVariables analysis.
@@ -524,7 +531,7 @@ static bool isNoReturnDef(const MachineOperand &MO) {
const MachineFunction &MF = *MBB.getParent();
// We need to keep correct unwind information even if the function will
// not return, since the runtime may need it.
- if (MF.getFunction()->hasFnAttribute(Attribute::UWTable))
+ if (MF.getFunction().hasFnAttribute(Attribute::UWTable))
return false;
const Function *Called = getCalledFunction(MI);
return !(Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) ||
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index e9b47559309f..36844e9fb30a 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -15,31 +15,36 @@
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "machine-ssaupdater"
-typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+using AvailableValsTy = DenseMap<MachineBasicBlock *, unsigned>;
+
static AvailableValsTy &getAvailableVals(void *AV) {
return *static_cast<AvailableValsTy*>(AV);
}
MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
SmallVectorImpl<MachineInstr*> *NewPHI)
- : AV(nullptr), InsertedPHIs(NewPHI) {
- TII = MF.getSubtarget().getInstrInfo();
- MRI = &MF.getRegInfo();
-}
+ : InsertedPHIs(NewPHI), TII(MF.getSubtarget().getInstrInfo()),
+ MRI(&MF.getRegInfo()) {}
MachineSSAUpdater::~MachineSSAUpdater() {
delete static_cast<AvailableValsTy*>(AV);
@@ -77,7 +82,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
static
unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
- SmallVectorImpl<std::pair<MachineBasicBlock*, unsigned> > &PredValues) {
+ SmallVectorImpl<std::pair<MachineBasicBlock *, unsigned>> &PredValues) {
if (BB->empty())
return 0;
@@ -136,7 +141,6 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode,
/// their respective blocks. However, the use of X happens in the *middle* of
/// a block. Because of this, we need to insert a new PHI node in SomeBB to
/// merge the appropriate values, and this value isn't live out of the block.
-///
unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
// If there is no definition of the renamed variable in this block, just use
// GetValueAtEndOfBlock to do our work.
@@ -233,14 +237,15 @@ void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
/// template, specialized for MachineSSAUpdater.
namespace llvm {
+
template<>
class SSAUpdaterTraits<MachineSSAUpdater> {
public:
-  typedef MachineBasicBlock BlkT;
-  typedef unsigned ValT;
-  typedef MachineInstr PhiT;
-  typedef MachineBasicBlock::succ_iterator BlkSucc_iterator;
+  using BlkT = MachineBasicBlock;
+  using ValT = unsigned;
+  using PhiT = MachineInstr;
+  using BlkSucc_iterator = MachineBasicBlock::succ_iterator;
static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
@@ -259,12 +264,16 @@ public:
PHI_iterator &operator++() { idx += 2; return *this; }
bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+
unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+
MachineBasicBlock *getIncomingBlock() {
return PHI->getOperand(idx+1).getMBB();
}
};
+
static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+
static inline PHI_iterator PHI_end(PhiT *PHI) {
return PHI_iterator(PHI, true);
}
@@ -309,7 +318,6 @@ public:
}
/// InstrIsPHI - Check if an instruction is a PHI.
- ///
static MachineInstr *InstrIsPHI(MachineInstr *I) {
if (I && I->isPHI())
return I;
@@ -338,7 +346,7 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
/// for the specified BB and if so, return it. If not, construct SSA form by
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index eaba9a58557c..e15eb658a05c 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -22,7 +22,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -42,8 +42,12 @@
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -52,10 +56,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -98,7 +98,7 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
- cl::desc("Only schedule this MBB#"));
+ cl::desc("Only schedule this MBB#"));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
@@ -200,8 +200,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
"Machine Instruction Scheduler", false, false)
-MachineScheduler::MachineScheduler()
-: MachineSchedulerBase(ID) {
+MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}
@@ -225,8 +224,7 @@ char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
INITIALIZE_PASS(PostMachineScheduler, "postmisched",
"PostRA Machine Instruction Scheduler", false, false)
-PostMachineScheduler::PostMachineScheduler()
-: MachineSchedulerBase(ID) {
+PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
}
@@ -353,7 +351,7 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
- if (skipFunction(*mf.getFunction()))
+ if (skipFunction(mf.getFunction()))
return false;
if (EnableMachineSched.getNumOccurrences()) {
@@ -391,7 +389,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
}
bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
- if (skipFunction(*mf.getFunction()))
+ if (skipFunction(mf.getFunction()))
return false;
if (EnablePostRAMachineSched.getNumOccurrences()) {
@@ -405,6 +403,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// Initialize the context of the pass.
MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
PassConfig = &getAnalysis<TargetPassConfig>();
if (VerifyScheduling)
@@ -437,11 +436,67 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
}
+/// A region of an MBB for scheduling.
+namespace {
+struct SchedRegion {
+ /// RegionBegin is the first instruction in the scheduling region, and
+ /// RegionEnd is either MBB->end() or the scheduling boundary after the
+ /// last instruction in the scheduling region. These iterators cannot refer
+ /// to instructions outside of the identified scheduling region because
+ /// those may be reordered before scheduling this region.
+ MachineBasicBlock::iterator RegionBegin;
+ MachineBasicBlock::iterator RegionEnd;
+ unsigned NumRegionInstrs;
+
+ SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
+ unsigned N) :
+ RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
+};
+} // end anonymous namespace
+
+using MBBRegionsVector = SmallVector<SchedRegion, 16>;
+
+static void
+getSchedRegions(MachineBasicBlock *MBB,
+ MBBRegionsVector &Regions,
+ bool RegionsTopDown) {
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ MachineBasicBlock::iterator I = nullptr;
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin(); RegionEnd = I) {
+
+ // Avoid decrementing RegionEnd for blocks with no terminator.
+ if (RegionEnd != MBB->end() ||
+ isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
+ --RegionEnd;
+ }
+
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ unsigned NumRegionInstrs = 0;
+ I = RegionEnd;
+ for (;I != MBB->begin(); --I) {
+ MachineInstr &MI = *std::prev(I);
+ if (isSchedBoundary(&MI, &*MBB, MF, TII))
+ break;
+ if (!MI.isDebugValue())
+ // MBB::size() uses instr_iterator to count. Here we need a bundle to
+ // count as a single instruction.
+ ++NumRegionInstrs;
+ }
+
+ Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
+ }
+
+ if (RegionsTopDown)
+ std::reverse(Regions.begin(), Regions.end());
+}
+
/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
bool FixKillFlags) {
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-
// Visit all machine basic blocks.
//
// TODO: Visit blocks in global postorder or postorder within the bottom-up
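
The new getSchedRegions helper above can be modeled in isolation: scan the
block bottom-up, cut a region at every scheduling boundary, and reverse the
list only when top-down order is requested. A minimal sketch over index
ranges rather than MachineBasicBlock iterators (our names, not LLVM API;
like the real driver, empty regions may be produced and are skipped later):

    #include <cstddef>
    #include <functional>
    #include <utility>
    #include <vector>

    // Regions are half-open [begin, end) index ranges; IsBoundary(i) says
    // whether instruction i terminates a region (e.g. a call).
    std::vector<std::pair<size_t, size_t>>
    splitRegions(size_t NumInstrs,
                 const std::function<bool(size_t)> &IsBoundary) {
      std::vector<std::pair<size_t, size_t>> Regions;
      size_t End = NumInstrs;
      while (End != 0) {
        size_t Begin = End;
        while (Begin != 0 && !IsBoundary(Begin - 1))
          --Begin;                          // grow the region upward
        Regions.push_back({Begin, End});
        End = Begin == 0 ? 0 : Begin - 1;   // step over the boundary itself
      }
      return Regions; // bottom-up; std::reverse for RegionsTopDown
    }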
@@ -459,39 +514,28 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
continue;
#endif
- // Break the block into scheduling regions [I, RegionEnd), and schedule each
- // region as soon as it is discovered. RegionEnd points the scheduling
- // boundary at the bottom of the region. The DAG does not include RegionEnd,
- // but the region does (i.e. the next RegionEnd is above the previous
- // RegionBegin). If the current block has no terminator then RegionEnd ==
- // MBB->end() for the bottom region.
+ // Break the block into scheduling regions [I, RegionEnd). RegionEnd
+ // points to the scheduling boundary at the bottom of the region. The DAG
+ // does not include RegionEnd, but the region does (i.e. the next
+ // RegionEnd is above the previous RegionBegin). If the current block has
+ // no terminator then RegionEnd == MBB->end() for the bottom region.
+ //
+    // All the regions of MBB are first found and stored in MBBRegions,
+    // which are then processed top-down when doMBBSchedRegionsTopDown()
+    // returns true.
//
// The Scheduler may insert instructions during either schedule() or
// exitRegion(), even for empty regions. So the local iterators 'I' and
- // 'RegionEnd' are invalid across these calls.
- //
- // MBB::size() uses instr_iterator to count. Here we need a bundle to count
- // as a single instruction.
- for(MachineBasicBlock::iterator RegionEnd = MBB->end();
- RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
-
- // Avoid decrementing RegionEnd for blocks with no terminator.
- if (RegionEnd != MBB->end() ||
- isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
- --RegionEnd;
- }
+    // 'RegionEnd' are invalid across these calls. Instructions must not be
+    // added to regions other than the current one without updating MBBRegions.
+
+ MBBRegionsVector MBBRegions;
+ getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
+ for (MBBRegionsVector::iterator R = MBBRegions.begin();
+ R != MBBRegions.end(); ++R) {
+ MachineBasicBlock::iterator I = R->RegionBegin;
+ MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
+ unsigned NumRegionInstrs = R->NumRegionInstrs;
- // The next region starts above the previous region. Look backward in the
- // instruction stream until we find the nearest boundary.
- unsigned NumRegionInstrs = 0;
- MachineBasicBlock::iterator I = RegionEnd;
- for (; I != MBB->begin(); --I) {
- MachineInstr &MI = *std::prev(I);
- if (isSchedBoundary(&MI, &*MBB, MF, TII))
- break;
- if (!MI.isDebugValue())
- ++NumRegionInstrs;
- }
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
@@ -504,28 +548,23 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
continue;
}
DEBUG(dbgs() << "********** MI Scheduling **********\n");
- DEBUG(dbgs() << MF->getName()
- << ":BB#" << MBB->getNumber() << " " << MBB->getName()
- << "\n From: " << *I << " To: ";
+ DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) << " "
+ << MBB->getName() << "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
if (DumpCriticalPathLength) {
errs() << MF->getName();
- errs() << ":BB# " << MBB->getNumber();
+ errs() << ":%bb. " << MBB->getNumber();
errs() << " " << MBB->getName() << " \n";
}
// Schedule a region: possibly reorder instructions.
- // This invalidates 'RegionEnd' and 'I'.
+ // This invalidates the original region iterators.
Scheduler.schedule();
// Close the current region.
Scheduler.exitRegion();
-
- // Scheduling has invalidated the current iterator 'I'. Ask the
- // scheduler for the top of it's scheduled region.
- RegionEnd = Scheduler.begin();
}
Scheduler.finishBlock();
// FIXME: Ideally, no further passes should rely on kill flags. However,
@@ -650,6 +689,16 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
releasePred(SU, &Pred);
}
+void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
+ ScheduleDAGInstrs::startBlock(bb);
+ SchedImpl->enterMBB(bb);
+}
+
+void ScheduleDAGMI::finishBlock() {
+ SchedImpl->leaveMBB();
+ ScheduleDAGInstrs::finishBlock();
+}
+
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
@@ -773,11 +822,11 @@ void ScheduleDAGMI::schedule() {
placeDebugValues();
DEBUG({
- unsigned BBNum = begin()->getParent()->getNumber();
- dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
- dumpSchedule();
- dbgs() << '\n';
- });
+ dbgs() << "*** Final schedule for "
+ << printMBBReference(*begin()->getParent()) << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
}
/// Apply each ScheduleDAGMutation step in order.
@@ -1004,7 +1053,10 @@ void ScheduleDAGMILive::initRegPressure() {
dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
);
- assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
+ assert((BotRPTracker.getPos() == RegionEnd ||
+ (RegionEnd->isDebugValue() &&
+ BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
+ "Can't find the region bottom");
// Cache the list of excess pressure sets in this region. This will also track
// the max pressure in the scheduled code for these sets.
@@ -1080,7 +1132,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
PDiff.addPressureChange(Reg, Decrement, &MRI);
DEBUG(
dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
- << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
+ << printReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
<< ' ' << *SU.getInstr();
dbgs() << " to ";
PDiff.dump(*TRI);
@@ -1088,7 +1140,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
}
} else {
assert(P.LaneMask.any());
- DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
+ DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
// This may be called before CurrentBottom has been initialized. However,
// BotRPTracker must have a valid position. We want the value live into the
// instruction or live out of the block, so ask for the previous
@@ -1211,11 +1263,11 @@ void ScheduleDAGMILive::schedule() {
placeDebugValues();
DEBUG({
- unsigned BBNum = begin()->getParent()->getNumber();
- dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
- dumpSchedule();
- dbgs() << '\n';
- });
+ dbgs() << "*** Final schedule for "
+ << printMBBReference(*begin()->getParent()) << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
}
/// Build the DAG and setup three register pressure trackers.
@@ -1410,7 +1462,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
RegOpers.detectDeadDefs(*MI, *LIS);
}
- BotRPTracker.recedeSkipDebugValues();
+ if (BotRPTracker.getPos() != CurrentBottom)
+ BotRPTracker.recedeSkipDebugValues();
SmallVector<RegisterMaskPair, 8> LiveUses;
BotRPTracker.recede(RegOpers, &LiveUses);
assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
@@ -1511,14 +1564,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
std::sort(MemOpRecords.begin(), MemOpRecords.end());
unsigned ClusterLength = 1;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
- if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
- ClusterLength = 1;
- continue;
- }
-
SUnit *SUa = MemOpRecords[Idx].SU;
SUnit *SUb = MemOpRecords[Idx+1].SU;
- if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(),
+ if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
+ *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
ClusterLength) &&
DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
@@ -1541,7 +1590,6 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
/// \brief Callback from DAG postProcessing to create cluster edges for loads.
void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
-
ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
// Map DAG NodeNum to store chain ID.
@@ -1587,6 +1635,7 @@ namespace {
class CopyConstrain : public ScheduleDAGMutation {
// Transient state.
SlotIndex RegionBeginIdx;
+
// RegionEndIdx is the slot index of the last non-debug instruction in the
// scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
SlotIndex RegionEndIdx;
@@ -1785,6 +1834,13 @@ static const unsigned InvalidCycle = ~0U;
SchedBoundary::~SchedBoundary() { delete HazardRec; }
+/// Given a Count of resource usage and a Latency value, return true if a
+/// SchedBoundary becomes resource limited.
+static bool checkResourceLimit(unsigned LFactor, unsigned Count,
+ unsigned Latency) {
+ return (int)(Count - (Latency * LFactor)) > (int)LFactor;
+}
+
void SchedBoundary::reset() {
// A new HazardRec is created for each DAG and owned by SchedBoundary.
// Destroying and reconstructing it is very expensive though. So keep
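
To make the new checkResourceLimit helper concrete, here is a worked example
with invented numbers; the function body is copied from the hunk above and
the driver is ours:

    #include <cassert>

    static bool checkResourceLimit(unsigned LFactor, unsigned Count,
                                   unsigned Latency) {
      // Count - Latency*LFactor wraps as unsigned when negative; the cast
      // to int recovers the signed difference before comparing to LFactor.
      return (int)(Count - (Latency * LFactor)) > (int)LFactor;
    }

    int main() {
      assert(checkResourceLimit(2, 10, 3));  // 10 - 3*2 = 4 > 2: resource limited
      assert(!checkResourceLimit(2, 5, 3));  // 5 - 3*2 = -1 <= 2: latency limited
      return 0;
    }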
@@ -1916,16 +1972,18 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
- for (TargetSchedModel::ProcResIter
- PI = SchedModel->getWriteProcResBegin(SC),
- PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
- unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
+ for (const MCWriteProcResEntry &PE :
+ make_range(SchedModel->getWriteProcResBegin(SC),
+ SchedModel->getWriteProcResEnd(SC))) {
+ unsigned ResIdx = PE.ProcResourceIdx;
+ unsigned Cycles = PE.Cycles;
+ unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
if (NRCycle > CurrCycle) {
#ifndef NDEBUG
- MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
+ MaxObservedStall = std::max(Cycles, MaxObservedStall);
#endif
DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
- << SchedModel->getResourceName(PI->ProcResourceIdx)
+ << SchedModel->getResourceName(ResIdx)
<< "=" << NRCycle << "c\n");
return true;
}
@@ -2037,10 +2095,9 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) {
}
}
CheckPending = true;
- unsigned LFactor = SchedModel->getLatencyFactor();
IsResourceLimited =
- (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
- > (int)LFactor;
+ checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
+ getScheduledLatency());
DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
}
@@ -2193,16 +2250,15 @@ void SchedBoundary::bumpNode(SUnit *SU) {
<< " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
}
// If we stall for any reason, bump the cycle.
- if (NextCycle > CurrCycle) {
+ if (NextCycle > CurrCycle)
bumpCycle(NextCycle);
- } else {
+ else
// After updating ZoneCritResIdx and ExpectedLatency, check if we're
// resource limited. If a stall occurred, bumpCycle does this.
- unsigned LFactor = SchedModel->getLatencyFactor();
IsResourceLimited =
- (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
- > (int)LFactor;
- }
+ checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
+ getScheduledLatency());
+
// Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
// resets CurrMOps. Loop to handle instructions with more MOps than issue in
// one cycle. Since we commonly reach the max MOps here, opportunistically
@@ -2317,7 +2373,7 @@ LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
ResCount = getResourceCount(ZoneCritResIdx);
} else {
ResFactor = SchedModel->getMicroOpFactor();
- ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
+ ResCount = RetiredMOps * ResFactor;
}
unsigned LFactor = SchedModel->getLatencyFactor();
dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
@@ -2387,10 +2443,10 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
bool OtherResLimited = false;
- if (SchedModel->hasInstrSchedModel()) {
- unsigned LFactor = SchedModel->getLatencyFactor();
- OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
- }
+ if (SchedModel->hasInstrSchedModel())
+ OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
+ OtherCount, RemLatency);
+
// Schedule aggressively for latency in PostRA mode. We don't check for
// acyclic latency during PostRA, and highly out-of-order processors will
// skip PostRA scheduling.
@@ -2605,7 +2661,7 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) {
- const MachineFunction &MF = *Begin->getParent()->getParent();
+ const MachineFunction &MF = *Begin->getMF();
const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
// Avoid setting up the register pressure tracker for small regions to save
@@ -3199,7 +3255,6 @@ void PostGenericScheduler::registerRoots() {
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand) {
-
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
@@ -3438,6 +3493,7 @@ class InstructionShuffler : public MachineSchedStrategy {
// instructions to be scheduled first.
PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
TopQ;
+
// When scheduling bottom-up, use greater-than as the queue priority.
PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
BottomQ;
@@ -3554,6 +3610,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
SS << " I:" << DFS->getNumInstrs(SU);
return SS.str();
}
+
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
return G->getGraphNodeLabel(SU);
}
@@ -3577,7 +3634,6 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
-///
void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
#ifndef NDEBUG
ViewGraph(this, Name, false, Title);
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 79e3fea3f90c..bedfdd84b1ca 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -1,4 +1,4 @@
-//===-- MachineSink.cpp - Sinking for machine instructions ----------------===//
+//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,6 +18,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -32,14 +33,17 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -93,12 +97,12 @@ namespace {
// Remember which edges we are about to split.
// This is different from CEBCandidates since those edges
// will be split.
- SetVector<std::pair<MachineBasicBlock*, MachineBasicBlock*> > ToSplit;
+ SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;
SparseBitVector<> RegsToClearKillFlags;
- typedef std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>
- AllSuccsCache;
+ using AllSuccsCache =
+ std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
public:
static char ID; // Pass identification
@@ -133,6 +137,7 @@ namespace {
bool isWorthBreakingCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To);
+
/// \brief Postpone the splitting of the given critical
/// edge (\p From, \p To).
///
@@ -150,6 +155,7 @@ namespace {
MachineBasicBlock *To,
bool BreakPHIEdge);
+
    bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                         AllSuccsCache &AllSuccessors);
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
@@ -172,7 +178,9 @@ namespace {
} // end anonymous namespace
char MachineSinking::ID = 0;
+
char &llvm::MachineSinkingID = MachineSinking::ID;
+
INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
"Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
@@ -236,17 +244,17 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
// into and they are all PHI nodes. In this case, machine-sink must break
// the critical edge first. e.g.
//
- // BB#1: derived from LLVM BB %bb4.preheader
- // Predecessors according to CFG: BB#0
+ // %bb.1: derived from LLVM BB %bb4.preheader
+ // Predecessors according to CFG: %bb.0
// ...
- // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+ // %reg16385 = DEC64_32r %reg16437, implicit-def dead %eflags
// ...
- // JE_4 <BB#37>, %EFLAGS<imp-use>
- // Successors according to CFG: BB#37 BB#2
+ // JE_4 <%bb.37>, implicit %eflags
+ // Successors according to CFG: %bb.37 %bb.2
//
- // BB#2: derived from LLVM BB %bb.nph
- // Predecessors according to CFG: BB#0 BB#1
- // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+ // %bb.2: derived from LLVM BB %bb.nph
+ // Predecessors according to CFG: %bb.0 %bb.1
+ // %reg16386 = PHI %reg16434, %bb.0, %reg16385, %bb.1
BreakPHIEdge = true;
for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
MachineInstr *UseInst = MO.getParent();
@@ -284,7 +292,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
}
bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
DEBUG(dbgs() << "******** Machine Sinking ********\n");
@@ -314,10 +322,10 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
for (auto &Pair : ToSplit) {
auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
if (NewSucc != nullptr) {
- DEBUG(dbgs() << " *** Splitting critical edge:"
- " BB#" << Pair.first->getNumber()
- << " -- BB#" << NewSucc->getNumber()
- << " -- BB#" << Pair.second->getNumber() << '\n');
+ DEBUG(dbgs() << " *** Splitting critical edge: "
+ << printMBBReference(*Pair.first) << " -- "
+ << printMBBReference(*NewSucc) << " -- "
+ << printMBBReference(*Pair.second) << '\n');
MadeChange = true;
++NumSplit;
} else
@@ -453,33 +461,33 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
// It's not always legal to break critical edges and sink the computation
// to the edge.
//
- // BB#1:
+ // %bb.1:
// v1024
- // Beq BB#3
+ // Beq %bb.3
// <fallthrough>
- // BB#2:
+ // %bb.2:
// ... no uses of v1024
// <fallthrough>
- // BB#3:
+ // %bb.3:
// ...
// = v1024
//
- // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+ // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
//
- // BB#1:
+ // %bb.1:
// ...
- // Bne BB#2
- // BB#4:
+ // Bne %bb.2
+ // %bb.4:
// v1024 =
- // B BB#3
- // BB#2:
+ // B %bb.3
+ // %bb.2:
// ... no uses of v1024
// <fallthrough>
- // BB#3:
+ // %bb.3:
// ...
// = v1024
//
- // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+ // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
// flow. We need to ensure the new basic block where the computation is
// sunk to dominates all the uses.
// It's only legal to break critical edge and sink the computation to the
@@ -570,7 +578,6 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
SmallVector<MachineBasicBlock *, 4> &
MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) const {
-
// Do we have the sorted successors in cache ?
auto Succs = AllSuccessors.find(MBB);
if (Succs != AllSuccessors.end())
@@ -711,7 +718,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
+ using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
auto *MBB = MI.getParent();
if (MBB->pred_size() != 1)
@@ -784,7 +791,6 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
if (!SuccToSinkTo)
return false;
-
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
@@ -863,11 +869,20 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
SmallVector<MachineInstr *, 2> DbgValuesToSink;
collectDebugValues(MI, DbgValuesToSink);
+ // Merge or erase debug location to ensure consistent stepping in profilers
+ // and debuggers.
+ if (!SuccToSinkTo->empty() && InsertPos != SuccToSinkTo->end())
+ MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
+ InsertPos->getDebugLoc()));
+ else
+ MI.setDebugLoc(DebugLoc());
+
// Move the instruction.
SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
++MachineBasicBlock::iterator(MI));
- // Move debug values.
+ // Move previously adjacent debug value instructions to the insert position.
for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
MachineInstr *DbgMI = *DBI;
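
A note on the DILocation::getMergedLocation call introduced in this file:
when the two locations disagree, the merged location is expected to degrade
to a safe one (a line-0 location) rather than attribute the sunk instruction
to a line it never came from. A rough model of that keep-or-degrade rule over
a simplified location type of our own (not the LLVM API):

    // Loc{} models an empty DebugLoc: equal locations are kept, differing
    // ones degrade to the empty form, as in the sinking code above.
    struct Loc {
      unsigned Line = 0, Col = 0;
      bool operator==(const Loc &O) const {
        return Line == O.Line && Col == O.Col;
      }
    };

    Loc mergeLocations(const Loc &A, const Loc &B) {
      return A == B ? A : Loc{};
    }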
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 6c5abc66fba1..d81c6f8a31e1 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -22,15 +22,15 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -42,6 +42,7 @@ using namespace llvm;
#define DEBUG_TYPE "machine-trace-metrics"
char MachineTraceMetrics::ID = 0;
+
char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
@@ -395,7 +396,8 @@ MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
}
void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
+ DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
+ << '\n');
BlockInfo[MBB->getNumber()].invalidate();
for (unsigned i = 0; i != TS_NumStrategies; ++i)
if (Ensembles[i])
@@ -475,8 +477,8 @@ public:
/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
- << MBB->getNumber() << '\n');
+ DEBUG(dbgs() << "Computing " << getName() << " trace through "
+ << printMBBReference(*MBB) << '\n');
// Set up loop bounds for the backwards post-order traversal.
LoopBounds Bounds(BlockInfo, MTM.Loops);
@@ -484,13 +486,13 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
Bounds.Downward = false;
Bounds.Visited.clear();
for (auto I : inverse_post_order_ext(MBB, Bounds)) {
- DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
+ DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the predecessors have been visited, pick the preferred one.
TBI.Pred = pickTracePred(I);
DEBUG({
if (TBI.Pred)
- dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
+ dbgs() << printMBBReference(*TBI.Pred) << '\n';
else
dbgs() << "null\n";
});
@@ -502,13 +504,13 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
Bounds.Downward = true;
Bounds.Visited.clear();
for (auto I : post_order_ext(MBB, Bounds)) {
- DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
+ DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the successors have been visited, pick the preferred one.
TBI.Succ = pickTraceSucc(I);
DEBUG({
if (TBI.Succ)
- dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
+ dbgs() << printMBBReference(*TBI.Succ) << '\n';
else
dbgs() << "null\n";
});
@@ -529,8 +531,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
WorkList.push_back(BadMBB);
do {
const MachineBasicBlock *MBB = WorkList.pop_back_val();
- DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
- << " height.\n");
+ DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
+ << getName() << " height.\n");
// Find any MBB predecessors that have MBB as their preferred successor.
// They are the only ones that need to be invalidated.
for (const MachineBasicBlock *Pred : MBB->predecessors()) {
@@ -554,8 +556,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
WorkList.push_back(BadMBB);
do {
const MachineBasicBlock *MBB = WorkList.pop_back_val();
- DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
- << " depth.\n");
+ DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
+ << getName() << " depth.\n");
// Find any MBB successors that have MBB as their preferred predecessor.
// They are the only ones that need to be invalidated.
for (const MachineBasicBlock *Succ : MBB->successors()) {
@@ -694,25 +696,6 @@ static void getPHIDeps(const MachineInstr &UseMI,
}
}
-// Keep track of physreg data dependencies by recording each live register unit.
-// Associate each regunit with an instruction operand. Depending on the
-// direction instructions are scanned, it could be the operand that defined the
-// regunit, or the highest operand to read the regunit.
-namespace {
-
-struct LiveRegUnit {
- unsigned RegUnit;
- unsigned Cycle = 0;
- const MachineInstr *MI = nullptr;
- unsigned Op = 0;
-
- unsigned getSparseSetIndex() const { return RegUnit; }
-
- LiveRegUnit(unsigned RU) : RegUnit(RU) {}
-};
-
-} // end anonymous namespace
-
// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
static void updatePhysDepsDownwards(const MachineInstr *UseMI,
@@ -797,6 +780,59 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
return MaxLen;
}
+void MachineTraceMetrics::Ensemble::
+updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
+ SparseSet<LiveRegUnit> &RegUnits) {
+ SmallVector<DataDep, 8> Deps;
+ // Collect all data dependencies.
+ if (UseMI.isPHI())
+ getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(UseMI, Deps, MTM.MRI))
+ updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
+
+ // Filter and process dependencies, computing the earliest issue cycle.
+ unsigned Cycle = 0;
+ for (const DataDep &Dep : Deps) {
+    const TraceBlockInfo &DepTBI =
+ BlockInfo[Dep.DefMI->getParent()->getNumber()];
+ // Ignore dependencies from outside the current trace.
+ if (!DepTBI.isUsefulDominator(TBI))
+ continue;
+ assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
+ unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
+ Cycle = std::max(Cycle, DepCycle);
+ }
+ // Remember the instruction depth.
+ InstrCycles &MICycles = Cycles[&UseMI];
+ MICycles.Depth = Cycle;
+
+ if (TBI.HasValidInstrHeights) {
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
+ } else {
+ DEBUG(dbgs() << Cycle << '\t' << UseMI);
+ }
+}
+
+void MachineTraceMetrics::Ensemble::
+updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI,
+ SparseSet<LiveRegUnit> &RegUnits) {
+ updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
+}
+
+void MachineTraceMetrics::Ensemble::
+updateDepths(MachineBasicBlock::iterator Start,
+ MachineBasicBlock::iterator End,
+ SparseSet<LiveRegUnit> &RegUnits) {
+ for (; Start != End; ++Start)
+ updateDepth(Start->getParent(), *Start, RegUnits);
+}
+
/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
@@ -822,10 +858,9 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
// Go through trace blocks in top-down order, stopping after the center block.
- SmallVector<DataDep, 8> Deps;
while (!Stack.empty()) {
MBB = Stack.pop_back_val();
- DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
+ DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
TBI.HasValidInstrDepths = true;
TBI.CriticalPath = 0;
@@ -848,40 +883,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
for (const auto &UseMI : *MBB) {
- // Collect all data dependencies.
- Deps.clear();
- if (UseMI.isPHI())
- getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
- else if (getDataDeps(UseMI, Deps, MTM.MRI))
- updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
-
- // Filter and process dependencies, computing the earliest issue cycle.
- unsigned Cycle = 0;
- for (const DataDep &Dep : Deps) {
- const TraceBlockInfo&DepTBI =
- BlockInfo[Dep.DefMI->getParent()->getNumber()];
- // Ignore dependencies from outside the current trace.
- if (!DepTBI.isUsefulDominator(TBI))
- continue;
- assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
- unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
- // Add latency if DefMI is a real instruction. Transients get latency 0.
- if (!Dep.DefMI->isTransient())
- DepCycle += MTM.SchedModel
- .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
- Cycle = std::max(Cycle, DepCycle);
- }
- // Remember the instruction depth.
- InstrCycles &MICycles = Cycles[&UseMI];
- MICycles.Depth = Cycle;
-
- if (!TBI.HasValidInstrHeights) {
- DEBUG(dbgs() << Cycle << '\t' << UseMI);
- continue;
- }
- // Update critical path length.
- TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
- DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
+ updateDepth(TBI, UseMI, RegUnits);
}
}
}
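
The extracted updateDepth above reduces to a max-plus recurrence: an instruction's depth is the maximum over its data dependencies of the def's depth plus its latency, with transient instructions (copies and other zero-cost pseudos) contributing zero. A minimal standalone sketch of that recurrence, using toy types rather than the LLVM API:

    #include <algorithm>
    #include <vector>

    struct Dep {
      unsigned DefDepth;  // depth already computed for the defining instr
      unsigned Latency;   // operand latency from the scheduling model
      bool Transient;     // copies etc. issue for free
    };

    // Earliest cycle UseMI can issue, given its resolved dependencies.
    unsigned earliestCycle(const std::vector<Dep> &Deps) {
      unsigned Cycle = 0;
      for (const Dep &D : Deps)
        Cycle = std::max(Cycle, D.DefDepth + (D.Transient ? 0u : D.Latency));
      return Cycle;
    }
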
@@ -945,7 +947,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
return Height;
}
-typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
+using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;
// Push the height of DefMI upwards if required to match UseMI.
// Return true if this is the first time DefMI was seen.
@@ -1043,7 +1045,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
SmallVector<DataDep, 8> Deps;
for (;!Stack.empty(); Stack.pop_back()) {
MBB = Stack.back();
- DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
+ DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
TBI.HasValidInstrHeights = true;
TBI.CriticalPath = 0;
@@ -1130,18 +1132,18 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
// Update virtual live-in heights. They were added by addLiveIns() with a 0
// height because the final height isn't known until now.
- DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
+ DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
for (LiveInReg &LIR : TBI.LiveIns) {
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
LIR.Height = Heights.lookup(DefMI);
- DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
+ DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
}
// Transfer the live regunits to the live-in list.
for (SparseSet<LiveRegUnit>::const_iterator
RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
- DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
+ DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI)
<< '@' << RI->Cycle);
}
DEBUG(dbgs() << '\n');
@@ -1288,7 +1290,7 @@ bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
OS << getName() << " ensemble:\n";
for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
- OS << " BB#" << i << '\t';
+ OS << " %bb." << i << '\t';
BlockInfo[i].print(OS);
OS << '\n';
}
@@ -1298,10 +1300,10 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
if (hasValidDepth()) {
OS << "depth=" << InstrDepth;
if (Pred)
- OS << " pred=BB#" << Pred->getNumber();
+ OS << " pred=" << printMBBReference(*Pred);
else
OS << " pred=null";
- OS << " head=BB#" << Head;
+ OS << " head=%bb." << Head;
if (HasValidInstrDepths)
OS << " +instrs";
} else
@@ -1310,10 +1312,10 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
if (hasValidHeight()) {
OS << "height=" << InstrHeight;
if (Succ)
- OS << " succ=BB#" << Succ->getNumber();
+ OS << " succ=" << printMBBReference(*Succ);
else
OS << " succ=null";
- OS << " tail=BB#" << Tail;
+ OS << " tail=%bb." << Tail;
if (HasValidInstrHeights)
OS << " +instrs";
} else
@@ -1325,18 +1327,18 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
unsigned MBBNum = &TBI - &TE.BlockInfo[0];
- OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
- << " --> BB#" << TBI.Tail << ':';
+ OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
+ << " --> %bb." << TBI.Tail << ':';
if (TBI.hasValidHeight() && TBI.hasValidDepth())
OS << ' ' << getInstrCount() << " instrs.";
if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
OS << ' ' << TBI.CriticalPath << " cycles.";
const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
- OS << "\nBB#" << MBBNum;
+ OS << "\n%bb." << MBBNum;
while (Block->hasValidDepth() && Block->Pred) {
unsigned Num = Block->Pred->getNumber();
- OS << " <- BB#" << Num;
+ OS << " <- " << printMBBReference(*Block->Pred);
Block = &TE.BlockInfo[Num];
}
@@ -1344,7 +1346,7 @@ void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
OS << "\n ";
while (Block->hasValidHeight() && Block->Succ) {
unsigned Num = Block->Succ->getNumber();
- OS << " -> BB#" << Num;
+ OS << " -> " << printMBBReference(*Block->Succ);
Block = &TE.BlockInfo[Num];
}
OS << '\n';
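
The dominant change across this file swaps hand-rolled "BB#" printing for printMBBReference and printReg, moving debug and trace output to the newer %bb.N / %N syntax. Purely illustrative, with a made-up ensemble name and block numbers, one trace line before and after:

    Before:  MinInstr trace BB#1 --> BB#4 --> BB#7: 18 instrs.
    After:   MinInstr trace %bb.1 --> %bb.4 --> %bb.7: 18 instrs.
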
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index c50a95a06505..c9fe7681e280 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -1,4 +1,4 @@
-//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
+//===- MachineVerifier.cpp - Machine Code Verifier ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,41 +23,68 @@
// the verifier errors.
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+
using namespace llvm;
namespace {
- struct MachineVerifier {
- MachineVerifier(Pass *pass, const char *b) :
- PASS(pass),
- Banner(b)
- {}
+ struct MachineVerifier {
+ MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {}
unsigned verify(MachineFunction &MF);
@@ -75,11 +102,11 @@ namespace {
bool isFunctionRegBankSelected;
bool isFunctionSelected;
- typedef SmallVector<unsigned, 16> RegVector;
- typedef SmallVector<const uint32_t*, 4> RegMaskVector;
- typedef DenseSet<unsigned> RegSet;
- typedef DenseMap<unsigned, const MachineInstr*> RegMap;
- typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet;
+ using RegVector = SmallVector<unsigned, 16>;
+ using RegMaskVector = SmallVector<const uint32_t *, 4>;
+ using RegSet = DenseSet<unsigned>;
+ using RegMap = DenseMap<unsigned, const MachineInstr *>;
+ using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>;
const MachineInstr *FirstTerminator;
BlockSet FunctionBlocks;
@@ -101,7 +128,7 @@ namespace {
struct BBInfo {
// Is this MBB reachable from the MF entry point?
- bool reachable;
+ bool reachable = false;
// Vregs that must be live in because they are used without being
// defined. Map value is the user.
@@ -126,7 +153,7 @@ namespace {
// Set versions of block's predecessor and successor lists.
BlockSet Preds, Succs;
- BBInfo() : reachable(false) {}
+ BBInfo() = default;
// Add register to vregsPassed if it belongs there. Return true if
// anything changed.
@@ -237,7 +264,7 @@ namespace {
void markReachable(const MachineBasicBlock *MBB);
void calcRegsPassed();
- void checkPHIOps(const MachineBasicBlock *MBB);
+ void checkPHIOps(const MachineBasicBlock &MBB);
void calcRegsRequired();
void verifyLiveVariables();
@@ -259,6 +286,7 @@ namespace {
struct MachineVerifierPass : public MachineFunctionPass {
static char ID; // Pass ID, replacement for typeid
+
const std::string Banner;
MachineVerifierPass(std::string banner = std::string())
@@ -279,9 +307,10 @@ namespace {
}
};
-}
+} // end anonymous namespace
char MachineVerifierPass::ID = 0;
+
INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
"Verify generated machine code", false, false)
@@ -442,9 +471,8 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
assert(MBB);
report(msg, MBB->getParent());
- errs() << "- basic block: BB#" << MBB->getNumber()
- << ' ' << MBB->getName()
- << " (" << (const void*)MBB << ')';
+ errs() << "- basic block: " << printMBBReference(*MBB) << ' '
+ << MBB->getName() << " (" << (const void *)MBB << ')';
if (Indexes)
errs() << " [" << Indexes->getMBBStartIdx(MBB)
<< ';' << Indexes->getMBBEndIdx(MBB) << ')';
@@ -499,14 +527,14 @@ void MachineVerifier::report_context_liverange(const LiveRange &LR) const {
}
void MachineVerifier::report_context_vreg(unsigned VReg) const {
- errs() << "- v. register: " << PrintReg(VReg, TRI) << '\n';
+ errs() << "- v. register: " << printReg(VReg, TRI) << '\n';
}
void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const {
if (TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) {
report_context_vreg(VRegOrUnit);
} else {
- errs() << "- regunit: " << PrintRegUnit(VRegOrUnit, TRI) << '\n';
+ errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n';
}
}
@@ -590,8 +618,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB has successor that isn't part of the function.", MBB);
if (!MBBInfoMap[*I].Preds.count(MBB)) {
report("Inconsistent CFG", MBB);
- errs() << "MBB is not in the predecessor list of the successor BB#"
- << (*I)->getNumber() << ".\n";
+ errs() << "MBB is not in the predecessor list of the successor "
+ << printMBBReference(*(*I)) << ".\n";
}
}
@@ -602,19 +630,19 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB has predecessor that isn't part of the function.", MBB);
if (!MBBInfoMap[*I].Succs.count(MBB)) {
report("Inconsistent CFG", MBB);
- errs() << "MBB is not in the successor list of the predecessor BB#"
- << (*I)->getNumber() << ".\n";
+ errs() << "MBB is not in the successor list of the predecessor "
+ << printMBBReference(*(*I)) << ".\n";
}
}
const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
const BasicBlock *BB = MBB->getBasicBlock();
- const Function *Fn = MF->getFunction();
+ const Function &F = MF->getFunction();
if (LandingPadSuccs.size() > 1 &&
!(AsmInfo &&
AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
BB && isa<SwitchInst>(BB->getTerminator())) &&
- !isFuncletEHPersonality(classifyEHPersonality(Fn->getPersonalityFn())))
+ !isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
report("MBB has more than one landing pad successor", MBB);
// Call AnalyzeBranch. If it succeeds, there are several more conditions to check.
@@ -926,6 +954,23 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
report("Generic instruction accessing memory must have one mem operand",
MI);
break;
+ case TargetOpcode::G_PHI: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (!DstTy.isValid() ||
+ !std::all_of(MI->operands_begin() + 1, MI->operands_end(),
+ [this, &DstTy](const MachineOperand &MO) {
+ if (!MO.isReg())
+ return true;
+ LLT Ty = MRI->getType(MO.getReg());
+ if (!Ty.isValid() || (Ty != DstTy))
+ return false;
+ return true;
+ }))
+ report("Generic Instruction G_PHI has operands with incompatible/missing "
+ "types",
+ MI);
+ break;
+ }
case TargetOpcode::STATEPOINT:
if (!MI->getOperand(StatepointOpers::IDPos).isImm() ||
!MI->getOperand(StatepointOpers::NBytesPos).isImm() ||
@@ -1039,101 +1084,112 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("Two-address instruction operands must be identical", MO, MONum);
// Check register classes.
- if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
- unsigned SubIdx = MO->getSubReg();
+ unsigned SubIdx = MO->getSubReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (SubIdx) {
- report("Illegal subregister index for physical register", MO, MONum);
- return;
- }
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (SubIdx) {
+ report("Illegal subregister index for physical register", MO, MONum);
+ return;
+ }
+ if (MONum < MCID.getNumOperands()) {
if (const TargetRegisterClass *DRC =
TII->getRegClass(MCID, MONum, TRI, *MF)) {
if (!DRC->contains(Reg)) {
report("Illegal physical register for instruction", MO, MONum);
- errs() << TRI->getName(Reg) << " is not a "
- << TRI->getRegClassName(DRC) << " register.\n";
+ errs() << printReg(Reg, TRI) << " is not a "
+ << TRI->getRegClassName(DRC) << " register.\n";
}
}
- } else {
- // Virtual register.
- const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
- if (!RC) {
- // This is a generic virtual register.
-
- // If we're post-Select, we can't have gvregs anymore.
- if (isFunctionSelected) {
- report("Generic virtual register invalid in a Selected function",
- MO, MONum);
- return;
- }
+ }
+ if (MO->isRenamable() &&
+ ((MO->isDef() && MI->hasExtraDefRegAllocReq()) ||
+ (MO->isUse() && MI->hasExtraSrcRegAllocReq()))) {
+ report("Illegal isRenamable setting for opcode with extra regalloc "
+ "requirements",
+ MO, MONum);
+ return;
+ }
+ } else {
+ // Virtual register.
+ const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
+ if (!RC) {
+ // This is a generic virtual register.
+
+ // If we're post-Select, we can't have gvregs anymore.
+ if (isFunctionSelected) {
+ report("Generic virtual register invalid in a Selected function",
+ MO, MONum);
+ return;
+ }
- // The gvreg must have a type and it must not have a SubIdx.
- LLT Ty = MRI->getType(Reg);
- if (!Ty.isValid()) {
- report("Generic virtual register must have a valid type", MO,
- MONum);
- return;
- }
+ // The gvreg must have a type and it must not have a SubIdx.
+ LLT Ty = MRI->getType(Reg);
+ if (!Ty.isValid()) {
+ report("Generic virtual register must have a valid type", MO,
+ MONum);
+ return;
+ }
- const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
+ const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
- // If we're post-RegBankSelect, the gvreg must have a bank.
- if (!RegBank && isFunctionRegBankSelected) {
- report("Generic virtual register must have a bank in a "
- "RegBankSelected function",
- MO, MONum);
- return;
- }
+ // If we're post-RegBankSelect, the gvreg must have a bank.
+ if (!RegBank && isFunctionRegBankSelected) {
+ report("Generic virtual register must have a bank in a "
+ "RegBankSelected function",
+ MO, MONum);
+ return;
+ }
- // Make sure the register fits into its register bank if any.
- if (RegBank && Ty.isValid() &&
- RegBank->getSize() < Ty.getSizeInBits()) {
- report("Register bank is too small for virtual register", MO,
- MONum);
- errs() << "Register bank " << RegBank->getName() << " too small("
- << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
- << "-bits\n";
- return;
- }
- if (SubIdx) {
- report("Generic virtual register does not subregister index", MO,
- MONum);
- return;
- }
+ // Make sure the register fits into its register bank if any.
+ if (RegBank && Ty.isValid() &&
+ RegBank->getSize() < Ty.getSizeInBits()) {
+ report("Register bank is too small for virtual register", MO,
+ MONum);
+ errs() << "Register bank " << RegBank->getName() << " too small("
+ << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
+ << "-bits\n";
+ return;
+ }
+ if (SubIdx) {
+ report("Generic virtual register does not subregister index", MO,
+ MONum);
+ return;
+ }
- // If this is a target specific instruction and this operand
- // has register class constraint, the virtual register must
- // comply to it.
- if (!isPreISelGenericOpcode(MCID.getOpcode()) &&
- TII->getRegClass(MCID, MONum, TRI, *MF)) {
- report("Virtual register does not match instruction constraint", MO,
- MONum);
- errs() << "Expect register class "
- << TRI->getRegClassName(
- TII->getRegClass(MCID, MONum, TRI, *MF))
- << " but got nothing\n";
- return;
- }
+ // If this is a target-specific instruction and this operand
+ // has a register class constraint, the virtual register must
+ // comply with it.
+ if (!isPreISelGenericOpcode(MCID.getOpcode()) &&
+ MONum < MCID.getNumOperands() &&
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ report("Virtual register does not match instruction constraint", MO,
+ MONum);
+ errs() << "Expect register class "
+ << TRI->getRegClassName(
+ TII->getRegClass(MCID, MONum, TRI, *MF))
+ << " but got nothing\n";
+ return;
+ }
- break;
+ break;
+ }
+ if (SubIdx) {
+ const TargetRegisterClass *SRC =
+ TRI->getSubClassWithSubReg(RC, SubIdx);
+ if (!SRC) {
+ report("Invalid subregister index for virtual register", MO, MONum);
+ errs() << "Register class " << TRI->getRegClassName(RC)
+ << " does not support subreg index " << SubIdx << "\n";
+ return;
}
- if (SubIdx) {
- const TargetRegisterClass *SRC =
- TRI->getSubClassWithSubReg(RC, SubIdx);
- if (!SRC) {
- report("Invalid subregister index for virtual register", MO, MONum);
- errs() << "Register class " << TRI->getRegClassName(RC)
- << " does not support subreg index " << SubIdx << "\n";
- return;
- }
- if (RC != SRC) {
- report("Invalid register class for subregister index", MO, MONum);
- errs() << "Register class " << TRI->getRegClassName(RC)
- << " does not fully support subreg index " << SubIdx << "\n";
- return;
- }
+ if (RC != SRC) {
+ report("Invalid register class for subregister index", MO, MONum);
+ errs() << "Register class " << TRI->getRegClassName(RC)
+ << " does not fully support subreg index " << SubIdx << "\n";
+ return;
}
+ }
+ if (MONum < MCID.getNumOperands()) {
if (const TargetRegisterClass *DRC =
TII->getRegClass(MCID, MONum, TRI, *MF)) {
if (SubIdx) {
@@ -1449,8 +1505,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
}
-void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
-}
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {}
// This function gets called after visiting all instructions in a bundle. The
// argument points to the bundle header.
@@ -1559,32 +1614,66 @@ void MachineVerifier::calcRegsRequired() {
// Check PHI instructions at the beginning of MBB. It is assumed that
// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
-void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+
SmallPtrSet<const MachineBasicBlock*, 8> seen;
- for (const auto &BBI : *MBB) {
- if (!BBI.isPHI())
+ for (const MachineInstr &Phi : MBB) {
+ if (!Phi.isPHI())
break;
seen.clear();
- for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) {
- unsigned Reg = BBI.getOperand(i).getReg();
- const MachineBasicBlock *Pre = BBI.getOperand(i + 1).getMBB();
- if (!Pre->isSuccessor(MBB))
+ const MachineOperand &MODef = Phi.getOperand(0);
+ if (!MODef.isReg() || !MODef.isDef()) {
+ report("Expected first PHI operand to be a register def", &MODef, 0);
+ continue;
+ }
+ if (MODef.isTied() || MODef.isImplicit() || MODef.isInternalRead() ||
+ MODef.isEarlyClobber() || MODef.isDebug())
+ report("Unexpected flag on PHI operand", &MODef, 0);
+ unsigned DefReg = MODef.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ report("Expected first PHI operand to be a virtual register", &MODef, 0);
+
+ for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
+ const MachineOperand &MO0 = Phi.getOperand(I);
+ if (!MO0.isReg()) {
+ report("Expected PHI operand to be a register", &MO0, I);
+ continue;
+ }
+ if (MO0.isImplicit() || MO0.isInternalRead() || MO0.isEarlyClobber() ||
+ MO0.isDebug() || MO0.isTied())
+ report("Unexpected flag on PHI operand", &MO0, I);
+
+ const MachineOperand &MO1 = Phi.getOperand(I + 1);
+ if (!MO1.isMBB()) {
+ report("Expected PHI operand to be a basic block", &MO1, I + 1);
+ continue;
+ }
+
+ const MachineBasicBlock &Pre = *MO1.getMBB();
+ if (!Pre.isSuccessor(&MBB)) {
+ report("PHI input is not a predecessor block", &MO1, I + 1);
continue;
- seen.insert(Pre);
- BBInfo &PrInfo = MBBInfoMap[Pre];
- if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
- report("PHI operand is not live-out from predecessor",
- &BBI.getOperand(i), i);
+ }
+
+ if (MInfo.reachable) {
+ seen.insert(&Pre);
+ BBInfo &PrInfo = MBBInfoMap[&Pre];
+ if (!MO0.isUndef() && PrInfo.reachable &&
+ !PrInfo.isLiveOut(MO0.getReg()))
+ report("PHI operand is not live-out from predecessor", &MO0, I);
+ }
}
// Did we see all predecessors?
- for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
- PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
- if (!seen.count(*PrI)) {
- report("Missing PHI operand", &BBI);
- errs() << "BB#" << (*PrI)->getNumber()
- << " is a predecessor according to the CFG.\n";
+ if (MInfo.reachable) {
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ if (!seen.count(Pred)) {
+ report("Missing PHI operand", &Phi);
+ errs() << printMBBReference(*Pred)
+ << " is a predecessor according to the CFG.\n";
+ }
}
}
}
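
The rewritten checkPHIOps leans on the fixed PHI operand layout: operand 0 is the virtual-register def, and operands 1..N come in (value, predecessor block) pairs, which is why the loop strides by two. A toy walk over that layout, with standalone types standing in for MachineOperand:

    #include <cstdio>
    #include <vector>

    struct Incoming { unsigned Reg; int PredBlock; };  // one (value, pred) pair

    struct Phi {
      unsigned DefReg;            // operand 0: the def
      std::vector<Incoming> Ins;  // operands 1..N, taken two at a time
    };

    // Mirrors the I, I + 1 stride in checkPHIOps.
    void dumpPhi(const Phi &P) {
      std::printf("%%%u = PHI", P.DefReg);
      for (const Incoming &In : P.Ins)
        std::printf(" %%%u, %%bb.%d", In.Reg, In.PredBlock);
      std::printf("\n");
    }
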
@@ -1593,15 +1682,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
void MachineVerifier::visitMachineFunctionAfter() {
calcRegsPassed();
- for (const auto &MBB : *MF) {
- BBInfo &MInfo = MBBInfoMap[&MBB];
-
- // Skip unreachable MBBs.
- if (!MInfo.reachable)
- continue;
-
- checkPHIOps(&MBB);
- }
+ for (const MachineBasicBlock &MBB : *MF)
+ checkPHIOps(MBB);
// Now check liveness info if available
calcRegsRequired();
@@ -1614,8 +1696,8 @@ void MachineVerifier::visitMachineFunctionAfter() {
++I)
if (MInfo.regsKilled.count(*I)) {
report("Virtual register killed in block, but needed live out.", &MBB);
- errs() << "Virtual register " << PrintReg(*I)
- << " is used after the block.\n";
+ errs() << "Virtual register " << printReg(*I)
+ << " is used after the block.\n";
}
}
@@ -1647,14 +1729,14 @@ void MachineVerifier::verifyLiveVariables() {
if (MInfo.vregsRequired.count(Reg)) {
if (!VI.AliveBlocks.test(MBB.getNumber())) {
report("LiveVariables: Block missing from AliveBlocks", &MBB);
- errs() << "Virtual register " << PrintReg(Reg)
- << " must be live through the block.\n";
+ errs() << "Virtual register " << printReg(Reg)
+ << " must be live through the block.\n";
}
} else {
if (VI.AliveBlocks.test(MBB.getNumber())) {
report("LiveVariables: Block should not be in AliveBlocks", &MBB);
- errs() << "Virtual register " << PrintReg(Reg)
- << " is not needed live through the block.\n";
+ errs() << "Virtual register " << printReg(Reg)
+ << " is not needed live through the block.\n";
}
}
}
@@ -1672,7 +1754,7 @@ void MachineVerifier::verifyLiveIntervals() {
if (!LiveInts->hasInterval(Reg)) {
report("Missing live interval for virtual register", MF);
- errs() << PrintReg(Reg, TRI) << " still has defs or uses\n";
+ errs() << printReg(Reg, TRI) << " still has defs or uses\n";
continue;
}
@@ -1887,7 +1969,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (MOI->isDef()) {
if (Sub != 0) {
hasSubRegDef = true;
- // An operand vreg0:sub0<def> reads vreg0:sub1..n. Invert the lane
+ // An operand %0:sub0 reads %0:sub1..n. Invert the lane
// mask for subregister defs. Read-undef defs will be handled by
// readsReg below.
SLM = ~SLM;
@@ -1935,7 +2017,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Skip this block.
++MFI;
}
- for (;;) {
+ while (true) {
assert(LiveInts->isLiveInToMBB(LR, &*MFI));
// We don't know how to track physregs into a landing pad.
if (!TargetRegisterInfo::isVirtualRegister(Reg) &&
@@ -1964,8 +2046,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
report("Register not marked live out of predecessor", *PI);
report_context(LR, Reg, LaneMask);
report_context(*VNI);
- errs() << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(&*MFI) << ", not live before "
+ errs() << " live into " << printMBBReference(*MFI) << '@'
+ << LiveInts->getMBBStartIdx(&*MFI) << ", not live before "
<< PEnd << '\n';
continue;
}
@@ -1974,9 +2056,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (!IsPHI && PVNI != VNI) {
report("Different value live out of predecessor", *PI);
report_context(LR, Reg, LaneMask);
- errs() << "Valno #" << PVNI->id << " live out of BB#"
- << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id
- << " live into BB#" << MFI->getNumber() << '@'
+ errs() << "Valno #" << PVNI->id << " live out of "
+ << printMBBReference(*(*PI)) << '@' << PEnd << "\nValno #"
+ << VNI->id << " live into " << printMBBReference(*MFI) << '@'
<< LiveInts->getMBBStartIdx(&*MFI) << '\n';
}
}
@@ -2041,23 +2123,25 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
}
namespace {
+
// FrameSetup and FrameDestroy can have zero adjustment, so using a single
// integer, we can't tell whether it is a FrameSetup or FrameDestroy if the
// value is zero.
// We use a bool plus an integer to capture the stack state.
struct StackStateOfBB {
- StackStateOfBB() : EntryValue(0), ExitValue(0), EntryIsSetup(false),
- ExitIsSetup(false) { }
+ StackStateOfBB() = default;
StackStateOfBB(int EntryVal, int ExitVal, bool EntrySetup, bool ExitSetup) :
EntryValue(EntryVal), ExitValue(ExitVal), EntryIsSetup(EntrySetup),
- ExitIsSetup(ExitSetup) { }
+ ExitIsSetup(ExitSetup) {}
+
// Can be negative, which means we are setting up a frame.
- int EntryValue;
- int ExitValue;
- bool EntryIsSetup;
- bool ExitIsSetup;
+ int EntryValue = 0;
+ int ExitValue = 0;
+ bool EntryIsSetup = false;
+ bool ExitIsSetup = false;
};
-}
+
+} // end anonymous namespace
/// Make sure on every path through the CFG, a FrameSetup <n> is always followed
/// by a FrameDestroy <n>, stack adjustments are identical on all
@@ -2073,8 +2157,8 @@ void MachineVerifier::verifyStackFrame() {
df_iterator_default_set<const MachineBasicBlock*> Reachable;
// Visit the MBBs in DFS order.
- for (df_ext_iterator<const MachineFunction*,
- df_iterator_default_set<const MachineBasicBlock*> >
+ for (df_ext_iterator<const MachineFunction *,
+ df_iterator_default_set<const MachineBasicBlock *>>
DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable);
DFI != DFE; ++DFI) {
const MachineBasicBlock *MBB = *DFI;
@@ -2125,11 +2209,11 @@ void MachineVerifier::verifyStackFrame() {
(SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue ||
SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) {
report("The exit stack state of a predecessor is inconsistent.", MBB);
- errs() << "Predecessor BB#" << (*I)->getNumber() << " has exit state ("
- << SPState[(*I)->getNumber()].ExitValue << ", "
- << SPState[(*I)->getNumber()].ExitIsSetup
- << "), while BB#" << MBB->getNumber() << " has entry state ("
- << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n";
+ errs() << "Predecessor " << printMBBReference(*(*I))
+ << " has exit state (" << SPState[(*I)->getNumber()].ExitValue
+ << ", " << SPState[(*I)->getNumber()].ExitIsSetup << "), while "
+ << printMBBReference(*MBB) << " has entry state ("
+ << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n";
}
}
@@ -2141,11 +2225,11 @@ void MachineVerifier::verifyStackFrame() {
(SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue ||
SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) {
report("The entry stack state of a successor is inconsistent.", MBB);
- errs() << "Successor BB#" << (*I)->getNumber() << " has entry state ("
- << SPState[(*I)->getNumber()].EntryValue << ", "
- << SPState[(*I)->getNumber()].EntryIsSetup
- << "), while BB#" << MBB->getNumber() << " has exit state ("
- << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n";
+ errs() << "Successor " << printMBBReference(*(*I))
+ << " has entry state (" << SPState[(*I)->getNumber()].EntryValue
+ << ", " << SPState[(*I)->getNumber()].EntryIsSetup << "), while "
+ << printMBBReference(*MBB) << " has exit state ("
+ << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n";
}
}
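
verifyStackFrame models each block's stack pointer state as an (adjustment, is-setup) pair and, as the two hunks above show, reports any CFG edge where a predecessor's exit state disagrees with the successor's entry state. The invariant, reduced to a standalone sketch with assumed shapes:

    struct StackState { int Value = 0; bool IsSetup = false; };
    struct BlockState { StackState Entry, Exit; };

    // On every CFG edge, the predecessor must leave the stack exactly as
    // the successor expects to find it.
    bool edgeConsistent(const BlockState &Pred, const BlockState &Succ) {
      return Pred.Exit.Value == Succ.Entry.Value &&
             Pred.Exit.IsSetup == Succ.Entry.IsSetup;
    }
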
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 633a853b2c74..e7f426c469a0 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -19,10 +19,10 @@
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#define DEBUG_TYPE "machine-scheduler"
@@ -33,42 +33,74 @@ using namespace llvm;
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
cl::desc("Enable scheduling for macro fusion."), cl::init(true));
-static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
+static bool isHazard(const SDep &Dep) {
+ return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
+}
+
+static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
SUnit &SecondSU) {
+ // Check that neither instr is already paired with another along the edge
+ // between them.
+ for (SDep &SI : FirstSU.Succs)
+ if (SI.isCluster())
+ return false;
+
+ for (SDep &SI : SecondSU.Preds)
+ if (SI.isCluster())
+ return false;
+ // Though the reachability checks above could be made more generic,
+ // perhaps as part of ScheduleDAGMI::addEdge(), since such edges are valid,
+ // the extra computation cost makes it less interesting in general cases.
+
// Create a single weak edge between the adjacent instrs. The only effect is
// to cause bottom-up scheduling to heavily prioritize the clustered instrs.
- DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
+ if (!DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)))
+ return false;
- // Adjust the latency between the anchor instr and its
- // predecessors.
- for (SDep &IDep : SecondSU.Preds)
- if (IDep.getSUnit() == &FirstSU)
- IDep.setLatency(0);
+ // Adjust the latency between both instrs.
+ for (SDep &SI : FirstSU.Succs)
+ if (SI.getSUnit() == &SecondSU)
+ SI.setLatency(0);
- // Adjust the latency between the dependent instr and its
- // predecessors.
- for (SDep &IDep : FirstSU.Succs)
- if (IDep.getSUnit() == &SecondSU)
- IDep.setLatency(0);
+ for (SDep &SI : SecondSU.Preds)
+ if (SI.getSUnit() == &FirstSU)
+ SI.setLatency(0);
- DEBUG(dbgs() << DAG.MF.getName() << "(): Macro fuse ";
+ DEBUG(dbgs() << "Macro fuse: ";
FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
SecondSU.print(dbgs(), &DAG); dbgs() << " / ";
dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " <<
DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; );
+ // Make data dependencies from the FirstSU also dependent on the SecondSU to
+ // prevent them from being scheduled between the FirstSU and the SecondSU.
if (&SecondSU != &DAG.ExitSU)
- // Make instructions dependent on FirstSU also dependent on SecondSU to
- // prevent them from being scheduled between FirstSU and and SecondSU.
for (const SDep &SI : FirstSU.Succs) {
- if (SI.getSUnit() == &SecondSU)
+ SUnit *SU = SI.getSUnit();
+ if (SI.isWeak() || isHazard(SI) ||
+ SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU))
+ continue;
+ DEBUG(dbgs() << " Bind ";
+ SecondSU.print(dbgs(), &DAG); dbgs() << " - ";
+ SU->print(dbgs(), &DAG); dbgs() << '\n';);
+ DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial));
+ }
+
+ // Make the FirstSU also dependent on the dependencies of the SecondSU to
+ // prevent them from being scheduled between the FirstSU and the SecondSU.
+ if (&FirstSU != &DAG.EntrySU)
+ for (const SDep &SI : SecondSU.Preds) {
+ SUnit *SU = SI.getSUnit();
+ if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU))
continue;
- DEBUG(dbgs() << " Copy Succ ";
- SI.getSUnit()->print(dbgs(), &DAG); dbgs() << '\n';);
- DAG.addEdge(SI.getSUnit(), SDep(&SecondSU, SDep::Artificial));
+ DEBUG(dbgs() << " Bind ";
+ SU->print(dbgs(), &DAG); dbgs() << " - ";
+ FirstSU.print(dbgs(), &DAG); dbgs() << '\n';);
+ DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial));
}
++NumFused;
+ return true;
}
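
In outline, the new fuseInstructionPair: refuse if either node already carries a cluster edge, add one weak Cluster edge, zero the latency in both directions, then add Artificial edges so nothing can be scheduled between the fused pair. A reduced model of the early-exit guard and the hazard filter, using toy edge types rather than SDep:

    #include <vector>

    enum class DepKind { Data, Anti, Output, Cluster, Artificial };
    struct Edge { DepKind Kind; };

    // Anti and output dependencies only order instructions; no data flows.
    bool isHazard(const Edge &E) {
      return E.Kind == DepKind::Anti || E.Kind == DepKind::Output;
    }

    // Refuse to fuse when either side is already clustered with a neighbor.
    bool alreadyClustered(const std::vector<Edge> &FirstSuccs,
                          const std::vector<Edge> &SecondPreds) {
      for (const Edge &E : FirstSuccs)
        if (E.Kind == DepKind::Cluster)
          return true;
      for (const Edge &E : SecondPreds)
        if (E.Kind == DepKind::Cluster)
          return true;
      return false;
    }
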
namespace {
@@ -116,9 +148,8 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
// Explorer for fusion candidates among the dependencies of the anchor instr.
for (SDep &Dep : AnchorSU.Preds) {
- // Ignore dependencies that don't enforce ordering.
- if (Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output ||
- Dep.isWeak())
+ // Ignore dependencies other than data or strong ordering.
+ if (Dep.isWeak() || isHazard(Dep))
continue;
SUnit &DepSU = *Dep.getSUnit();
@@ -129,8 +160,8 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
continue;
- fuseInstructionPair(DAG, DepSU, AnchorSU);
- return true;
+ if (fuseInstructionPair(DAG, DepSU, AnchorSU))
+ return true;
}
return false;
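
With fuseInstructionPair now fallible, scheduleAdjacentImpl keeps scanning the anchor's predecessors instead of returning after the first candidate. The control-flow change, reduced to a sketch with hypothetical names:

    #include <vector>

    struct Candidate { bool Fusible; };

    bool tryFuse(const Candidate &C) { return C.Fusible; }  // stand-in

    // Old: fuse the first candidate and return true unconditionally.
    // New: report success only once some candidate actually fuses.
    bool scheduleAdjacent(const std::vector<Candidate> &Cands) {
      for (const Candidate &C : Cands)
        if (tryFuse(C))
          return true;
      return false;
    }
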
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index f7aeb4204c5b..8972867ba083 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -1,4 +1,4 @@
-//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===//
+//===- OptimizePHIs.cpp - Optimize machine instruction PHIs ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,13 +14,17 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Pass.h"
+#include <cassert>
+
using namespace llvm;
#define DEBUG_TYPE "opt-phis"
@@ -29,12 +33,14 @@ STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
namespace {
+
class OptimizePHIs : public MachineFunctionPass {
MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
public:
static char ID; // Pass identification
+
OptimizePHIs() : MachineFunctionPass(ID) {
initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
}
@@ -47,23 +53,26 @@ namespace {
}
private:
- typedef SmallPtrSet<MachineInstr*, 16> InstrSet;
- typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator;
+ using InstrSet = SmallPtrSet<MachineInstr *, 16>;
+ using InstrSetIterator = SmallPtrSetIterator<MachineInstr *>;
bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg,
InstrSet &PHIsInCycle);
bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
bool OptimizeBB(MachineBasicBlock &MBB);
};
-}
+
+} // end anonymous namespace
char OptimizePHIs::ID = 0;
+
char &llvm::OptimizePHIsID = OptimizePHIs::ID;
+
INITIALIZE_PASS(OptimizePHIs, DEBUG_TYPE,
"Optimize machine instruction PHIs", false, false)
bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
- if (skipFunction(*Fn.getFunction()))
+ if (skipFunction(Fn.getFunction()))
return false;
MRI = &Fn.getRegInfo();
@@ -144,7 +153,7 @@ bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
if (PHIsInCycle.size() == 16)
return false;
- for (MachineInstr &UseMI : MRI->use_instructions(DstReg)) {
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DstReg)) {
if (!UseMI.isPHI() || !IsDeadPHICycle(&UseMI, PHIsInCycle))
return false;
}
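
The functional change in this file is the switch to use_nodbg_instructions: a PHI cycle whose only users outside the cycle are debug instructions is still dead, so debug info can no longer pin otherwise-removable PHIs. A toy predicate capturing the filtering, with assumed shapes:

    #include <vector>

    struct Use { bool IsDebug; bool IsPhiInCycle; };

    // Debug uses must not keep a PHI cycle alive; every non-debug use has
    // to be another PHI in the cycle for the cycle to count as dead.
    bool cycleIsDead(const std::vector<Use> &Uses) {
      for (const Use &U : Uses)
        if (!U.IsDebug && !U.IsPhiInCycle)
          return false;
      return true;
    }
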
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 9c898fa40d7e..54c5a940275d 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -1,4 +1,4 @@
-//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//===- PhiElimination.cpp - Eliminate PHI nodes by inserting copies -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,24 +14,35 @@
//===----------------------------------------------------------------------===//
#include "PHIEliminationUtils.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "phi-node-elimination"
@@ -51,6 +62,7 @@ static cl::opt<bool> NoPhiElimLiveOutEarlyExit(
cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true."));
namespace {
+
class PHIElimination : public MachineFunctionPass {
MachineRegisterInfo *MRI; // Machine register information
LiveVariables *LV;
@@ -58,6 +70,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
+
PHIElimination() : MachineFunctionPass(ID) {
initializePHIEliminationPass(*PassRegistry::getPassRegistry());
}
@@ -68,8 +81,8 @@ namespace {
private:
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
/// in predecessor basic blocks.
- ///
bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+
void LowerPHINode(MachineBasicBlock &MBB,
MachineBasicBlock::iterator LastPHIIt);
@@ -78,7 +91,6 @@ namespace {
/// register which is used in a PHI node. We map that to the BB the
/// vreg is coming from. This is used later to determine when the vreg
/// is killed in the BB.
- ///
void analyzePHINodes(const MachineFunction& Fn);
/// Split critical edges where necessary for good coalescer performance.
@@ -90,8 +102,8 @@ namespace {
bool isLiveIn(unsigned Reg, const MachineBasicBlock *MBB);
bool isLiveOutPastPHIs(unsigned Reg, const MachineBasicBlock *MBB);
- typedef std::pair<unsigned, unsigned> BBVRegPair;
- typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+ using BBVRegPair = std::pair<unsigned, unsigned>;
+ using VRegPHIUse = DenseMap<BBVRegPair, unsigned>;
VRegPHIUse VRegPHIUseCount;
@@ -99,17 +111,19 @@ namespace {
SmallPtrSet<MachineInstr*, 4> ImpDefs;
// Map reusable lowered PHI node -> incoming join register.
- typedef DenseMap<MachineInstr*, unsigned,
- MachineInstrExpressionTrait> LoweredPHIMap;
+ using LoweredPHIMap =
+ DenseMap<MachineInstr*, unsigned, MachineInstrExpressionTrait>;
LoweredPHIMap LoweredPHIs;
};
-}
+
+} // end anonymous namespace
STATISTIC(NumLowered, "Number of phis lowered");
STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
+
char& llvm::PHIEliminationID = PHIElimination::ID;
INITIALIZE_PASS_BEGIN(PHIElimination, DEBUG_TYPE,
@@ -182,7 +196,6 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
-///
bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
MachineBasicBlock &MBB) {
if (MBB.empty() || !MBB.front().isPHI())
@@ -219,9 +232,7 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
return true;
}
-
-/// LowerPHINode - Lower the PHI node at the top of the specified block,
-///
+/// LowerPHINode - Lower the PHI node at the top of the specified block.
void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
MachineBasicBlock::iterator LastPHIIt) {
++NumLowered;
@@ -259,7 +270,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
IncomingReg = entry;
reusedIncoming = true;
++NumReused;
- DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
+ DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for " << *MPhi);
} else {
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
@@ -534,7 +545,6 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
/// particular, we want to map the number of uses of a virtual register which is
/// used in a PHI node. We map that to the BB the vreg is coming from. This is
/// used later to determine when the vreg is killed in the BB.
-///
void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
for (const auto &MBB : MF)
for (const auto &BBI : MBB) {
@@ -583,9 +593,9 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit)
continue;
if (ShouldSplit) {
- DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
- << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
- << ": " << *BBI);
+ DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge "
+ << printMBBReference(*PreMBB) << " -> "
+ << printMBBReference(MBB) << ": " << *BBI);
}
// If Reg is not live-in to MBB, it means it must be live-in to some
diff --git a/lib/CodeGen/ParallelCG.cpp b/lib/CodeGen/ParallelCG.cpp
index 50dd44fa659f..ff8680a0540d 100644
--- a/lib/CodeGen/ParallelCG.cpp
+++ b/lib/CodeGen/ParallelCG.cpp
@@ -19,7 +19,6 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SplitModule.h"
diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp
index 513e82716564..0957705b19bb 100644
--- a/lib/CodeGen/PatchableFunction.cpp
+++ b/lib/CodeGen/PatchableFunction.cpp
@@ -16,9 +16,9 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
using namespace llvm;
@@ -54,11 +54,11 @@ static bool doesNotGeneratecode(const MachineInstr &MI) {
}
bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
- if (!MF.getFunction()->hasFnAttribute("patchable-function"))
+ if (!MF.getFunction().hasFnAttribute("patchable-function"))
return false;
#ifndef NDEBUG
- Attribute PatchAttr = MF.getFunction()->getFnAttribute("patchable-function");
+ Attribute PatchAttr = MF.getFunction().getFnAttribute("patchable-function");
StringRef PatchType = PatchAttr.getValueAsString();
assert(PatchType == "prologue-short-redirect" && "Only possibility today!");
#endif
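
This file, like several others in the patch, drops the -> because MachineFunction::getFunction now hands back a reference instead of a pointer. A minimal model of why the call sites change shape (toy types, hypothetical attribute check):

    struct Function {
      bool hasFnAttribute(const char *) const { return false; }  // stub
    };

    struct MachineFunction {
      Function F;
      // A reference removes the null state, so callers use '.' not '->'.
      const Function &getFunction() const { return F; }
    };

    bool isPatchable(const MachineFunction &MF) {
      return MF.getFunction().hasFnAttribute("patchable-function");
    }
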
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index b13f6b68c420..45078081987a 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1,4 +1,4 @@
-//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -67,6 +67,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -74,20 +75,23 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdint>
#include <memory>
@@ -170,11 +174,11 @@ namespace {
}
/// \brief Track Def -> Use info used for rewriting copies.
- typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
- RewriteMapTy;
+ using RewriteMapTy =
+ SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>;
/// \brief Sequence of instructions that form a recurrence cycle.
- typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle;
+ using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
@@ -195,6 +199,7 @@ namespace {
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+
/// \brief Finds recurrence cycles, but only ones formed around
/// a def operand and a use operand that are tied. If there is a use
/// operand commutable with the tied use operand, find the recurrence cycle
@@ -254,7 +259,7 @@ namespace {
/// maintained with CommutePair.
class RecurrenceInstr {
public:
- typedef std::pair<unsigned, unsigned> IndexPair;
+ using IndexPair = std::pair<unsigned, unsigned>;
RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
@@ -277,11 +282,12 @@ namespace {
SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs;
/// Instruction using the sources in 'RegSrcs'.
- const MachineInstr *Inst;
+ const MachineInstr *Inst = nullptr;
public:
- ValueTrackerResult() : Inst(nullptr) {}
- ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) {
+ ValueTrackerResult() = default;
+
+ ValueTrackerResult(unsigned Reg, unsigned SubReg) {
addSource(Reg, SubReg);
}
@@ -350,13 +356,17 @@ namespace {
class ValueTracker {
private:
/// The current point into the use-def chain.
- const MachineInstr *Def;
+ const MachineInstr *Def = nullptr;
+
/// The index of the definition in Def.
- unsigned DefIdx;
+ unsigned DefIdx = 0;
+
/// The sub register index of the definition.
unsigned DefSubReg;
+
/// The register where the value can be found.
unsigned Reg;
+
/// Specify whether or not the value tracking looks through
/// complex instructions. When this is false, the value tracker
/// bails on everything that is not a copy or a bitcast.
@@ -365,8 +375,10 @@ namespace {
/// the ValueTracker class but that would have complicated the code of
/// the users of this class.
bool UseAdvancedTracking;
+
/// MachineRegisterInfo used to perform tracking.
const MachineRegisterInfo &MRI;
+
/// Optional TargetInstrInfo used to perform some complex
/// tracking.
const TargetInstrInfo *TII;
@@ -374,22 +386,29 @@ namespace {
/// \brief Dispatcher to the right underlying implementation of
/// getNextSource.
ValueTrackerResult getNextSourceImpl();
+
/// \brief Specialized version of getNextSource for Copy instructions.
ValueTrackerResult getNextSourceFromCopy();
+
/// \brief Specialized version of getNextSource for Bitcast instructions.
ValueTrackerResult getNextSourceFromBitcast();
+
/// \brief Specialized version of getNextSource for RegSequence
/// instructions.
ValueTrackerResult getNextSourceFromRegSequence();
+
/// \brief Specialized version of getNextSource for InsertSubreg
/// instructions.
ValueTrackerResult getNextSourceFromInsertSubreg();
+
/// \brief Specialized version of getNextSource for ExtractSubreg
/// instructions.
ValueTrackerResult getNextSourceFromExtractSubreg();
+
/// \brief Specialized version of getNextSource for SubregToReg
/// instructions.
ValueTrackerResult getNextSourceFromSubregToReg();
+
/// \brief Specialized version of getNextSource for PHI instructions.
ValueTrackerResult getNextSourceFromPHI();
@@ -410,7 +429,7 @@ namespace {
const MachineRegisterInfo &MRI,
bool UseAdvancedTracking = false,
const TargetInstrInfo *TII = nullptr)
- : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg),
+ : DefSubReg(DefSubReg), Reg(Reg),
UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
Def = MRI.getVRegDef(Reg);
@@ -453,6 +472,7 @@ namespace {
} // end anonymous namespace
char PeepholeOptimizer::ID = 0;
+
char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
@@ -659,7 +679,7 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI,
}
/// \brief Check if a simpler conditional branch can be
-// generated
+/// generated
bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
return TII->optimizeCondBranch(*MI);
}
@@ -805,13 +825,13 @@ class CopyRewriter {
protected:
/// The copy-like instruction.
MachineInstr &CopyLike;
+
/// The index of the source being rewritten.
- unsigned CurrentSrcIdx;
+ unsigned CurrentSrcIdx = 0;
public:
- CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {}
-
- virtual ~CopyRewriter() {}
+ CopyRewriter(MachineInstr &MI) : CopyLike(MI) {}
+ virtual ~CopyRewriter() = default;
/// \brief Get the next rewritable source (SrcReg, SrcSubReg) and
/// the related value that it affects (TrackReg, TrackSubReg).
@@ -944,6 +964,7 @@ class UncoalescableRewriter : public CopyRewriter {
protected:
const TargetInstrInfo &TII;
MachineRegisterInfo &MRI;
+
/// The number of defs in the bitcast
unsigned NumDefs;
@@ -958,7 +979,6 @@ public:
/// All such sources need to be considered rewritable in order to
/// rewrite an uncoalescable copy-like instruction. This method returns
/// each definition that must be checked if rewritable.
- ///
bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
unsigned &TrackReg,
unsigned &TrackSubReg) override {
@@ -1205,7 +1225,7 @@ public:
}
};
-} // end anonymous namespace
+} // end anonymous namespace
/// \brief Get the appropriated CopyRewriter for \p MI.
/// \return A pointer to a dynamically allocated CopyRewriter or nullptr
@@ -1433,10 +1453,10 @@ bool PeepholeOptimizer::foldImmediate(
// only the first copy is considered.
//
// e.g.
-// %vreg1 = COPY %vreg0
-// %vreg2 = COPY %vreg0:sub1
+// %1 = COPY %0
+// %2 = COPY %0:sub1
//
-// Should replace %vreg2 uses with %vreg1:sub1
+// Should replace %2 uses with %1:sub1
bool PeepholeOptimizer::foldRedundantCopy(
MachineInstr *MI, SmallSet<unsigned, 4> &CopySrcRegs,
DenseMap<unsigned, MachineInstr *> &CopyMIs) {
@@ -1496,7 +1516,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
unsigned DstReg = MI->getOperand(0).getReg();
unsigned SrcReg = MI->getOperand(1).getReg();
if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
- // %vreg = COPY %PHYSREG
+ // %vreg = COPY %physreg
// Avoid using a data structure which can track multiple live non-allocatable
// phys->virt copies since LLVM doesn't seem to do this.
NAPhysToVirtMIs.insert({SrcReg, MI});
@@ -1506,7 +1526,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
return false;
- // %PHYSREG = COPY %vreg
+ // %physreg = COPY %vreg
auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
if (PrevCopy == NAPhysToVirtMIs.end()) {
// We can't remove the copy: there was an intervening clobber of the
@@ -1601,16 +1621,16 @@ bool PeepholeOptimizer::findTargetRecurrence(
/// from the phi. For example, if there is a recurrence of
///
/// LoopHeader:
-/// %vreg1 = phi(%vreg0, %vreg100)
+/// %1 = phi(%0, %100)
/// LoopLatch:
-/// %vreg0<def, tied1> = ADD %vreg2<def, tied0>, %vreg1
+/// %0<def, tied1> = ADD %2<def, tied0>, %1
///
-/// , the fact that vreg0 and vreg2 are in the same tied operands set makes
+/// , the fact that %0 and %2 are in the same tied operands set makes
/// the coalescing of copy instruction generated from the phi in
-/// LoopHeader(i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and
-/// %vreg2 have overlapping live range. This introduces additional move
-/// instruction to the final assembly. However, if we commute %vreg2 and
-/// %vreg1 of ADD instruction, the redundant move instruction can be
+/// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and
+/// %2 have overlapping live range. This introduces additional move
+/// instruction to the final assembly. However, if we commute %2 and
+/// %1 of ADD instruction, the redundant move instruction can be
/// avoided.
bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
SmallSet<unsigned, 2> TargetRegs;
@@ -1642,7 +1662,7 @@ bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
}
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
@@ -1676,8 +1696,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// Track when a non-allocatable physical register is copied to a virtual
// register so that useless moves can be removed.
//
- // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG`
- // without any intervening re-definition of %PHYSREG.
+ // %physreg is the map index; MI is the last valid `%vreg = COPY %physreg`
+ // without any intervening re-definition of %physreg.
DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs;
// Set of virtual registers that are copied from.
@@ -1847,7 +1867,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n");
FoldAsLoadDefCandidates.clear();
}
-
}
}
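
The foldRedundantCopy logic above can be modeled with a map keyed on the
copied-from register: the first `%a = COPY %x` is remembered, and any later
`%b = COPY %x` reuses `%a`. A minimal standalone sketch of that bookkeeping
(plain std types, not the pass's MachineInstr machinery; subregister copies
omitted):

    #include <cstdio>
    #include <unordered_map>

    int main() {
      // Src vreg -> dst vreg of the first copy seen from that source.
      std::unordered_map<unsigned, unsigned> FirstDstForSrc;
      const unsigned Copies[][2] = {{1, 0}, {2, 0}}; // %1 = COPY %0 ; %2 = COPY %0
      for (const auto &C : Copies) {
        auto [It, Inserted] = FirstDstForSrc.try_emplace(C[1], C[0]);
        if (!Inserted) // a second copy of %0: its uses can go through %1
          std::printf("replace uses of %%%u with %%%u\n", C[0], It->second);
      }
    }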
diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp
index 4a50d895340a..f9d4a9746e41 100644
--- a/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -31,11 +31,11 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "post-RA-hazard-rec"
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index f2249f9e37e0..5d86faafdd85 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -25,7 +25,6 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -34,15 +33,15 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "post-RA-sched"
@@ -280,7 +279,7 @@ bool PostRAScheduler::enablePostRAScheduler(
}
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
- if (skipFunction(*Fn.getFunction()))
+ if (skipFunction(Fn.getFunction()))
return false;
TII = Fn.getSubtarget().getInstrInfo();
@@ -322,8 +321,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
static int bbcnt = 0;
if (bbcnt++ % DebugDiv != DebugMod)
continue;
- dbgs() << "*** DEBUG scheduling " << Fn.getName()
- << ":BB#" << MBB.getNumber() << " ***\n";
+ dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":"
+ << printMBBReference(MBB) << " ***\n";
}
#endif
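
The DebugDiv/DebugMod pair in the hunk above is a bisection aid: only blocks
whose running count is congruent to DebugMod modulo DebugDiv get the post-RA
scheduler, which lets a miscompile be narrowed down to a single block. The
check, stripped to a standalone sketch (constants stand in for the cl::opt
flags):

    #include <cstdio>

    int main() {
      const int DebugDiv = 4, DebugMod = 1; // stand-ins for the command-line knobs
      static int bbcnt = 0;
      for (int BB = 0; BB < 8; ++BB) {
        if (bbcnt++ % DebugDiv != DebugMod)
          continue; // scheduler skipped for this block
        std::printf("scheduling block %d\n", BB); // fires for blocks 1 and 5
      }
    }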
diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp
index fbc2bc64f425..8f88ef78828a 100644
--- a/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -1,4 +1,4 @@
-//===-- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ---===//
+//===- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,15 +16,15 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
-namespace {
-
-bool lowerLoadRelative(Function &F) {
+static bool lowerLoadRelative(Function &F) {
if (F.use_empty())
return false;
@@ -55,7 +55,7 @@ bool lowerLoadRelative(Function &F) {
return Changed;
}
-bool lowerIntrinsics(Module &M) {
+static bool lowerIntrinsics(Module &M) {
bool Changed = false;
for (Function &F : M) {
if (F.getName().startswith("llvm.load.relative."))
@@ -64,23 +64,26 @@ bool lowerIntrinsics(Module &M) {
return Changed;
}
+namespace {
+
class PreISelIntrinsicLoweringLegacyPass : public ModulePass {
public:
static char ID;
+
PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {}
- bool runOnModule(Module &M) { return lowerIntrinsics(M); }
+ bool runOnModule(Module &M) override { return lowerIntrinsics(M); }
};
+} // end anonymous namespace
+
char PreISelIntrinsicLoweringLegacyPass::ID;
-}
INITIALIZE_PASS(PreISelIntrinsicLoweringLegacyPass,
"pre-isel-intrinsic-lowering", "Pre-ISel Intrinsic Lowering",
false, false)
-namespace llvm {
-ModulePass *createPreISelIntrinsicLoweringPass() {
+ModulePass *llvm::createPreISelIntrinsicLoweringPass() {
return new PreISelIntrinsicLoweringLegacyPass;
}
@@ -91,4 +94,3 @@ PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M,
else
return PreservedAnalyses::none();
}
-} // End llvm namespace
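
The net layout after these hunks is the common legacy-pass shape: file-local
helpers become static free functions, only the pass class lives in the
anonymous namespace, and the factory is defined as a qualified llvm:: member
rather than inside a namespace block. In miniature, with a hypothetical pass
name (assumes the usual in-tree pass headers; not real LLVM code):

    static bool lowerMyIntrinsics(Module &M) { return false; } // file-local helper

    namespace {

    class MyLoweringLegacyPass : public ModulePass {
    public:
      static char ID;

      MyLoweringLegacyPass() : ModulePass(ID) {}
      bool runOnModule(Module &M) override { return lowerMyIntrinsics(M); }
    };

    } // end anonymous namespace

    char MyLoweringLegacyPass::ID;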
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 0118580a626a..48b48c5f6499 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -13,10 +13,10 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -154,7 +154,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
if (WorkList.empty())
continue;
- DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size()
+ DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size()
<< " implicit defs.\n");
Changed = true;
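
printMBBReference is the spelling these prints converge on throughout the
patch; it emits a MIR-style block reference instead of the old ad-hoc BB#N.
The before/after shape, as a comment sketch:

    // Before: dbgs() << "BB#" << MBB.getNumber();
    // After:  dbgs() << printMBBReference(MBB); // MIR-style, e.g. "%bb.5"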
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index e9f8d43fe643..a8d8ad8ac7dc 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -1,4 +1,4 @@
-//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,77 +16,83 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <climits>
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <utility>
+#include <vector>
using namespace llvm;
#define DEBUG_TYPE "prologepilog"
-typedef SmallVector<MachineBasicBlock *, 4> MBBVector;
-static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS,
- unsigned &MinCSFrameIndex,
- unsigned &MaxCXFrameIndex,
- const MBBVector &SaveBlocks,
- const MBBVector &RestoreBlocks);
+using MBBVector = SmallVector<MachineBasicBlock *, 4>;
namespace {
+
class PEI : public MachineFunctionPass {
public:
static char ID;
+
PEI() : MachineFunctionPass(ID) {
initializePEIPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
- MachineFunctionProperties getRequiredProperties() const override {
- MachineFunctionProperties MFP;
- if (UsesCalleeSaves)
- MFP.set(MachineFunctionProperties::Property::NoVRegs);
- return MFP;
- }
-
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
- ///
bool runOnMachineFunction(MachineFunction &Fn) override;
private:
- std::function<void(MachineFunction &MF, RegScavenger *RS,
- unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex,
- const MBBVector &SaveBlocks,
- const MBBVector &RestoreBlocks)>
- SpillCalleeSavedRegisters;
- std::function<void(MachineFunction &MF, RegScavenger &RS)>
- ScavengeFrameVirtualRegs;
-
- bool UsesCalleeSaves = false;
-
RegScavenger *RS;
// MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
@@ -108,8 +114,12 @@ private:
// FrameIndexVirtualScavenging is used.
bool FrameIndexEliminationScavenging;
+ // Emit remarks.
+ MachineOptimizationRemarkEmitter *ORE = nullptr;
+
void calculateCallFrameInfo(MachineFunction &Fn);
void calculateSaveRestoreBlocks(MachineFunction &Fn);
+ void spillCalleeSavedRegs(MachineFunction &MF);
void calculateFrameObjectOffsets(MachineFunction &Fn);
void replaceFrameIndices(MachineFunction &Fn);
@@ -117,9 +127,11 @@ private:
int &SPAdj);
void insertPrologEpilogCode(MachineFunction &Fn);
};
-} // namespace
+
+} // end anonymous namespace
char PEI::ID = 0;
+
char &llvm::PrologEpilogCodeInserterID = PEI::ID;
static cl::opt<unsigned>
@@ -132,6 +144,7 @@ INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false,
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(PEI, DEBUG_TYPE,
"Prologue/Epilogue Insertion & Frame Finalization", false,
false)
@@ -148,32 +161,17 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<StackProtector>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
/// StackObjSet - A set of stack object indexes
-typedef SmallSetVector<int, 8> StackObjSet;
+using StackObjSet = SmallSetVector<int, 8>;
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
-///
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
- if (!SpillCalleeSavedRegisters) {
- const TargetMachine &TM = Fn.getTarget();
- if (!TM.usesPhysRegsForPEI()) {
- SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
- unsigned &, unsigned &, const MBBVector &,
- const MBBVector &) {};
- ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {};
- } else {
- SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
- ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs;
- UsesCalleeSaves = true;
- }
- }
-
- const Function* F = Fn.getFunction();
+ const Function &F = Fn.getFunction();
const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
@@ -181,6 +179,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
TRI->requiresFrameIndexReplacementScavenging(Fn);
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
// function's frame information. Also eliminates call frame pseudo
@@ -192,8 +191,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
calculateSaveRestoreBlocks(Fn);
// Handle CSR spilling and restoring, for targets that need it.
- SpillCalleeSavedRegisters(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex,
- SaveBlocks, RestoreBlocks);
+ if (Fn.getTarget().usesPhysRegsForPEI())
+ spillCalleeSavedRegs(Fn);
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
@@ -207,7 +206,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// called functions. Because of this, calculateCalleeSavedRegisters()
// must be called before this function in order to set the AdjustsStack
// and MaxCallFrameSize variables.
- if (!F->hasFnAttribute(Attribute::Naked))
+ if (!F.hasFnAttribute(Attribute::Naked))
insertPrologEpilogCode(Fn);
// Replace all MO_FrameIndex operands with physical register references
@@ -218,19 +217,15 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// If register scavenging is needed, as we've enabled doing it as a
// post-pass, scavenge the virtual registers that frame index elimination
// inserted.
- if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
- ScavengeFrameVirtualRegs(Fn, *RS);
-
- // Clear any vregs created by virtual scavenging.
- Fn.getRegInfo().clearVirtRegs();
- }
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
+ scavengeFrameVirtualRegs(Fn, *RS);
// Warn on stack size when it exceeds the given limit.
MachineFrameInfo &MFI = Fn.getFrameInfo();
uint64_t StackSize = MFI.getStackSize();
if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
- DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
- F->getContext().diagnose(DiagStackSize);
+ DiagnosticInfoStackSize DiagStackSize(F, StackSize);
+ F.getContext().diagnose(DiagStackSize);
}
delete RS;
@@ -459,87 +454,63 @@ static void updateLiveness(MachineFunction &MF) {
}
}
-/// insertCSRSpillsAndRestores - Insert spill and restore code for
-/// callee saved registers used in the function.
-///
-static void insertCSRSpillsAndRestores(MachineFunction &Fn,
- const MBBVector &SaveBlocks,
- const MBBVector &RestoreBlocks) {
- // Get callee saved register information.
- MachineFrameInfo &MFI = Fn.getFrameInfo();
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
-
- MFI.setCalleeSavedInfoValid(true);
-
- // Early exit if no callee saved registers are modified!
- if (CSI.empty())
- return;
-
+/// Insert spill code for the callee-saved registers used in the function.
+static void insertCSRSaves(MachineBasicBlock &SaveBlock,
+ ArrayRef<CalleeSavedInfo> CSI) {
+ MachineFunction &Fn = *SaveBlock.getParent();
const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
- MachineBasicBlock::iterator I;
-
- // Spill using target interface.
- for (MachineBasicBlock *SaveBlock : SaveBlocks) {
- I = SaveBlock->begin();
- if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- // Insert the spill to the stack frame.
- unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
- RC, TRI);
- }
+
+ MachineBasicBlock::iterator I = SaveBlock.begin();
+ if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
+ for (const CalleeSavedInfo &CS : CSI) {
+ // Insert the spill to the stack frame.
+ unsigned Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+ TRI);
}
- // Update the live-in information of all the blocks up to the save point.
- updateLiveness(Fn);
}
+}
- // Restore using target interface.
- for (MachineBasicBlock *MBB : RestoreBlocks) {
- I = MBB->end();
-
- // Skip over all terminator instructions, which are part of the return
- // sequence.
- MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->isTerminator())
- I = I2;
-
- bool AtStart = I == MBB->begin();
- MachineBasicBlock::iterator BeforeI = I;
- if (!AtStart)
- --BeforeI;
-
- // Restore all registers immediately before the return and any
- // terminators that precede it.
- if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
- assert(I != MBB->begin() &&
- "loadRegFromStackSlot didn't insert any code!");
- // Insert in reverse order. loadRegFromStackSlot can insert
- // multiple instructions.
- if (AtStart)
- I = MBB->begin();
- else {
- I = BeforeI;
- ++I;
- }
- }
+/// Insert restore code for the callee-saved registers used in the function.
+static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
+ std::vector<CalleeSavedInfo> &CSI) {
+ MachineFunction &Fn = *RestoreBlock.getParent();
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
+
+ if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
+ for (const CalleeSavedInfo &CI : reverse(CSI)) {
+ unsigned Reg = CI.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
+ assert(I != RestoreBlock.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
}
}
}
-static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS,
- unsigned &MinCSFrameIndex,
- unsigned &MaxCSFrameIndex,
- const MBBVector &SaveBlocks,
- const MBBVector &RestoreBlocks) {
- const Function *F = Fn.getFunction();
+void PEI::spillCalleeSavedRegs(MachineFunction &Fn) {
+ // We can't list this requirement in getRequiredProperties because some
+ // targets (WebAssembly) use virtual registers past this point, and the pass
+ // pipeline is set up without giving the passes a chance to look at the
+ // TargetMachine.
+ // FIXME: Find a way to express this in getRequiredProperties.
+ assert(Fn.getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs));
+
+ const Function &F = Fn.getFunction();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
MinCSFrameIndex = std::numeric_limits<unsigned>::max();
MaxCSFrameIndex = 0;
@@ -551,8 +522,21 @@ static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS,
assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex);
// Add the code to save and restore the callee saved registers.
- if (!F->hasFnAttribute(Attribute::Naked))
- insertCSRSpillsAndRestores(Fn, SaveBlocks, RestoreBlocks);
+ if (!F.hasFnAttribute(Attribute::Naked)) {
+ MFI.setCalleeSavedInfoValid(true);
+
+ std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ if (!CSI.empty()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+ insertCSRSaves(*SaveBlock, CSI);
+ // Update the live-in information of all the blocks up to the save
+ // point.
+ updateLiveness(Fn);
+ }
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+ insertCSRRestores(*RestoreBlock, CSI);
+ }
+ }
}
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
@@ -585,7 +569,6 @@ AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
/// Compute which bytes of fixed and callee-save stack area are unused and keep
/// track of them in StackBytesFree.
-///
static inline void
computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex,
@@ -626,7 +609,6 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
/// Assign frame object to an unused portion of the stack in the fixed stack
/// object range. Return true if the allocation was successful.
-///
static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
bool StackGrowsDown, unsigned MaxAlign,
BitVector &StackBytesFree) {
@@ -703,7 +685,6 @@ AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
/// abstract stack objects.
-///
void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
StackProtector *SP = &getAnalysis<StackProtector>();
@@ -825,7 +806,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
// Retrieve the Exception Handler registration node.
- int EHRegNodeFrameIndex = INT_MAX;
+ int EHRegNodeFrameIndex = std::numeric_limits<int>::max();
if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo())
EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex;
@@ -903,7 +884,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
// Allocate the EH registration node first if one is present.
- if (EHRegNodeFrameIndex != INT_MAX)
+ if (EHRegNodeFrameIndex != std::numeric_limits<int>::max())
AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset,
MaxAlign, Skew);
@@ -968,12 +949,18 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
int64_t StackSize = Offset - LocalAreaOffset;
MFI.setStackSize(StackSize);
NumBytesStackSpace += StackSize;
+
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize",
+ Fn.getFunction().getSubprogram(),
+ &Fn.front())
+ << ore::NV("NumStackBytes", StackSize) << " stack bytes in function";
+ });
}
/// insertPrologEpilogCode - Scan the function for modified callee saved
/// registers, insert spill code for these callee saved registers, then add
/// prolog and epilog code to the function.
-///
void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
@@ -995,21 +982,24 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
if (Fn.shouldSplitStack()) {
for (MachineBasicBlock *SaveBlock : SaveBlocks)
TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
- }
+ // Record that there are split-stack functions, so we will emit a
+ // special section to tell the linker.
+ Fn.getMMI().setHasSplitStack(true);
+ } else
+ Fn.getMMI().setHasNosplitStack(true);
// Emit additional code that is required to explicitly handle the stack in
// HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
// approach is rather similar to that of Segmented Stacks, but it uses a
// different conditional check and another BIF for allocating more stack
// space.
- if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+ if (Fn.getFunction().getCallingConv() == CallingConv::HiPE)
for (MachineBasicBlock *SaveBlock : SaveBlocks)
TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
-///
void PEI::replaceFrameIndices(MachineFunction &Fn) {
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
if (!TFI.needsFrameIndexResolution(Fn)) return;
@@ -1059,7 +1049,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
bool InsideCallSequence = false;
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
-
if (TII.isFrameInstr(*I)) {
InsideCallSequence = TII.isFrameSetup(*I);
SPAdj += TII.getSPAdjust(*I);
@@ -1081,11 +1070,12 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
assert(i == 0 && "Frame indices can only appear as the first "
"operand of a DBG_VALUE machine instruction");
unsigned Reg;
- MachineOperand &Offset = MI.getOperand(1);
- Offset.setImm(
- Offset.getImm() +
- TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg));
+ int64_t Offset =
+ TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg);
MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
+ auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
+ DIExpression::NoDeref, Offset);
+ MI.getOperand(3).setMetadata(DIExpr);
continue;
}
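
For the DBG_VALUE hunk just above: the frame offset is no longer folded into
the immediate operand; it is prepended to the variable's DIExpression, so
debug-info consumers see it as part of the location expression. Roughly, at
the MIR level (a sketch; operand syntax simplified):

    // before frame-index elimination:
    //   DBG_VALUE <fi#0>, 0, !"x", !DIExpression()
    // after:
    //   DBG_VALUE %framereg, 0, !"x", !DIExpression(DW_OP_plus_uconst, Offset)
    // i.e. the register replaces the frame index, and the offset travels in
    // the expression via DIExpression::prepend rather than in the immediate.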
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index b29e62bf1aa3..86fd87450521 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
@@ -24,7 +25,11 @@ static const char *const PSVNames[] = {
"Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
"GlobalValueCallEntry", "ExternalSymbolCallEntry"};
-PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {}
+PseudoSourceValue::PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII)
+ : Kind(Kind) {
+ AddressSpace = TII.getAddressSpaceForPseudoSourceKind(Kind);
+}
+
PseudoSourceValue::~PseudoSourceValue() {}
@@ -75,8 +80,9 @@ void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
OS << "FixedStack" << FI;
}
-CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(PSVKind Kind)
- : PseudoSourceValue(Kind) {}
+CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(
+ PSVKind Kind, const TargetInstrInfo &TII)
+ : PseudoSourceValue(Kind, TII) {}
bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
return false;
@@ -91,16 +97,20 @@ bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
}
GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue(
- const GlobalValue *GV)
- : CallEntryPseudoSourceValue(GlobalValueCallEntry), GV(GV) {}
-
-ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(const char *ES)
- : CallEntryPseudoSourceValue(ExternalSymbolCallEntry), ES(ES) {}
-
-PseudoSourceValueManager::PseudoSourceValueManager()
- : StackPSV(PseudoSourceValue::Stack), GOTPSV(PseudoSourceValue::GOT),
- JumpTablePSV(PseudoSourceValue::JumpTable),
- ConstantPoolPSV(PseudoSourceValue::ConstantPool) {}
+ const GlobalValue *GV,
+ const TargetInstrInfo &TII)
+ : CallEntryPseudoSourceValue(GlobalValueCallEntry, TII), GV(GV) {}
+ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(
+ const char *ES, const TargetInstrInfo &TII)
+ : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TII), ES(ES) {}
+
+PseudoSourceValueManager::PseudoSourceValueManager(
+ const TargetInstrInfo &TIInfo)
+ : TII(TIInfo),
+ StackPSV(PseudoSourceValue::Stack, TII),
+ GOTPSV(PseudoSourceValue::GOT, TII),
+ JumpTablePSV(PseudoSourceValue::JumpTable, TII),
+ ConstantPoolPSV(PseudoSourceValue::ConstantPool, TII) {}
const PseudoSourceValue *PseudoSourceValueManager::getStack() {
return &StackPSV;
@@ -116,10 +126,11 @@ const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() {
return &JumpTablePSV;
}
-const PseudoSourceValue *PseudoSourceValueManager::getFixedStack(int FI) {
+const PseudoSourceValue *
+PseudoSourceValueManager::getFixedStack(int FI) {
std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
if (!V)
- V = llvm::make_unique<FixedStackPseudoSourceValue>(FI);
+ V = llvm::make_unique<FixedStackPseudoSourceValue>(FI, TII);
return V.get();
}
@@ -128,7 +139,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) {
std::unique_ptr<const GlobalValuePseudoSourceValue> &E =
GlobalCallEntries[GV];
if (!E)
- E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV);
+ E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV, TII);
return E.get();
}
@@ -137,6 +148,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) {
std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E =
ExternalCallEntries[ES];
if (!E)
- E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES);
+ E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII);
return E.get();
}
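
Threading TII through every PseudoSourceValue constructor exists so a target
can report a non-default address space per PSV kind (stack, GOT, constant
pool, ...). A hypothetical target override, with the hook signature inferred
from the constructor's call above (MyTargetInstrInfo is not a real class):

    unsigned MyTargetInstrInfo::getAddressSpaceForPseudoSourceKind(
        PseudoSourceValue::PSVKind Kind) const {
      switch (Kind) {
      case PseudoSourceValue::Stack:
      case PseudoSourceValue::FixedStack:
        return 5; // e.g. a dedicated private/stack address space
      default:
        return 0; // generic address space
      }
    }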
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
index 8f19e432ab79..2fcbd1280da4 100644
--- a/lib/CodeGen/README.txt
+++ b/lib/CodeGen/README.txt
@@ -33,7 +33,7 @@ It also increases the likelihood the store may become dead.
bb27 ...
...
%reg1037 = ADDri %reg1039, 1
- %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+ %reg1038 = ADDrs %reg1032, %reg1039, %noreg, 10
Successors according to CFG: 0x8b03bf0 (#5)
bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 7b4fbace2c1c..74c1592634aa 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -1,4 +1,4 @@
-//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===//
+//===- RegAllocBase.cpp - Register Allocator Base Class -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,19 +14,22 @@
#include "RegAllocBase.h"
#include "Spiller.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
using namespace llvm;
@@ -37,8 +40,8 @@ STATISTIC(NumNewQueued , "Number of new live ranges queued");
// Temporary verification option until we can put verification inside
// MachineVerifier.
static cl::opt<bool, true>
-VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
- cl::desc("Verify during register allocation"));
+ VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::Hidden, cl::desc("Verify during register allocation"));
const char RegAllocBase::TimerGroupName[] = "regalloc";
const char RegAllocBase::TimerGroupDescription[] = "Register Allocation";
@@ -103,7 +106,9 @@ void RegAllocBase::allocatePhysRegs() {
DEBUG(dbgs() << "\nselectOrSplit "
<< TRI->getRegClassName(MRI->getRegClass(VirtReg->reg))
<< ':' << *VirtReg << " w=" << VirtReg->weight << '\n');
- typedef SmallVector<unsigned, 4> VirtRegVec;
+
+ using VirtRegVec = SmallVector<unsigned, 4>;
+
VirtRegVec SplitVRegs;
unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
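
The driver contract here: dequeue a live range, ask selectOrSplit for a
physreg, and requeue whatever new ranges it produced until the queue drains.
A standalone model of that loop (toy selectOrSplit, no LLVM types):

    #include <cstdio>
    #include <queue>
    #include <vector>

    // Toy stand-in: "split" odd vregs into one new range, assign the rest.
    static unsigned selectOrSplit(unsigned VReg, std::vector<unsigned> &NewVRegs) {
      if (VReg % 2) { NewVRegs.push_back(VReg + 101); return 0; }
      return VReg + 10; // pretend physreg number
    }

    int main() {
      std::queue<unsigned> Work;
      for (unsigned V : {1u, 2u}) Work.push(V);
      while (!Work.empty()) {
        unsigned V = Work.front(); Work.pop();
        std::vector<unsigned> NewVRegs;
        if (unsigned P = selectOrSplit(V, NewVRegs))
          std::printf("assign %%%u -> phys%u\n", V, P);
        for (unsigned N : NewVRegs) Work.push(N); // split products go back in
      }
    }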
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index d8921b5ce6db..686ffc36e049 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -1,4 +1,4 @@
-//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===//
+//===- RegAllocBase.h - basic regalloc interface and driver -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -37,17 +37,20 @@
#ifndef LLVM_LIB_CODEGEN_REGALLOCBASE_H
#define LLVM_LIB_CODEGEN_REGALLOCBASE_H
-#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
namespace llvm {
-template<typename T> class SmallVectorImpl;
-class TargetRegisterInfo;
-class VirtRegMap;
+class LiveInterval;
class LiveIntervals;
class LiveRegMatrix;
+class MachineInstr;
+class MachineRegisterInfo;
+template<typename T> class SmallVectorImpl;
class Spiller;
+class TargetRegisterInfo;
+class VirtRegMap;
/// RegAllocBase provides the register allocation driver and interface that can
/// be extended to add interesting heuristics.
@@ -57,12 +60,13 @@ class Spiller;
/// assignment order.
class RegAllocBase {
virtual void anchor();
+
protected:
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
- VirtRegMap *VRM;
- LiveIntervals *LIS;
- LiveRegMatrix *Matrix;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ VirtRegMap *VRM = nullptr;
+ LiveIntervals *LIS = nullptr;
+ LiveRegMatrix *Matrix = nullptr;
RegisterClassInfo RegClassInfo;
/// Inst which is a def of an original reg and whose defs are already all
@@ -71,10 +75,8 @@ protected:
/// always available for the remat of all the siblings of the original reg.
SmallPtrSet<MachineInstr *, 32> DeadRemats;
- RegAllocBase()
- : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {}
-
- virtual ~RegAllocBase() {}
+ RegAllocBase() = default;
+ virtual ~RegAllocBase() = default;
// A RegAlloc pass should call this before allocatePhysRegs.
void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat);
@@ -120,4 +122,4 @@ private:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_REGALLOCBASE_H
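
The header cleanup above is the standard modernization pair: default member
initializers plus defaulted special members replace hand-written init lists
and empty bodies. In miniature:

    struct Example {
      int *P = nullptr;           // in-class initializer replaces ": P(nullptr)"
      Example() = default;        // replaces "Example() {}"
      virtual ~Example() = default;
    };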
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 774306154a89..6e273277804b 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -18,7 +18,7 @@
#include "Spiller.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -29,11 +29,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <cstdlib>
#include <queue>
@@ -143,14 +143,17 @@ INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
false)
bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) {
+ LiveInterval &LI = LIS->getInterval(VirtReg);
if (VRM->hasPhys(VirtReg)) {
- LiveInterval &LI = LIS->getInterval(VirtReg);
Matrix->unassign(LI);
aboutToRemoveInterval(LI);
return true;
}
// Unassigned virtreg is probably in the priority queue.
// RegAllocBase will erase it after dequeueing.
+ // Nonetheless, clear the live-range so that the debug
+ // dump will show the right state for that VirtReg.
+ LI.clear();
return false;
}
@@ -216,8 +219,8 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
Intfs.push_back(Intf);
}
}
- DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
- " interferences with " << VirtReg << "\n");
+ DEBUG(dbgs() << "spilling " << printReg(PhysReg, TRI)
+ << " interferences with " << VirtReg << "\n");
assert(!Intfs.empty() && "expected interference");
// Spill each interfering vreg allocated to PhysReg or an alias.
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index d5538be4bba2..6a5282cbbbff 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -1,4 +1,4 @@
-//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===//
+//===- RegAllocFast.cpp - A fast register allocator for debug code --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,32 +7,47 @@
//
//===----------------------------------------------------------------------===//
//
-// This register allocator allocates registers to a basic block at a time,
-// attempting to keep values in registers and reusing registers as appropriate.
+/// \file This register allocator allocates registers to a basic block at a
+/// time, attempting to keep values in registers and reusing registers as
+/// appropriate.
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <algorithm>
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <tuple>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "regalloc"
@@ -45,109 +60,106 @@ static RegisterRegAlloc
fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
namespace {
- class RAFast : public MachineFunctionPass {
+
+ class RegAllocFast : public MachineFunctionPass {
public:
static char ID;
- RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
- isBulkSpilling(false) {}
+
+ RegAllocFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}
private:
- MachineFunction *MF;
+ MachineFrameInfo *MFI;
MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
RegisterClassInfo RegClassInfo;
- // Basic block currently being allocated.
+ /// Basic block currently being allocated.
MachineBasicBlock *MBB;
- // StackSlotForVirtReg - Maps virtual regs to the frame index where these
- // values are spilled.
+ /// Maps virtual regs to the frame index where these values are spilled.
IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
- // Everything we know about a live virtual register.
+ /// Everything we know about a live virtual register.
struct LiveReg {
- MachineInstr *LastUse; // Last instr to use reg.
- unsigned VirtReg; // Virtual register number.
- unsigned PhysReg; // Currently held here.
- unsigned short LastOpNum; // OpNum on LastUse.
- bool Dirty; // Register needs spill.
+ MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
+ unsigned VirtReg; ///< Virtual register number.
+ MCPhysReg PhysReg = 0; ///< Currently held here.
+ unsigned short LastOpNum = 0; ///< OpNum on LastUse.
+ bool Dirty = false; ///< Register needs spill.
- explicit LiveReg(unsigned v)
- : LastUse(nullptr), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false){}
+ explicit LiveReg(unsigned v) : VirtReg(v) {}
unsigned getSparseSetIndex() const {
return TargetRegisterInfo::virtReg2Index(VirtReg);
}
};
- typedef SparseSet<LiveReg> LiveRegMap;
+ using LiveRegMap = SparseSet<LiveReg>;
- // LiveVirtRegs - This map contains entries for each virtual register
- // that is currently available in a physical register.
+ /// This map contains entries for each virtual register that is currently
+ /// available in a physical register.
LiveRegMap LiveVirtRegs;
- DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
+ DenseMap<unsigned, SmallVector<MachineInstr *, 4>> LiveDbgValueMap;
- // RegState - Track the state of a physical register.
+ /// Track the state of a physical register.
enum RegState {
- // A disabled register is not available for allocation, but an alias may
- // be in use. A register can only be moved out of the disabled state if
- // all aliases are disabled.
+ /// A disabled register is not available for allocation, but an alias may
+ /// be in use. A register can only be moved out of the disabled state if
+ /// all aliases are disabled.
regDisabled,
- // A free register is not currently in use and can be allocated
- // immediately without checking aliases.
+ /// A free register is not currently in use and can be allocated
+ /// immediately without checking aliases.
regFree,
- // A reserved register has been assigned explicitly (e.g., setting up a
- // call parameter), and it remains reserved until it is used.
+ /// A reserved register has been assigned explicitly (e.g., setting up a
+ /// call parameter), and it remains reserved until it is used.
regReserved
- // A register state may also be a virtual register number, indication that
- // the physical register is currently allocated to a virtual register. In
- // that case, LiveVirtRegs contains the inverse mapping.
+ /// A register state may also be a virtual register number, indicating
+ /// that the physical register is currently allocated to a virtual
+ /// register. In that case, LiveVirtRegs contains the inverse mapping.
};
- // PhysRegState - One of the RegState enums, or a virtreg.
+ /// One of the RegState enums, or a virtreg.
std::vector<unsigned> PhysRegState;
- // Set of register units.
- typedef SparseSet<unsigned> UsedInInstrSet;
+ SmallVector<unsigned, 16> VirtDead;
+ SmallVector<MachineInstr *, 32> Coalesced;
+
+ /// Set of register units.
+ using UsedInInstrSet = SparseSet<unsigned>;
- // Set of register units that are used in the current instruction, and so
- // cannot be allocated.
+ /// Set of register units that are used in the current instruction, and so
+ /// cannot be allocated.
UsedInInstrSet UsedInInstr;
- // Mark a physreg as used in this instruction.
- void markRegUsedInInstr(unsigned PhysReg) {
+ /// Mark a physreg as used in this instruction.
+ void markRegUsedInInstr(MCPhysReg PhysReg) {
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
UsedInInstr.insert(*Units);
}
- // Check if a physreg or any of its aliases are used in this instruction.
- bool isRegUsedInInstr(unsigned PhysReg) const {
+ /// Check if a physreg or any of its aliases are used in this instruction.
+ bool isRegUsedInInstr(MCPhysReg PhysReg) const {
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
if (UsedInInstr.count(*Units))
return true;
return false;
}
- // SkippedInstrs - Descriptors of instructions whose clobber list was
- // ignored because all registers were spilled. It is still necessary to
- // mark all the clobbered registers as used by the function.
- SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
-
- // isBulkSpilling - This flag is set when LiveRegMap will be cleared
- // completely after spilling all live registers. LiveRegMap entries should
- // not be erased.
- bool isBulkSpilling;
+ /// This flag is set when LiveRegMap will be cleared completely after
+ /// spilling all live registers. LiveRegMap entries should not be erased.
+ bool isBulkSpilling = false;
enum : unsigned {
spillClean = 1,
spillDirty = 100,
spillImpossible = ~0u
};
+
public:
StringRef getPassName() const override { return "Fast Register Allocator"; }
@@ -168,29 +180,32 @@ namespace {
private:
bool runOnMachineFunction(MachineFunction &Fn) override;
- void AllocateBasicBlock();
- void handleThroughOperands(MachineInstr *MI,
+ void allocateBasicBlock(MachineBasicBlock &MBB);
+ void handleThroughOperands(MachineInstr &MI,
SmallVectorImpl<unsigned> &VirtDead);
- int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
- bool isLastUseOfLocalReg(MachineOperand&);
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass &RC);
+ bool isLastUseOfLocalReg(const MachineOperand &MO) const;
- void addKillFlag(const LiveReg&);
- void killVirtReg(LiveRegMap::iterator);
+ void addKillFlag(const LiveReg &LRI);
+ void killVirtReg(LiveRegMap::iterator LRI);
void killVirtReg(unsigned VirtReg);
void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
- void usePhysReg(MachineOperand&);
- void definePhysReg(MachineInstr &MI, unsigned PhysReg, RegState NewState);
- unsigned calcSpillCost(unsigned PhysReg) const;
- void assignVirtToPhysReg(LiveReg&, unsigned PhysReg);
+ void usePhysReg(MachineOperand &MO);
+ void definePhysReg(MachineInstr &MI, MCPhysReg PhysReg, RegState NewState);
+ unsigned calcSpillCost(MCPhysReg PhysReg) const;
+ void assignVirtToPhysReg(LiveReg&, MCPhysReg PhysReg);
+
LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
}
+
LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
}
- LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg);
+
+ LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, MCPhysReg PhysReg);
LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator,
unsigned Hint);
LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum,
@@ -198,35 +213,41 @@ namespace {
LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint);
void spillAll(MachineBasicBlock::iterator MI);
- bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+ bool setPhysReg(MachineInstr &MI, unsigned OpNum, MCPhysReg PhysReg);
+
+ void dumpState();
};
- char RAFast::ID = 0;
-}
-INITIALIZE_PASS(RAFast, "regallocfast", "Fast Register Allocator", false, false)
+} // end anonymous namespace
+
+char RegAllocFast::ID = 0;
+
+INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
+ false)
-/// getStackSpaceFor - This allocates space for the specified virtual register
-/// to be held on the stack.
-int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+/// This allocates space for the specified virtual register to be held on the
+/// stack.
+int RegAllocFast::getStackSpaceFor(unsigned VirtReg,
+ const TargetRegisterClass &RC) {
// Find the location Reg would belong...
int SS = StackSlotForVirtReg[VirtReg];
+ // Already has space allocated?
if (SS != -1)
- return SS; // Already has space allocated?
+ return SS;
// Allocate a new stack object for this spill location...
- unsigned Size = TRI->getSpillSize(*RC);
- unsigned Align = TRI->getSpillAlignment(*RC);
- int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(Size, Align);
+ unsigned Size = TRI->getSpillSize(RC);
+ unsigned Align = TRI->getSpillAlignment(RC);
+ int FrameIdx = MFI->CreateSpillStackObject(Size, Align);
// Assign the slot.
StackSlotForVirtReg[VirtReg] = FrameIdx;
return FrameIdx;
}
-/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to
-/// its virtual register, and it is guaranteed to be a block-local register.
-///
-bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
+/// Return true if MO is the only remaining reference to its virtual register,
+/// and it is guaranteed to be a block-local register.
+bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const {
// If the register has ever been spilled or reloaded, we conservatively assume
// it is a global register used in multiple blocks.
if (StackSlotForVirtReg[MO.getReg()] != -1)
@@ -239,8 +260,8 @@ bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
return ++I == MRI->reg_nodbg_end();
}
-/// addKillFlag - Set kill flags on last use of a virtual register.
-void RAFast::addKillFlag(const LiveReg &LR) {
+/// Set kill flags on last use of a virtual register.
+void RegAllocFast::addKillFlag(const LiveReg &LR) {
if (!LR.LastUse) return;
MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
@@ -250,7 +271,7 @@ void RAFast::addKillFlag(const LiveReg &LR) {
// subreg of this register and given we don't track which
// lanes are actually dead, we cannot insert a kill flag here.
// Otherwise we may end up in a situation like this:
- // ... = (MO) physreg:sub1, physreg <implicit-use, kill>
+ // ... = (MO) physreg:sub1, implicit killed physreg
// ... <== Here we would allow later pass to reuse physreg:sub1
// which is potentially wrong.
// LR:sub0 = ...
@@ -258,8 +279,8 @@ void RAFast::addKillFlag(const LiveReg &LR) {
}
}
-/// killVirtReg - Mark virtreg as no longer available.
-void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
+/// Mark virtreg as no longer available.
+void RegAllocFast::killVirtReg(LiveRegMap::iterator LRI) {
addKillFlag(*LRI);
assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
"Broken RegState mapping");
@@ -269,8 +290,8 @@ void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
LiveVirtRegs.erase(LRI);
}
-/// killVirtReg - Mark virtreg as no longer available.
-void RAFast::killVirtReg(unsigned VirtReg) {
+/// Mark virtreg as no longer available.
+void RegAllocFast::killVirtReg(unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
@@ -278,9 +299,10 @@ void RAFast::killVirtReg(unsigned VirtReg) {
killVirtReg(LRI);
}
-/// spillVirtReg - This method spills the value specified by VirtReg into the
-/// corresponding stack slot if needed.
-void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
+/// This method spills the value specified by VirtReg into the corresponding
+/// stack slot if needed.
+void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
+ unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
@@ -288,9 +310,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
spillVirtReg(MI, LRI);
}
-/// spillVirtReg - Do the actual work of spilling.
-void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
- LiveRegMap::iterator LRI) {
+/// Do the actual work of spilling.
+void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
+ LiveRegMap::iterator LRI) {
LiveReg &LR = *LRI;
assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
@@ -299,12 +321,12 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
// instruction, not on the spill.
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
LR.Dirty = false;
- DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI)
- << " in " << PrintReg(LR.PhysReg, TRI));
- const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg);
+ DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI)
+ << " in " << printReg(LR.PhysReg, TRI));
+ const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg);
int FI = getStackSpaceFor(LRI->VirtReg, RC);
DEBUG(dbgs() << " to stack slot #" << FI << "\n");
- TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
+ TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI);
++NumStores; // Update statistics
// If this register is used by DBG_VALUE then insert new DBG_VALUE to
@@ -312,8 +334,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
// value.
SmallVectorImpl<MachineInstr *> &LRIDbgValues =
LiveDbgValueMap[LRI->VirtReg];
- for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
- MachineInstr *DBG = LRIDbgValues[li];
+ for (MachineInstr *DBG : LRIDbgValues) {
MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI);
assert(NewDV->getParent() == MBB && "dangling parent pointer");
(void)NewDV;
@@ -329,32 +350,31 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
killVirtReg(LRI);
}
-/// spillAll - Spill all dirty virtregs without killing them.
-void RAFast::spillAll(MachineBasicBlock::iterator MI) {
+/// Spill all dirty virtregs without killing them.
+void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
if (LiveVirtRegs.empty()) return;
isBulkSpilling = true;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
// of spilling here is deterministic, if arbitrary.
- for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end();
- i != e; ++i)
- spillVirtReg(MI, i);
+ for (LiveRegMap::iterator I = LiveVirtRegs.begin(), E = LiveVirtRegs.end();
+ I != E; ++I)
+ spillVirtReg(MI, I);
LiveVirtRegs.clear();
isBulkSpilling = false;
}
-/// usePhysReg - Handle the direct use of a physical register.
-/// Check that the register is not used by a virtreg.
-/// Kill the physreg, marking it free.
-/// This may add implicit kills to MO->getParent() and invalidate MO.
-void RAFast::usePhysReg(MachineOperand &MO) {
- unsigned PhysReg = MO.getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
- "Bad usePhysReg operand");
-
+/// Handle the direct use of a physical register. Check that the register is
+/// not used by a virtreg. Kill the physreg, marking it free. This may add
+/// implicit kills to MO->getParent() and invalidate MO.
+void RegAllocFast::usePhysReg(MachineOperand &MO) {
// Ignore undef uses.
if (MO.isUndef())
return;
+ unsigned PhysReg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ "Bad usePhysReg operand");
+
markRegUsedInInstr(PhysReg);
switch (PhysRegState[PhysReg]) {
case regDisabled:
@@ -373,7 +393,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
// Maybe a superregister is reserved?
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- unsigned Alias = *AI;
+ MCPhysReg Alias = *AI;
switch (PhysRegState[Alias]) {
case regDisabled:
break;
@@ -411,11 +431,11 @@ void RAFast::usePhysReg(MachineOperand &MO) {
MO.setIsKill();
}
-/// definePhysReg - Mark PhysReg as reserved or free after spilling any
-/// virtregs. This is very similar to defineVirtReg except the physreg is
-/// reserved instead of allocated.
-void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
- RegState NewState) {
+/// Mark PhysReg as reserved or free after spilling any virtregs. This is very
+/// similar to defineVirtReg except the physreg is reserved instead of
+/// allocated.
+void RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg PhysReg,
+ RegState NewState) {
markRegUsedInInstr(PhysReg);
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
@@ -432,7 +452,7 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
// This is a disabled register, disable all aliases.
PhysRegState[PhysReg] = NewState;
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- unsigned Alias = *AI;
+ MCPhysReg Alias = *AI;
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
break;
@@ -449,15 +469,13 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
}
}
-
-// calcSpillCost - Return the cost of spilling clearing out PhysReg and
-// aliases so it is free for allocation.
-// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
-// can be allocated directly.
-// Returns spillImpossible when PhysReg or an alias can't be spilled.
-unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+/// \brief Return the cost of spilling to clear out PhysReg and its aliases so
+/// that it is free for allocation. Returns 0 when PhysReg is free or disabled
+/// with all aliases disabled - it can be allocated directly.
+/// \returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
if (isRegUsedInInstr(PhysReg)) {
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
+ DEBUG(dbgs() << printReg(PhysReg, TRI) << " is already used in instr.\n");
return spillImpossible;
}
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
@@ -466,8 +484,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
case regFree:
return 0;
case regReserved:
- DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
- << PrintReg(PhysReg, TRI) << " is reserved already.\n");
+ DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
+ << printReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default: {
LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
@@ -477,10 +495,10 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
}
// This is a disabled register, add up cost of aliases.
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
+ DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n");
unsigned Cost = 0;
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- unsigned Alias = *AI;
+ MCPhysReg Alias = *AI;
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
break;
@@ -500,45 +518,37 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
return Cost;
}
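For readers skimming the diff, the cost model above reduces to a small amount of standalone logic. The sketch below restates it under assumed constants and invented types -- none of these names are LLVM API, and the actual pass defines its own spillClean/spillDirty/spillImpossible values.

#include <vector>

// Assumed, illustrative constants -- not the pass's actual values.
enum : unsigned { kSpillClean = 1, kSpillDirty = 100, kSpillImpossible = ~0u };

struct RegState { bool Free, Reserved, UsedInInstr, Dirty; };

// Sum the eviction cost over a candidate register and every alias overlapping
// it; reserved or already-used registers make the spill impossible.
unsigned spillCost(const std::vector<RegState> &Regs,
                   const std::vector<unsigned> &RegAndAliases) {
  unsigned Cost = 0;
  for (unsigned R : RegAndAliases) {
    const RegState &S = Regs[R];
    if (S.UsedInInstr || S.Reserved)
      return kSpillImpossible; // cannot be cleared for allocation at all
    if (S.Free)
      continue;                // nothing lives here, nothing to add
    Cost += S.Dirty ? kSpillDirty : kSpillClean;
  }
  return Cost;                 // 0 means the register can be taken directly
}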
-
-/// assignVirtToPhysReg - This method updates local state so that we know
-/// that PhysReg is the proper container for VirtReg now. The physical
-/// register must not be used for anything else when this is called.
-///
-void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) {
- DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to "
- << PrintReg(PhysReg, TRI) << "\n");
+/// \brief This method updates local state so that we know that PhysReg is the
+/// proper container for VirtReg now. The physical register must not be used
+/// for anything else when this is called.
+void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
+ DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to "
+ << printReg(PhysReg, TRI) << "\n");
PhysRegState[PhysReg] = LR.VirtReg;
assert(!LR.PhysReg && "Already assigned a physreg");
LR.PhysReg = PhysReg;
}
-RAFast::LiveRegMap::iterator
-RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+RegAllocFast::LiveRegMap::iterator
+RegAllocFast::assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg) {
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
assignVirtToPhysReg(*LRI, PhysReg);
return LRI;
}
-/// allocVirtReg - Allocate a physical register for VirtReg.
-RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI,
- LiveRegMap::iterator LRI,
- unsigned Hint) {
+/// Allocates a physical register for VirtReg.
+RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
+ LiveRegMap::iterator LRI, unsigned Hint) {
const unsigned VirtReg = LRI->VirtReg;
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
-
- // Ignore invalid hints.
- if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
- !RC->contains(Hint) || !MRI->isAllocatable(Hint)))
- Hint = 0;
-
// Take hint when possible.
- if (Hint) {
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ if (TargetRegisterInfo::isPhysicalRegister(Hint) &&
+ MRI->isAllocatable(Hint) && RC.contains(Hint)) {
// Ignore the hint if we would have to spill a dirty register.
unsigned Cost = calcSpillCost(Hint);
if (Cost < spillDirty) {
@@ -550,33 +560,32 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI,
}
}
- ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(RC);
-
// First try to find a completely free register.
- for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
- unsigned PhysReg = *I;
+ ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(&RC);
+ for (MCPhysReg PhysReg : AO) {
if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
assignVirtToPhysReg(*LRI, PhysReg);
return LRI;
}
}
- DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
- << TRI->getRegClassName(RC) << "\n");
+ DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from "
+ << TRI->getRegClassName(&RC) << "\n");
- unsigned BestReg = 0, BestCost = spillImpossible;
- for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
- unsigned Cost = calcSpillCost(*I);
- DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
+ unsigned BestReg = 0;
+ unsigned BestCost = spillImpossible;
+ for (MCPhysReg PhysReg : AO) {
+ unsigned Cost = calcSpillCost(PhysReg);
+ DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n");
DEBUG(dbgs() << "\tCost: " << Cost << "\n");
DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
// Cost is 0 when all aliases are already disabled.
if (Cost == 0) {
- assignVirtToPhysReg(*LRI, *I);
+ assignVirtToPhysReg(*LRI, PhysReg);
return LRI;
}
if (Cost < BestCost)
- BestReg = *I, BestCost = Cost;
+ BestReg = PhysReg, BestCost = Cost;
}
if (BestReg) {
@@ -595,11 +604,11 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI,
return assignVirtToPhysReg(VirtReg, *AO.begin());
}
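The control flow of allocVirtReg is easier to see without the LLVM plumbing. Here is a hedged restatement of the policy -- a cheap hint first, then any zero-cost register in allocation order, then the cheapest register to clear by spilling. pickPhysReg and its parameters are hypothetical stand-ins, and the Cost callback is assumed to have calcSpillCost's semantics.

#include <functional>
#include <vector>

// Returns 0 when nothing in Order can be taken or spilled.
unsigned pickPhysReg(const std::vector<unsigned> &Order, unsigned Hint,
                     const std::function<unsigned(unsigned)> &Cost,
                     unsigned SpillDirty, unsigned SpillImpossible) {
  // A valid hint is taken unless honoring it would spill a dirty register.
  if (Hint && Cost(Hint) < SpillDirty)
    return Hint;
  unsigned Best = 0, BestCost = SpillImpossible;
  for (unsigned R : Order) {
    unsigned C = Cost(R);
    if (C == 0)
      return R;               // free (or all aliases disabled): take it now
    if (C < BestCost) {
      Best = R;
      BestCost = C;
    }
  }
  return Best;                // cheapest register to clear by spilling
}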
-/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
-RAFast::LiveRegMap::iterator RAFast::defineVirtReg(MachineInstr &MI,
- unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+/// Allocates a register for VirtReg and marks it as dirty.
+RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ unsigned VirtReg,
+ unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
@@ -629,11 +638,11 @@ RAFast::LiveRegMap::iterator RAFast::defineVirtReg(MachineInstr &MI,
return LRI;
}
-/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
-RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI,
- unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+/// Make sure VirtReg is available in a physreg and return it.
+RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ unsigned VirtReg,
+ unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
@@ -642,11 +651,11 @@ RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI,
MachineOperand &MO = MI.getOperand(OpNum);
if (New) {
LRI = allocVirtReg(MI, LRI, Hint);
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
int FrameIndex = getStackSpaceFor(VirtReg, RC);
- DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
- << PrintReg(LRI->PhysReg, TRI) << "\n");
- TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI);
+ DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
+ << printReg(LRI->PhysReg, TRI) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI);
++NumLoads;
} else if (LRI->Dirty) {
if (isLastUseOfLocalReg(MO)) {
@@ -665,7 +674,7 @@ RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI,
} else if (MO.isKill()) {
// We must remove kill flags from uses of reloaded registers because the
// register would be killed immediately, and there might be a second use:
- // %foo = OR %x<kill>, %x
+ // %foo = OR killed %x, %x
// This would cause a second reload of %x into a different register.
DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
MO.setIsKill(false);
@@ -680,241 +689,236 @@ RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI,
return LRI;
}
-// setPhysReg - Change operand OpNum in MI the refer the PhysReg, considering
-// subregs. This may invalidate any operand pointers.
-// Return true if the operand kills its register.
-bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
- MachineOperand &MO = MI->getOperand(OpNum);
+/// Changes operand OpNum in MI to refer to PhysReg, considering subregs. This
+/// may invalidate any operand pointers. Returns true if the operand kills its
+/// register.
+bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum,
+ MCPhysReg PhysReg) {
+ MachineOperand &MO = MI.getOperand(OpNum);
bool Dead = MO.isDead();
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
+ MO.setIsRenamableIfNoExtraRegAllocReq();
return MO.isKill() || Dead;
}
// Handle subregister index.
MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+ MO.setIsRenamableIfNoExtraRegAllocReq();
MO.setSubReg(0);
// A kill flag implies killing the full register. Add corresponding super
// register kill.
if (MO.isKill()) {
- MI->addRegisterKilled(PhysReg, TRI, true);
+ MI.addRegisterKilled(PhysReg, TRI, true);
return true;
}
// A <def,read-undef> of a sub-register requires an implicit def of the full
// register.
if (MO.isDef() && MO.isUndef())
- MI->addRegisterDefined(PhysReg, TRI);
+ MI.addRegisterDefined(PhysReg, TRI);
return Dead;
}
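The subregister handling in setPhysReg follows one rule: a physical-register operand carries its subregister in the register itself, so the index must be folded in and then cleared. A standalone sketch under that assumption, with GetSubReg standing in for TRI->getSubReg and an invented operand type:

#include <functional>

struct OperandModel { unsigned Reg; unsigned SubIdx; }; // illustrative type

void rewriteToPhys(OperandModel &MO, unsigned PhysReg,
                   const std::function<unsigned(unsigned, unsigned)> &GetSubReg) {
  if (!MO.SubIdx) {
    MO.Reg = PhysReg;  // plain case: substitute the physreg directly
    return;
  }
  // Subregister case: translate (PhysReg, SubIdx) into the concrete physical
  // subregister and drop the index, which physical operands never carry.
  MO.Reg = PhysReg ? GetSubReg(PhysReg, MO.SubIdx) : 0;
  MO.SubIdx = 0;
}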
-// Handle special instruction operand like early clobbers and tied ops when
+// Handles special instruction operands like early clobbers and tied ops when
// there are additional physreg defines.
-void RAFast::handleThroughOperands(MachineInstr *MI,
- SmallVectorImpl<unsigned> &VirtDead) {
+void RegAllocFast::handleThroughOperands(MachineInstr &MI,
+ SmallVectorImpl<unsigned> &VirtDead) {
DEBUG(dbgs() << "Scanning for through registers:");
SmallSet<unsigned, 8> ThroughRegs;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
- (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
+ if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||
+ (MO.getSubReg() && MI.readsVirtualRegister(Reg))) {
if (ThroughRegs.insert(Reg).second)
- DEBUG(dbgs() << ' ' << PrintReg(Reg));
+ DEBUG(dbgs() << ' ' << printReg(Reg));
}
}
// If any physreg defines collide with preallocated through registers,
// we must spill and reallocate.
DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
markRegUsedInInstr(Reg);
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
if (ThroughRegs.count(PhysRegState[*AI]))
- definePhysReg(*MI, *AI, regFree);
+ definePhysReg(MI, *AI, regFree);
}
}
SmallVector<unsigned, 8> PartialDefs;
DEBUG(dbgs() << "Allocating tied uses.\n");
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
- unsigned DefIdx = 0;
- if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
- DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
- << DefIdx << ".\n");
- LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0);
- unsigned PhysReg = LRI->PhysReg;
- setPhysReg(MI, i, PhysReg);
+ if (!MO.isTied()) continue;
+ DEBUG(dbgs() << "Operand " << I << "("<< MO << ") is tied to operand "
+ << MI.findTiedOperandIdx(I) << ".\n");
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
+ MCPhysReg PhysReg = LRI->PhysReg;
+ setPhysReg(MI, I, PhysReg);
// Note: we don't update the def operand yet. That would cause the normal
// def-scan to attempt spilling.
- } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
+ } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) {
DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
// Reload the register, but don't assign to the operand just yet.
// That would confuse the later phys-def processing pass.
- LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0);
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
PartialDefs.push_back(LRI->PhysReg);
}
}
DEBUG(dbgs() << "Allocating early clobbers.\n");
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (!MO.isEarlyClobber())
continue;
// Note: defineVirtReg may invalidate MO.
- LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, 0);
- unsigned PhysReg = LRI->PhysReg;
- if (setPhysReg(MI, i, PhysReg))
+ LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, 0);
+ MCPhysReg PhysReg = LRI->PhysReg;
+ if (setPhysReg(MI, I, PhysReg))
VirtDead.push_back(Reg);
}
// Restore UsedInInstr to a state usable for allocating normal virtual uses.
UsedInInstr.clear();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
+ DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
<< " as used in instr\n");
markRegUsedInInstr(Reg);
}
// Also mark PartialDefs as used to avoid reallocation.
- for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
- markRegUsedInInstr(PartialDefs[i]);
+ for (unsigned PartialDef : PartialDefs)
+ markRegUsedInInstr(PartialDef);
+}
+
+#ifndef NDEBUG
+void RegAllocFast::dumpState() {
+ for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+ if (PhysRegState[Reg] == regDisabled) continue;
+ dbgs() << " " << printReg(Reg, TRI);
+ switch(PhysRegState[Reg]) {
+ case regFree:
+ break;
+ case regReserved:
+ dbgs() << "*";
+ break;
+ default: {
+ dbgs() << '=' << printReg(PhysRegState[Reg]);
+ LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ if (I->Dirty)
+ dbgs() << "*";
+ assert(I->PhysReg == Reg && "Bad inverse map");
+ break;
+ }
+ }
+ }
+ dbgs() << '\n';
+ // Check that LiveVirtRegs is the inverse.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+ e = LiveVirtRegs.end(); i != e; ++i) {
+ assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
+ "Bad map key");
+ assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
+ "Bad map value");
+ assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
+ }
}
+#endif
-void RAFast::AllocateBasicBlock() {
- DEBUG(dbgs() << "\nAllocating " << *MBB);
+void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
+ this->MBB = &MBB;
+ DEBUG(dbgs() << "\nAllocating " << MBB);
PhysRegState.assign(TRI->getNumRegs(), regDisabled);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
- MachineBasicBlock::iterator MII = MBB->begin();
+ MachineBasicBlock::iterator MII = MBB.begin();
// Add live-in registers as live.
- for (const auto &LI : MBB->liveins())
+ for (const MachineBasicBlock::RegisterMaskPair LI : MBB.liveins())
if (MRI->isAllocatable(LI.PhysReg))
definePhysReg(*MII, LI.PhysReg, regReserved);
- SmallVector<unsigned, 8> VirtDead;
- SmallVector<MachineInstr*, 32> Coalesced;
+ VirtDead.clear();
+ Coalesced.clear();
// Otherwise, sequentially allocate each instruction in the MBB.
- while (MII != MBB->end()) {
- MachineInstr *MI = &*MII++;
- const MCInstrDesc &MCID = MI->getDesc();
- DEBUG({
- dbgs() << "\n>> " << *MI << "Regs:";
- for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
- if (PhysRegState[Reg] == regDisabled) continue;
- dbgs() << " " << TRI->getName(Reg);
- switch(PhysRegState[Reg]) {
- case regFree:
- break;
- case regReserved:
- dbgs() << "*";
- break;
- default: {
- dbgs() << '=' << PrintReg(PhysRegState[Reg]);
- LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- if (I->Dirty)
- dbgs() << "*";
- assert(I->PhysReg == Reg && "Bad inverse map");
- break;
- }
- }
- }
- dbgs() << '\n';
- // Check that LiveVirtRegs is the inverse.
- for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
- e = LiveVirtRegs.end(); i != e; ++i) {
- assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
- "Bad map key");
- assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
- "Bad map value");
- assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
- }
- });
+ for (MachineInstr &MI : MBB) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ DEBUG(
+ dbgs() << "\n>> " << MI << "Regs:";
+ dumpState()
+ );
// Debug values are not allowed to change codegen in any way.
- if (MI->isDebugValue()) {
- bool ScanDbgValue = true;
- while (ScanDbgValue) {
- ScanDbgValue = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
- LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
- if (LRI != LiveVirtRegs.end())
- setPhysReg(MI, i, LRI->PhysReg);
- else {
- int SS = StackSlotForVirtReg[Reg];
- if (SS == -1) {
- // We can't allocate a physreg for a DebugValue, sorry!
- DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
- MO.setReg(0);
- }
- else {
- // Modify DBG_VALUE now that the value is in a spill slot.
- bool IsIndirect = MI->isIndirectDebugValue();
- uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
- const MDNode *Var = MI->getDebugVariable();
- const MDNode *Expr = MI->getDebugExpression();
- DebugLoc DL = MI->getDebugLoc();
- MachineBasicBlock *MBB = MI->getParent();
- assert(
- cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
- "Expected inlined-at fields to agree");
- MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL,
- TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(SS)
- .addImm(Offset)
- .addMetadata(Var)
- .addMetadata(Expr);
- DEBUG(dbgs() << "Modifying debug info due to spill:"
- << "\t" << *NewDV);
- // Scan NewDV operands from the beginning.
- MI = NewDV;
- ScanDbgValue = true;
- break;
- }
- }
- LiveDbgValueMap[Reg].push_back(MI);
+ if (MI.isDebugValue()) {
+ MachineInstr *DebugMI = &MI;
+ MachineOperand &MO = DebugMI->getOperand(0);
+
+ // Ignore DBG_VALUEs that aren't based on virtual registers. These are
+ // mostly constants and frame indices.
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // See if this virtual register has already been allocated to a physical
+ // register or spilled to a stack slot.
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
+ if (LRI != LiveVirtRegs.end())
+ setPhysReg(*DebugMI, 0, LRI->PhysReg);
+ else {
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS != -1) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ updateDbgValueForSpill(*DebugMI, SS);
+ DEBUG(dbgs() << "Modifying debug info due to spill:"
+ << "\t" << *DebugMI);
+ continue;
}
+
+ // We can't allocate a physreg for a DebugValue, sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
}
- // Next instruction.
+
+ // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
+ // that future spills of Reg will have DBG_VALUEs.
+ LiveDbgValueMap[Reg].push_back(DebugMI);
continue;
}
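The rewritten DBG_VALUE handling is a three-way decision. A minimal sketch of the policy with invented types (none of these are LLVM's): point the debug value at the assigned physreg if the vreg is live, at its spill slot if it was spilled, and mark the location as lost otherwise.

#include <optional>

struct DbgLoc { enum Kind { PhysReg, FrameIndex, Undef } K; int Value; };

DbgLoc locateForDebugValue(std::optional<unsigned> AssignedPhysReg,
                           int StackSlot /* -1 when never spilled */) {
  if (AssignedPhysReg)
    return {DbgLoc::PhysReg, static_cast<int>(*AssignedPhysReg)};
  if (StackSlot != -1)
    return {DbgLoc::FrameIndex, StackSlot}; // value lives in the spill slot
  return {DbgLoc::Undef, 0};                // the location is simply lost
}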
// If this is a copy, we may be able to coalesce.
- unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0;
- if (MI->isCopy()) {
- CopyDst = MI->getOperand(0).getReg();
- CopySrc = MI->getOperand(1).getReg();
- CopyDstSub = MI->getOperand(0).getSubReg();
- CopySrcSub = MI->getOperand(1).getSubReg();
+ unsigned CopySrcReg = 0;
+ unsigned CopyDstReg = 0;
+ unsigned CopySrcSub = 0;
+ unsigned CopyDstSub = 0;
+ if (MI.isCopy()) {
+ CopyDstReg = MI.getOperand(0).getReg();
+ CopySrcReg = MI.getOperand(1).getReg();
+ CopyDstSub = MI.getOperand(0).getSubReg();
+ CopySrcSub = MI.getOperand(1).getSubReg();
}
// Track registers used by instruction.
@@ -928,8 +932,8 @@ void RAFast::AllocateBasicBlock() {
bool hasEarlyClobbers = false;
bool hasPartialRedefs = false;
bool hasPhysDefs = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
// Make sure MRI knows about registers clobbered by regmasks.
if (MO.isRegMask()) {
MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
@@ -946,7 +950,7 @@ void RAFast::AllocateBasicBlock() {
} else {
if (MO.isEarlyClobber())
hasEarlyClobbers = true;
- if (MO.getSubReg() && MI->readsVirtualRegister(Reg))
+ if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
hasPartialRedefs = true;
}
continue;
@@ -955,7 +959,7 @@ void RAFast::AllocateBasicBlock() {
if (MO.isUse()) {
usePhysReg(MO);
} else if (MO.isEarlyClobber()) {
- definePhysReg(*MI, Reg,
+ definePhysReg(MI, Reg,
(MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
hasEarlyClobbers = true;
} else
@@ -971,11 +975,11 @@ void RAFast::AllocateBasicBlock() {
// sure the same register is allocated to uses and defs.
// We didn't detect inline asm tied operands above, so just make this extra
// pass for all inline asm.
- if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
(hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
handleThroughOperands(MI, VirtDead);
// Don't attempt coalescing when we have funny stuff going on.
- CopyDst = 0;
+ CopyDstReg = 0;
// Pretend we have early clobbers so the use operands get marked below.
// This is not necessary for the common case of a single tied use.
hasEarlyClobbers = true;
@@ -983,16 +987,16 @@ void RAFast::AllocateBasicBlock() {
// Second scan.
// Allocate virtreg uses.
- for (unsigned i = 0; i != VirtOpEnd; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned I = 0; I != VirtOpEnd; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
- LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, CopyDst);
- unsigned PhysReg = LRI->PhysReg;
- CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
- if (setPhysReg(MI, i, PhysReg))
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, CopyDstReg);
+ MCPhysReg PhysReg = LRI->PhysReg;
+ CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
+ if (setPhysReg(MI, I, PhysReg))
killVirtReg(LRI);
}
}
@@ -1001,19 +1005,18 @@ void RAFast::AllocateBasicBlock() {
// this point.
UsedInInstr.clear();
if (hasEarlyClobbers) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
// Look for physreg defs and tied uses.
- if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
+ if (!MO.isDef() && !MO.isTied()) continue;
markRegUsedInInstr(Reg);
}
}
- unsigned DefOpEnd = MI->getNumOperands();
- if (MI->isCall()) {
+ unsigned DefOpEnd = MI.getNumOperands();
+ if (MI.isCall()) {
// Spill all virtregs before a call. This serves one purpose: If an
// exception is thrown, the landing pad is going to expect to find
// registers in their spill slots.
@@ -1023,99 +1026,92 @@ void RAFast::AllocateBasicBlock() {
// those for virtual registers in between.
DEBUG(dbgs() << " Spilling remaining registers before call.\n");
spillAll(MI);
-
- // The imp-defs are skipped below, but we still need to mark those
- // registers as used by the function.
- SkippedInstrs.insert(&MCID);
}
// Third scan.
// Allocate defs and collect dead defs.
- for (unsigned i = 0; i != DefOpEnd; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned I = 0; I != DefOpEnd; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
continue;
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
if (!MRI->isAllocatable(Reg)) continue;
- definePhysReg(*MI, Reg, MO.isDead() ? regFree : regReserved);
+ definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
continue;
}
- LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, CopySrc);
- unsigned PhysReg = LRI->PhysReg;
- if (setPhysReg(MI, i, PhysReg)) {
+ LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, CopySrcReg);
+ MCPhysReg PhysReg = LRI->PhysReg;
+ if (setPhysReg(MI, I, PhysReg)) {
VirtDead.push_back(Reg);
- CopyDst = 0; // cancel coalescing;
+ CopyDstReg = 0; // cancel coalescing;
} else
- CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0;
+ CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
}
// Kill dead defs after the scan to ensure that multiple defs of the same
// register are allocated identically. We didn't need to do this for uses
// because we are creating our own kill flags, and they are always at the
// last use.
- for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
- killVirtReg(VirtDead[i]);
+ for (unsigned VirtReg : VirtDead)
+ killVirtReg(VirtReg);
VirtDead.clear();
- if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
- DEBUG(dbgs() << "-- coalescing: " << *MI);
- Coalesced.push_back(MI);
+ if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
+ DEBUG(dbgs() << "-- coalescing: " << MI);
+ Coalesced.push_back(&MI);
} else {
- DEBUG(dbgs() << "<< " << *MI);
+ DEBUG(dbgs() << "<< " << MI);
}
}
// Spill all physical registers holding virtual registers now.
DEBUG(dbgs() << "Spilling live registers at end of block.\n");
- spillAll(MBB->getFirstTerminator());
+ spillAll(MBB.getFirstTerminator());
// Erase all the coalesced copies. We are delaying it until now because
// LiveVirtRegs might refer to the instrs.
- for (unsigned i = 0, e = Coalesced.size(); i != e; ++i)
- MBB->erase(Coalesced[i]);
+ for (MachineInstr *MI : Coalesced)
+ MBB.erase(MI);
NumCopies += Coalesced.size();
- DEBUG(MBB->dump());
+ DEBUG(MBB.dump());
}
-/// runOnMachineFunction - Register allocate the whole function
-///
-bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
+/// Allocates registers for a function.
+bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
- << "********** Function: " << Fn.getName() << '\n');
- MF = &Fn;
- MRI = &MF->getRegInfo();
- TRI = MF->getSubtarget().getRegisterInfo();
- TII = MF->getSubtarget().getInstrInfo();
- MRI->freezeReservedRegs(Fn);
- RegClassInfo.runOnMachineFunction(Fn);
+ << "********** Function: " << MF.getName() << '\n');
+ MRI = &MF.getRegInfo();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ TRI = STI.getRegisterInfo();
+ TII = STI.getInstrInfo();
+ MFI = &MF.getFrameInfo();
+ MRI->freezeReservedRegs(MF);
+ RegClassInfo.runOnMachineFunction(MF);
UsedInInstr.clear();
UsedInInstr.setUniverse(TRI->getNumRegUnits());
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
- StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
- LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ StackSlotForVirtReg.resize(NumVirtRegs);
+ LiveVirtRegs.setUniverse(NumVirtRegs);
// Loop over all of the basic blocks, eliminating virtual register references
- for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
- MBBi != MBBe; ++MBBi) {
- MBB = &*MBBi;
- AllocateBasicBlock();
- }
+ for (MachineBasicBlock &MBB : MF)
+ allocateBasicBlock(MBB);
// All machine operands and other references to virtual registers have been
// replaced. Remove the virtual registers.
MRI->clearVirtRegs();
- SkippedInstrs.clear();
StackSlotForVirtReg.clear();
LiveDbgValueMap.clear();
return true;
}
FunctionPass *llvm::createFastRegisterAllocator() {
- return new RAFast();
+ return new RegAllocFast();
}
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 020e81eca2dd..186ef577e31d 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -30,12 +31,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -53,6 +54,9 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -65,10 +69,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -104,10 +105,11 @@ static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
" interference at a time"),
cl::init(8));
-static cl::opt<bool>
-ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
- cl::desc("Exhaustive Search for registers bypassing the depth "
- "and interference cutoffs of last chance recoloring"));
+static cl::opt<bool> ExhaustiveSearch(
+ "exhaustive-register-search",
+ cl::desc("Exhaustive Search for registers bypassing the depth "
+ "and interference cutoffs of last chance recoloring"),
+ cl::Hidden);
static cl::opt<bool> EnableLocalReassignment(
"enable-local-reassign", cl::Hidden,
@@ -129,6 +131,12 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost",
cl::desc("Cost for first time use of callee-saved register."),
cl::init(0), cl::Hidden);
+static cl::opt<bool> ConsiderLocalIntervalCost(
+ "condsider-local-interval-cost", cl::Hidden,
+ cl::desc("Consider the cost of local intervals created by a split "
+ "candidate when choosing the best split candidate."),
+ cl::init(false));
+
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
@@ -277,6 +285,57 @@ class RAGreedy : public MachineFunctionPass,
}
};
+ /// EvictionTrack - Keeps track of past evictions in order to optimize region
+ /// split decisions.
+ class EvictionTrack {
+
+ public:
+ using EvictorInfo =
+ std::pair<unsigned /* evictor */, unsigned /* physreg */>;
+ using EvicteeInfo = llvm::MapVector<unsigned /* evictee */, EvictorInfo>;
+
+ private:
+ /// Each Vreg that has been evicted in the last stage of selectOrSplit will
+ /// be mapped to the evictor Vreg and the PhysReg it was evicted from.
+ EvicteeInfo Evictees;
+
+ public:
+ /// \brief Clear all eviction information.
+ void clear() { Evictees.clear(); }
+
+ /// \brief Clear eviction information for the given evictee Vreg.
+ /// E.g. when Vreg gets a new allocation, the old eviction info is no
+ /// longer relevant.
+ /// \param Evictee The evictee Vreg for whom we want to clear collected
+ /// eviction info.
+ void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); }
+
+ /// \brief Track new eviction.
+ /// The Evictor vreg has evicted the Evictee vreg from Physreg.
+ /// \param PhysReg The physical register Evictee was evicted from.
+ /// \param Evictor The evictor Vreg that evicted Evictee.
+ /// \param Evictee The evictee Vreg.
+ void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
+ Evictees[Evictee].first = Evictor;
+ Evictees[Evictee].second = PhysReg;
+ }
+
+ /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg.
+ /// \param Evictee The evictee vreg.
+ /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if
+ /// nobody has evicted Evictee from PhysReg.
+ EvictorInfo getEvictor(unsigned Evictee) {
+ if (Evictees.count(Evictee)) {
+ return Evictees[Evictee];
+ }
+
+ return EvictorInfo(0, 0);
+ }
+ };
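A self-contained analogue of EvictionTrack, using only the standard library, to make the intended map semantics concrete: the last recorded eviction wins, and lookups never insert. This is a sketch for illustration, not the class above.

#include <map>
#include <utility>

class EvictionLog {
  // evictee vreg -> (evictor vreg, physreg it was evicted from)
  std::map<unsigned, std::pair<unsigned, unsigned>> Evictees;

public:
  void add(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
    Evictees[Evictee] = {Evictor, PhysReg}; // overwrite: last eviction wins
  }
  void forget(unsigned Evictee) { Evictees.erase(Evictee); }
  std::pair<unsigned, unsigned> evictorOf(unsigned Evictee) const {
    auto It = Evictees.find(Evictee); // const lookup; no accidental insert
    return It == Evictees.end() ? std::make_pair(0u, 0u) : It->second;
  }
};

As a design note, getEvictor above performs a count followed by operator[] -- two lookups on a non-const method; a single find, as in the sketch, would avoid both.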
+
+ // Keeps track of past evictions in order to optimize region split decision.
+ EvictionTrack LastEvicted;
+
// splitting state.
std::unique_ptr<SplitAnalysis> SA;
std::unique_ptr<SplitEditor> SE;
@@ -340,6 +399,10 @@ class RAGreedy : public MachineFunctionPass,
/// obtained from the TargetSubtargetInfo.
bool EnableLocalReassign;
+ /// Enable or disable the consideration of the cost of local intervals created
+ /// by a split candidate when choosing the best split candidate.
+ bool EnableAdvancedRASplitCost;
+
/// Set of broken hints that may be reconciled later because of eviction.
SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
@@ -382,13 +445,24 @@ private:
bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
void growRegion(GlobalSplitCandidate &Cand);
- BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&);
+ bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,
+ unsigned BBNumber,
+ const AllocationOrder &Order);
+ BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
+ const AllocationOrder &Order,
+ bool *CanCauseEvictionChain);
bool calcCompactRegion(GlobalSplitCandidate&);
void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
void calcGapWeights(unsigned, SmallVectorImpl<float>&);
unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+ bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg,
+ SlotIndex Start, SlotIndex End,
+ EvictionCost &MaxCost);
+ unsigned getCheapestEvicteeWeight(const AllocationOrder &Order,
+ LiveInterval &VirtReg, SlotIndex Start,
+ SlotIndex End, float *BestEvictWeight);
void evictInterference(LiveInterval&, unsigned,
SmallVectorImpl<unsigned>&);
bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
@@ -405,7 +479,8 @@ private:
unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
AllocationOrder &Order,
BlockFrequency &BestCost,
- unsigned &NumCands, bool IgnoreCSR);
+ unsigned &NumCands, bool IgnoreCSR,
+ bool *CanCauseEvictionChain = nullptr);
/// Perform region splitting.
unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
bool HasCompact,
@@ -546,14 +621,17 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
//===----------------------------------------------------------------------===//
bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+ LiveInterval &LI = LIS->getInterval(VirtReg);
if (VRM->hasPhys(VirtReg)) {
- LiveInterval &LI = LIS->getInterval(VirtReg);
Matrix->unassign(LI);
aboutToRemoveInterval(LI);
return true;
}
// Unassigned virtreg is probably in the priority queue.
// RegAllocBase will erase it after dequeueing.
+ // Nonetheless, clear the live-range so that the debug
+ // dump will show the right state for that VirtReg.
+ LI.clear();
return false;
}
@@ -685,7 +763,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
// preferred register.
if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
if (Order.isHint(Hint)) {
- DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
+ DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n');
EvictionCost MaxCost;
MaxCost.setBrokenHints(1);
if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
@@ -704,7 +782,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
if (!Cost)
return PhysReg;
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+ DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " << Cost
<< '\n');
unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
return CheapReg ? CheapReg : PhysReg;
@@ -734,7 +812,7 @@ unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
}
if (PhysReg)
DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
- << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI)
+ << printReg(PrevReg, TRI) << " to " << printReg(PhysReg, TRI)
<< '\n');
return PhysReg;
}
@@ -856,6 +934,92 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
return true;
}
+/// \brief Return true if all interferences between VirtReg and PhysReg between
+/// Start and End can be evicted.
+///
+/// \param VirtReg Live range that is about to be assigned.
+/// \param PhysReg Desired register for assignment.
+/// \param Start Start of range to look for interferences.
+/// \param End End of range to look for interferences.
+/// \param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// \return True when interference can be evicted cheaper than MaxCost.
+bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
+ unsigned PhysReg, SlotIndex Start,
+ SlotIndex End,
+ EvictionCost &MaxCost) {
+ EvictionCost Cost;
+
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+
+ // Check if any interfering live range is heavier than MaxWeight.
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+
+ // Check if the interference overlaps the segment of interest.
+ if (!Intf->overlaps(Start, End))
+ continue;
+
+ // Cannot evict non-virtual-register interference.
+ if (!TargetRegisterInfo::isVirtualRegister(Intf->reg))
+ return false;
+ // Never evict spill products. They cannot split or spill.
+ if (getStage(*Intf) == RS_Done)
+ return false;
+
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ }
+ }
+
+ if (Cost.MaxWeight == 0)
+ return false;
+
+ MaxCost = Cost;
+ return true;
+}
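Stripped of the register-unit iteration, canEvictInterferenceInRange is an overlap filter plus a budgeted maximum. A hedged sketch with stand-in types, assuming half-open ranges:

#include <algorithm>
#include <vector>

struct InterfModel { long Begin, End; bool Evictable; float Weight; };

// Returns true and tightens MaxWeight when every interference overlapping
// [Start, End) can be evicted more cheaply than the current budget.
bool canEvictInRange(const std::vector<InterfModel> &Interfs, long Start,
                     long End, float &MaxWeight) {
  float Max = 0;
  for (const InterfModel &I : Interfs) {
    if (I.End <= Start || I.Begin >= End)
      continue;            // no overlap with the segment of interest
    if (!I.Evictable)
      return false;        // e.g. a physreg or an RS_Done interval
    Max = std::max(Max, I.Weight);
    if (Max >= MaxWeight)
      return false;        // at least as expensive as the budget: abort
  }
  if (Max == 0)
    return false;          // nothing to evict inside the range
  MaxWeight = Max;         // report the tightened budget to the caller
  return true;
}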
+
+/// \brief Return the physical register that is the best candidate for eviction
+/// by a local split interval that will be created between Start and End.
+///
+/// \param Order The allocation order.
+/// \param VirtReg Live range that is about to be assigned.
+/// \param Start Start of range to look for interferences.
+/// \param End End of range to look for interferences.
+/// \param BestEvictWeight The eviction cost of that eviction.
+/// \return The PhysReg which is the best candidate for eviction and the
+/// eviction cost in BestEvictWeight.
+unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
+ LiveInterval &VirtReg,
+ SlotIndex Start, SlotIndex End,
+ float *BestEvictWeight) {
+ EvictionCost BestEvictCost;
+ BestEvictCost.setMax();
+ BestEvictCost.MaxWeight = VirtReg.weight;
+ unsigned BestEvicteePhys = 0;
+
+ // Go over all physical registers and find the best candidate for eviction
+ for (auto PhysReg : Order.getOrder()) {
+
+ if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End,
+ BestEvictCost))
+ continue;
+
+ // Best so far.
+ BestEvicteePhys = PhysReg;
+ }
+ *BestEvictWeight = BestEvictCost.MaxWeight;
+ return BestEvicteePhys;
+}
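getCheapestEvicteeWeight relies on a subtlety worth spelling out: because each successful range check tightens the shared cost budget, the last register accepted while scanning the allocation order is the cheapest one. A standalone sketch of that scan, where TryEvict is a placeholder for a budget-tightening check like the one above:

#include <functional>
#include <vector>

// Scan the allocation order; TryEvict succeeds only when a register's
// eviction cost beats (and lowers) Budget, so the last success is cheapest.
unsigned cheapestEvictee(const std::vector<unsigned> &Order, float &Budget,
                         const std::function<bool(unsigned, float &)> &TryEvict) {
  unsigned Best = 0;
  for (unsigned R : Order)
    if (TryEvict(R, Budget))
      Best = R;   // Budget has just been lowered to R's eviction weight
  return Best;    // 0 when nothing in range can be evicted
}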
+
/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to Physreg. This assumes that canEvictInterference
/// returned true.
@@ -868,7 +1032,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
if (!Cascade)
Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
- DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+ DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
// Collect all interfering virtregs first.
@@ -890,6 +1054,9 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
// The same VirtReg may be present in multiple RegUnits. Skip duplicates.
if (!VRM->hasPhys(Intf->reg))
continue;
+
+ LastEvicted.addEviction(PhysReg, VirtReg.reg, Intf->reg);
+
Matrix->unassign(*Intf);
assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
VirtReg.isSpillable() < Intf->isSpillable()) &&
@@ -957,8 +1124,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
// The first use of a callee-saved register in a function has cost 1.
// Don't start using a CSR when the CostPerUseLimit is low.
if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
- << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
+ DEBUG(dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
+ << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
<< '\n');
continue;
}
@@ -1211,13 +1378,117 @@ BlockFrequency RAGreedy::calcSpillCost() {
return Cost;
}
+/// \brief Check if splitting Evictee will create a local split interval in
+/// basic block number BBNumber that may cause a bad eviction chain. This is
+/// intended to prevent bad eviction sequences like:
+/// movl %ebp, 8(%esp) # 4-byte Spill
+/// movl %ecx, %ebp
+/// movl %ebx, %ecx
+/// movl %edi, %ebx
+/// movl %edx, %edi
+/// cltd
+/// idivl %esi
+/// movl %edi, %edx
+/// movl %ebx, %edi
+/// movl %ecx, %ebx
+/// movl %ebp, %ecx
+/// movl 16(%esp), %ebp # 4-byte Reload
+///
+/// Such sequences are created in 2 scenarios:
+///
+/// Scenario #1:
+/// %0 is evicted from physreg0 by %1.
+/// Evictee %0 is intended for region splitting with split candidate
+/// physreg0 (the reg %0 was evicted from).
+/// Region splitting creates a local interval because of interference with the
+/// evictor %1 (normally region splitting creates 2 intervals, the "by reg"
+/// and "by stack" intervals, plus a local interval created when interference
+/// occurs).
+/// One of the split intervals ends up evicting %2 from physreg1.
+/// Evictee %2 is intended for region splitting with split candidate
+/// physreg1.
+/// One of the split intervals ends up evicting %3 from physreg2, etc.
+///
+/// Scenario #2
+/// %0 is evicted from physreg0 by %1.
+/// %2 is evicted from physreg2 by %3 etc.
+/// Evictee %0 is intended for region splitting with split candidate
+/// physreg1.
+/// Region splitting creates a local interval because of interference with the
+/// evictor %1.
+/// One of the split intervals ends up evicting back original evictor %1
+/// from physreg0 (the reg %0 was evicted from).
+/// Another evictee %2 is intended for region splitting with split candidate
+/// physreg1.
+/// One of the split intervals ends up evicting %3 from physreg2, etc.
+///
+/// \param Evictee The register considered to be split.
+/// \param Cand The split candidate that determines the physical register
+/// we are splitting for and the interferences.
+/// \param BBNumber The number of a BB for which the region split process will
+/// create a local split interval.
+/// \param Order The physical registers that may get evicted by a split
+/// artifact of Evictee.
+/// \return True if splitting Evictee may cause a bad eviction chain, false
+/// otherwise.
+bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee,
+ GlobalSplitCandidate &Cand,
+ unsigned BBNumber,
+ const AllocationOrder &Order) {
+ EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee);
+ unsigned Evictor = VregEvictorInfo.first;
+ unsigned PhysReg = VregEvictorInfo.second;
+
+ // No actual evictor.
+ if (!Evictor || !PhysReg)
+ return false;
+
+ float MaxWeight = 0;
+ unsigned FutureEvictedPhysReg =
+ getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),
+ Cand.Intf.first(), Cand.Intf.last(), &MaxWeight);
+
+ // The bad eviction chain occurs when either the split candidate is the
+ // evicted reg or one of the split artifacts will evict the evicting reg.
+ if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg))
+ return false;
+
+ Cand.Intf.moveToBlock(BBNumber);
+
+ // Check to see if the Evictor contains interference (with Evictee) in the
+ // given BB. If so, this interference caused the eviction of Evictee from
+ // PhysReg. This suggests that we will create a local interval during the
+ // region split to avoid this interference. This local interval may cause a
+ // bad eviction chain.
+ if (!LIS->hasInterval(Evictor))
+ return false;
+ LiveInterval &EvictorLI = LIS->getInterval(Evictor);
+ if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end())
+ return false;
+
+ // Now, check to see if the local interval we will create is going to be
+ // expensive enough to evict somebody. If so, this may cause a bad eviction
+ // chain.
+ VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI);
+ float splitArtifactWeight =
+ VRAI.futureWeight(LIS->getInterval(Evictee),
+ Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
+ if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight)
+ return false;
+
+ return true;
+}
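The function's three early exits compose into one predicate. The sketch below flattens them, with a hypothetical ChainQuery struct gathering the values the real code computes on the fly from LIS, Cand, and the eviction log:

struct ChainQuery {
  unsigned EvictorPhysReg;       // reg the evictee was originally evicted from
  unsigned SplitCandPhysReg;     // register we are splitting for
  unsigned FutureEvictedPhysReg; // best eviction a split artifact could do
  bool EvictorLiveAtInterference;
  float ArtifactWeight;          // future local interval weight (< 0: unknown)
  float CheapestEvicteeWeight;   // weight the artifact must beat to evict
};

bool mayCauseEvictionChain(const ChainQuery &Q) {
  if (!Q.EvictorPhysReg)
    return false; // nobody actually evicted this vreg earlier
  if (Q.EvictorPhysReg != Q.SplitCandPhysReg &&
      Q.EvictorPhysReg != Q.FutureEvictedPhysReg)
    return false; // the chain cannot close back on the evicting register
  if (!Q.EvictorLiveAtInterference)
    return false; // no local split interval will be created in this block
  // The chain is plausible if the artifact's weight is unknown or heavy
  // enough to evict the cheapest candidate.
  return Q.ArtifactWeight < 0 || Q.ArtifactWeight >= Q.CheapestEvicteeWeight;
}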
+
/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
-BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
+BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
+ const AllocationOrder &Order,
+ bool *CanCauseEvictionChain) {
BlockFrequency GlobalCost = 0;
const BitVector &LiveBundles = Cand.LiveBundles;
+ unsigned VirtRegToSplit = SA->getParent().reg;
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
@@ -1226,6 +1497,24 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
unsigned Ins = 0;
+ Cand.Intf.moveToBlock(BC.Number);
+ // Check whether a local interval is going to be created during the region
+ // split.
+ if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
+ Cand.Intf.hasInterference() && BI.LiveIn && BI.LiveOut && RegIn &&
+ RegOut) {
+
+ if (splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) {
+ // This interfernce cause our eviction from this assignment, we might
+ // evict somebody else, add that cost.
+ // See splitCanCauseEvictionChain for detailed description of scenarios.
+ GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
+ GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
+
+ *CanCauseEvictionChain = true;
+ }
+ }
+
if (BI.LiveIn)
Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
if (BI.LiveOut)
@@ -1246,6 +1535,20 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
if (Cand.Intf.hasInterference()) {
GlobalCost += SpillPlacer->getBlockFrequency(Number);
GlobalCost += SpillPlacer->getBlockFrequency(Number);
+
+ // Check whether a local interval is going to be created during the
+ // region split.
+ if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
+ splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) {
+ // This interference caused our eviction from this assignment; we might
+ // evict somebody else, so add that cost.
+ // See splitCanCauseEvictionChain for detailed description of
+ // scenarios.
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+
+ *CanCauseEvictionChain = true;
+ }
}
continue;
}
@@ -1309,7 +1612,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// Create separate intervals for isolated blocks with multiple uses.
if (!IntvIn && !IntvOut) {
- DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
+ DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
SE->splitSingleBlock(BI);
continue;
@@ -1410,6 +1713,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs) {
unsigned NumCands = 0;
+ BlockFrequency SpillCost = calcSpillCost();
BlockFrequency BestCost;
// Check if we can split this live range around a compact region.
@@ -1421,14 +1725,24 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
} else {
// No benefit from the compact region, our fallback will be per-block
// splitting. Make sure we find a solution that is cheaper than spilling.
- BestCost = calcSpillCost();
+ BestCost = SpillCost;
DEBUG(dbgs() << "Cost of isolating all blocks = ";
MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
}
+ bool CanCauseEvictionChain = false;
unsigned BestCand =
calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
- false/*IgnoreCSR*/);
+ false /*IgnoreCSR*/, &CanCauseEvictionChain);
+
+ // Split candidates with compact regions can cause a bad eviction sequence.
+ // See splitCanCauseEvictionChain for detailed description of scenarios.
+ // To avoid it, we need to compare the cost with the spill cost and not the
+ // current max frequency.
+ if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) &&
+ CanCauseEvictionChain) {
+ return 0;
+ }
// No solutions found, fall back to single block splitting.
if (!HasCompact && BestCand == NoCand)
@@ -1440,8 +1754,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
AllocationOrder &Order,
BlockFrequency &BestCost,
- unsigned &NumCands,
- bool IgnoreCSR) {
+ unsigned &NumCands, bool IgnoreCSR,
+ bool *CanCauseEvictionChain) {
unsigned BestCand = NoCand;
Order.rewind();
while (unsigned PhysReg = Order.next()) {
@@ -1476,10 +1790,10 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
SpillPlacer->prepare(Cand.LiveBundles);
BlockFrequency Cost;
if (!addSplitConstraints(Cand.Intf, Cost)) {
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
+ DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
continue;
}
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
+ DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
MBFI->printBlockFreq(dbgs(), Cost));
if (Cost >= BestCost) {
DEBUG({
@@ -1487,7 +1801,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
dbgs() << " worse than no bundles\n";
else
dbgs() << " worse than "
- << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+ << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
});
continue;
}
@@ -1501,7 +1815,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
continue;
}
- Cost += calcGlobalSplitCost(Cand);
+ bool HasEvictionChain = false;
+ Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain);
DEBUG({
dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
<< " with bundles";
@@ -1512,9 +1827,24 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
if (Cost < BestCost) {
BestCand = NumCands;
BestCost = Cost;
+ // See splitCanCauseEvictionChain for detailed description of bad
+ // eviction chain scenarios.
+ if (CanCauseEvictionChain)
+ *CanCauseEvictionChain = HasEvictionChain;
}
++NumCands;
}
+
+ if (CanCauseEvictionChain && BestCand != NoCand) {
+ // See splitCanCauseEvictionChain for detailed description of bad
+ // eviction chain scenarios.
+ DEBUG(dbgs() << "Best split candidate of vreg "
+ << printReg(VirtReg.reg, TRI) << " may ");
+ if (!(*CanCauseEvictionChain))
+ DEBUG(dbgs() << "not ");
+ DEBUG(dbgs() << "cause bad eviction chain\n");
+ }
+
return BestCand;
}
@@ -1535,7 +1865,7 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
UsedCands.push_back(BestCand);
Cand.IntvIdx = SE->openIntv();
- DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
+ DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
<< B << " bundles, intv " << Cand.IntvIdx << ".\n");
(void)B;
}
@@ -1884,7 +2214,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
+ DEBUG(dbgs() << printReg(PhysReg, TRI) << ' '
<< Uses[SplitBefore] << '-' << Uses[SplitAfter]
<< " i=" << MaxGap);
@@ -1985,7 +2315,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
if (IntvMap[i] == 1) {
setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
- DEBUG(dbgs() << PrintReg(LREdit.get(i)));
+ DEBUG(dbgs() << printReg(LREdit.get(i)));
}
DEBUG(dbgs() << '\n');
}
@@ -2051,6 +2381,15 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Last Chance Recoloring
//===----------------------------------------------------------------------===//
+/// Return true if \p reg has any tied def operand.
+static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
+ for (const MachineOperand &MO : MRI->def_operands(reg))
+ if (MO.isTied())
+ return true;
+
+ return false;
+}
+
/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
@@ -2079,10 +2418,13 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
// If Intf is done and sit on the same register class as VirtReg,
// it would not be recolorable as it is in the same state as VirtReg.
- if ((getStage(*Intf) == RS_Done &&
- MRI->getRegClass(Intf->reg) == CurRC) ||
+ // However, if VirtReg has tied defs and Intf doesn't, then
+ // there is still a point in examining if it can be recolorable.
+ if (((getStage(*Intf) == RS_Done &&
+ MRI->getRegClass(Intf->reg) == CurRC) &&
+ !(hasTiedDef(MRI, VirtReg.reg) && !hasTiedDef(MRI, Intf->reg))) ||
FixedRegisters.count(Intf->reg)) {
- DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n");
+ DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n");
return false;
}
RecoloringCandidates.insert(Intf);
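The reworked early-abort condition above packs three concerns into one expression. As a plain truth-table sketch (names invented): interference in the RS_Done stage with the same register class normally blocks recoloring, unless the candidate has a tied def while the interference does not.

// Sketch of the recolorability early-abort as a standalone predicate.
bool notRecolorable(bool IntfDone, bool SameClass, bool VRegHasTiedDef,
                    bool IntfHasTiedDef, bool IntfFixed) {
  bool BlockedByStage = IntfDone && SameClass;
  bool TiedEscapeHatch = VRegHasTiedDef && !IntfHasTiedDef;
  return (BlockedByStage && !TiedEscapeHatch) || IntfFixed;
}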
@@ -2162,7 +2504,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
Order.rewind();
while (unsigned PhysReg = Order.next()) {
DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
- << PrintReg(PhysReg, TRI) << '\n');
+ << printReg(PhysReg, TRI) << '\n');
RecoloringCandidates.clear();
VirtRegToPhysReg.clear();
CurrentNewVRegs.clear();
@@ -2170,7 +2512,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// It is only possible to recolor virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) >
LiveRegMatrix::IK_VirtReg) {
- DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n");
+ DEBUG(dbgs() << "Some interferences are not with virtual registers.\n");
continue;
}
@@ -2179,7 +2521,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// the interferences.
if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
FixedRegisters)) {
- DEBUG(dbgs() << "Some inteferences cannot be recolored.\n");
+ DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
continue;
}
@@ -2222,7 +2564,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
}
DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
- << PrintReg(PhysReg, TRI) << '\n');
+ << printReg(PhysReg, TRI) << '\n');
// The recoloring attempt failed, undo the changes.
FixedRegisters = SaveFixedRegisters;
@@ -2285,7 +2627,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
continue;
}
DEBUG(dbgs() << "Recoloring of " << *LI
- << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
+ << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
Matrix->assign(*LI, PhysReg);
FixedRegisters.insert(LI->reg);
@@ -2300,7 +2642,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<unsigned> &NewVRegs) {
CutOffInfo = CO_None;
- LLVMContext &Ctx = MF->getFunction()->getContext();
+ LLVMContext &Ctx = MF->getFunction().getContext();
SmallVirtRegSet FixedRegisters;
unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
if (Reg == ~0U && (CutOffInfo != CO_None)) {
@@ -2452,8 +2794,8 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
Visited.insert(Reg);
RecoloringCandidates.push_back(Reg);
- DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '('
- << PrintReg(PhysReg, TRI) << ")\n");
+ DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI) << '('
+ << printReg(PhysReg, TRI) << ")\n");
do {
Reg = RecoloringCandidates.pop_back_val();
@@ -2474,7 +2816,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
Matrix->checkInterference(LI, PhysReg)))
continue;
- DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI)
+ DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
<< ") is recolorable.\n");
// Gather the hint info.
@@ -2565,6 +2907,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
+ // If VirtReg got an assignment, the eviction info is no longer relevant.
+ LastEvicted.clearEvicteeInfo(VirtReg.reg);
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
// register.
@@ -2598,6 +2942,9 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// copy-related live-ranges.
if (Hint && Hint != PhysReg)
SetOfBrokenHints.insert(&VirtReg);
+ // If VirtReg evicted someone, its eviction info as an evictee is
+ // no longer relevant.
+ LastEvicted.clearEvicteeInfo(VirtReg.reg);
return PhysReg;
}
@@ -2617,8 +2964,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// Try splitting VirtReg or interferences.
unsigned NewVRegSizeBefore = NewVRegs.size();
unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
- if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
+ if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
+ // If VirtReg got split, the eviction info is no longer relevant.
+ LastEvicted.clearEvicteeInfo(VirtReg.reg);
return PhysReg;
+ }
}
// If we couldn't allocate a register from spilling, there is probably some
@@ -2702,17 +3052,20 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
if (Reloads || FoldedReloads || Spills || FoldedSpills) {
using namespace ore;
- MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload",
- L->getStartLoc(), L->getHeader());
- if (Spills)
- R << NV("NumSpills", Spills) << " spills ";
- if (FoldedSpills)
- R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
- if (Reloads)
- R << NV("NumReloads", Reloads) << " reloads ";
- if (FoldedReloads)
- R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
- ORE->emit(R << "generated in loop");
+ ORE->emit([&]() {
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload",
+ L->getStartLoc(), L->getHeader());
+ if (Spills)
+ R << NV("NumSpills", Spills) << " spills ";
+ if (FoldedSpills)
+ R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
+ if (Reloads)
+ R << NV("NumReloads", Reloads) << " reloads ";
+ if (FoldedReloads)
+ R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
+ R << "generated in loop";
+ return R;
+ });
}
}
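// The lambda-based ORE->emit above defers building the remark until the
// emitter knows remarks are enabled, avoiding the string formatting cost on
// the common path. A minimal sketch of the same pattern, with a hypothetical
// pass name and counter:

#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"

static void emitSpillRemark(llvm::MachineOptimizationRemarkEmitter &ORE,
                            const llvm::DebugLoc &Loc,
                            llvm::MachineBasicBlock *MBB, unsigned NumSpills) {
  ORE.emit([&]() {
    llvm::MachineOptimizationRemarkMissed R("sketch-pass", "Spills", Loc, MBB);
    // The remark body is only built when a consumer asked for remarks.
    R << llvm::ore::NV("NumSpills", NumSpills) << " spills generated in loop";
    return R;
  });
}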
@@ -2729,6 +3082,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
MF->getSubtarget().enableRALocalReassignment(
MF->getTarget().getOptLevel());
+ EnableAdvancedRASplitCost = ConsiderLocalIntervalCost ||
+ MF->getSubtarget().enableAdvancedRASplitCost();
+
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
@@ -2760,6 +3116,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
SetOfBrokenHints.clear();
+ LastEvicted.clear();
allocatePhysRegs();
tryHintsRecoloring();
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 9778103575fa..351e91c932eb 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -43,7 +43,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -59,6 +59,8 @@
#include "llvm/CodeGen/PBQPRAConstraint.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
@@ -70,8 +72,6 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -668,7 +668,7 @@ void RegAllocPBQP::spillVReg(unsigned VReg,
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
(void)TRI;
- DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
+ DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
<< LRE.getParent().weight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
@@ -677,7 +677,7 @@ void RegAllocPBQP::spillVReg(unsigned VReg,
I != E; ++I) {
const LiveInterval &LI = LIS.getInterval(*I);
assert(!LI.empty() && "Empty spill range.");
- DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " ");
+ DEBUG(dbgs() << printReg(LI.reg, &TRI) << " ");
VRegsToAlloc.insert(LI.reg);
}
@@ -707,7 +707,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
- DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> "
+ DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
<< TRI.getName(PReg) << "\n");
assert(PReg != 0 && "Invalid preg selected.");
VRM.assignVirt2Phys(VReg, PReg);
@@ -799,7 +799,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
findVRegIntervalsToAlloc(MF, LIS);
#ifndef NDEBUG
- const Function &F = *MF.getFunction();
+ const Function &F = MF.getFunction();
std::string FullyQualifiedName =
F.getParent()->getModuleIdentifier() + "." + F.getName().str();
#endif
@@ -864,7 +864,7 @@ static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
unsigned VReg = G.getNodeMetadata(NId).getVReg();
const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
- OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')';
+ OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')';
});
}
diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp
index 855aa37ff3c3..f49ea25bbf35 100644
--- a/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -27,7 +27,7 @@
#include "llvm/CodeGen/RegisterUsageInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
using namespace llvm;
@@ -95,7 +95,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
RegMask.resize(RegMaskSize, 0xFFFFFFFF);
- const Function *F = MF.getFunction();
+ const Function &F = MF.getFunction();
PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
@@ -127,10 +127,12 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
const uint32_t *CallPreservedMask =
- TRI->getCallPreservedMask(MF, F->getCallingConv());
- // Set callee saved register as preserved.
- for (unsigned i = 0; i < RegMaskSize; ++i)
- RegMask[i] = RegMask[i] | CallPreservedMask[i];
+ TRI->getCallPreservedMask(MF, F.getCallingConv());
+ if (CallPreservedMask) {
+ // Set callee saved register as preserved.
+ for (unsigned i = 0; i < RegMaskSize; ++i)
+ RegMask[i] = RegMask[i] | CallPreservedMask[i];
+ }
} else {
++NumCSROpt;
DEBUG(dbgs() << MF.getName()
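// getCallPreservedMask may legitimately return null for calling conventions a
// target does not model, so the guard added above only ORs the mask in when
// one exists. Condensed, the safe pattern is:
//
//   if (const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv()))
//     for (unsigned i = 0; i < RegMaskSize; ++i)
//       RegMask[i] |= Mask[i];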
@@ -139,11 +141,11 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
- DEBUG(dbgs() << TRI->getName(PReg) << " ");
+ DEBUG(dbgs() << printReg(PReg, TRI) << " ");
DEBUG(dbgs() << " \n----------------------------------------\n");
- PRUI->storeUpdateRegUsageInfo(F, std::move(RegMask));
+ PRUI->storeUpdateRegUsageInfo(&F, std::move(RegMask));
return false;
}
diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp
index 5cc35bfeca63..5b12d00e126f 100644
--- a/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -87,14 +88,31 @@ void RegUsageInfoPropagationPass::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
+// Assumes call instructions have a single reference to a function.
+static const Function *findCalledFunction(const Module &M, MachineInstr &MI) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isGlobal())
+ return dyn_cast<Function>(MO.getGlobal());
+
+ if (MO.isSymbol())
+ return M.getFunction(MO.getSymbolName());
+ }
+
+ return nullptr;
+}
+
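// A hedged usage sketch of the helper above (the surrounding loop in
// runOnMachineFunction below does exactly this; UpdateRegMask is the local
// lambda visible in the later hunk):
//
//   if (const Function *Callee = findCalledFunction(*M, MI))
//     UpdateRegMask(Callee);   // apply the callee's known register usage
//   else
//     DEBUG(dbgs() << "Failed to find call target function\n");
//
// Note the dyn_cast in findCalledFunction: a global operand that is not a
// Function (e.g. an alias) yields nullptr rather than asserting, which is the
// behavioral change versus the old cast<Function>.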
bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) {
- const Module *M = MF.getFunction()->getParent();
+ const Module *M = MF.getFunction().getParent();
PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName()
<< " ++++++++++++++++++++ \n");
DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n");
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.hasCalls() && !MFI.hasTailCall())
+ return false;
+
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
@@ -113,15 +131,14 @@ bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) {
Changed = true;
};
- MachineOperand &Operand = MI.getOperand(0);
- if (Operand.isGlobal())
- UpdateRegMask(cast<Function>(Operand.getGlobal()));
- else if (Operand.isSymbol())
- UpdateRegMask(M->getFunction(Operand.getSymbolName()));
+ if (const Function *F = findCalledFunction(*M, MI)) {
+ UpdateRegMask(F);
+ } else {
+ DEBUG(dbgs() << "Failed to find call target function\n");
+ }
- DEBUG(dbgs()
- << "Call Instruction After Register Usage Info Propagation : \n");
- DEBUG(dbgs() << MI << "\n");
+ DEBUG(dbgs() << "Call Instruction After Register Usage Info Propagation : "
+ << MI << '\n');
}
}
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 956dec39fc38..b0eeb81f583e 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -20,13 +20,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -153,7 +153,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
DEBUG({
dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = [";
for (unsigned I = 0; I != RCI.NumRegs; ++I)
- dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
+ dbgs() << ' ' << printReg(RCI.Order[I], TRI);
dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n");
});
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index a67d07b36474..00a2e93c71ca 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1,4 +1,4 @@
-//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==//
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,32 +14,49 @@
//===----------------------------------------------------------------------===//
#include "RegisterCoalescer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/IR/Value.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
-#include <cmath>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "regalloc"
@@ -53,10 +70,9 @@ STATISTIC(NumInflated , "Number of register classes inflated");
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
-static cl::opt<bool>
-EnableJoining("join-liveintervals",
- cl::desc("Coalesce copies (default=true)"),
- cl::init(true));
+static cl::opt<bool> EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true), cl::Hidden);
static cl::opt<bool> UseTerminalRule("terminal-rule",
cl::desc("Apply the terminal rule"),
@@ -79,11 +95,11 @@ VerifyCoalescing("verify-coalescing",
cl::Hidden);
namespace {
+
class RegisterCoalescer : public MachineFunctionPass,
private LiveRangeEdit::Delegate {
MachineFunction* MF;
MachineRegisterInfo* MRI;
- const TargetMachine* TM;
const TargetRegisterInfo* TRI;
const TargetInstrInfo* TII;
LiveIntervals *LIS;
@@ -211,9 +227,9 @@ namespace {
/// flag.
/// This can happen when undef uses were previously concealed by a copy
/// which we coalesced. Example:
- /// %vreg0:sub0<def,read-undef> = ...
- /// %vreg1 = COPY %vreg0 <-- Coalescing COPY reveals undef
- /// = use %vreg1:sub1 <-- hidden undef use
+ /// %0:sub0<def,read-undef> = ...
+ /// %1 = COPY %0 <-- Coalescing COPY reveals undef
+ /// = use %1:sub1 <-- hidden undef use
void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
MachineOperand &MO, unsigned SubRegIdx);
@@ -248,8 +264,19 @@ namespace {
}
}
+ /// Wrapper method to do all the necessary work when an instruction is
+ /// deleted.
+ /// Optimizations should use this to make sure that deleted instructions
+ /// are always accounted for.
+ void deleteInstr(MachineInstr* MI) {
+ ErasedInstrs.insert(MI);
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+
public:
static char ID; ///< Class identification, replacement for typeinfo
+
RegisterCoalescer() : MachineFunctionPass(ID) {
initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
}
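// The deleteInstr wrapper above centralizes the bookkeeping that several call
// sites previously open-coded; the later hunks in this file rewrite
//
//   ErasedInstrs.insert(MI);
//   LIS->RemoveMachineInstrFromMaps(*MI);
//   MI->eraseFromParent();
//
// into a single deleteInstr(MI) call, so no path can forget to record the
// erased instruction.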
@@ -264,8 +291,11 @@ namespace {
/// Implement the dump method.
void print(raw_ostream &O, const Module* = nullptr) const override;
};
+
} // end anonymous namespace
+char RegisterCoalescer::ID = 0;
+
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
@@ -277,8 +307,6 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
-char RegisterCoalescer::ID = 0;
-
static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
unsigned &Src, unsigned &Dst,
unsigned &SrcSub, unsigned &DstSub) {
@@ -334,7 +362,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
Flipped = true;
}
- const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
// Eliminate DstSub on a physreg.
@@ -540,7 +568,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// in IntB, we can merge them.
if (ValS+1 != BS) return false;
- DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));
+ DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));
SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
// We are about to delete CopyMI, so need to remove it as the 'instruction
@@ -616,8 +644,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
/// Copy segments with value number @p SrcValNo from liverange @p Src to live
/// range @p Dst and use value number @p DstValNo there.
static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
- const LiveRange &Src, const VNInfo *SrcValNo)
-{
+ const LiveRange &Src, const VNInfo *SrcValNo) {
for (const LiveRange::Segment &S : Src.segments) {
if (S.valno != SrcValNo)
continue;
@@ -640,7 +667,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// its other operand is coalesced to the copy dest register, see if we can
// transform the copy into a noop by commuting the definition. For example,
//
- // A3 = op A2 B0<kill>
+ // A3 = op A2 killed B0
// ...
// B1 = A3 <- this copy
// ...
@@ -648,7 +675,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
//
// ==>
//
- // B2 = op B0 A2<kill>
+ // B2 = op B0 killed A2
// ...
// B1 = B2 <- now an identity copy
// ...
@@ -741,7 +768,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// ...
// B = A
// ...
- // C = A<kill>
+ // C = killed A
// ...
// = B
@@ -797,9 +824,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
S.MergeValueNumberInto(SubDVNI, SubBValNo);
}
- ErasedInstrs.insert(UseMI);
- LIS->RemoveMachineInstrFromMaps(*UseMI);
- UseMI->eraseFromParent();
+ deleteInstr(UseMI);
}
// Extend BValNo by merging in IntA live segments of AValNo. Val# definition
@@ -966,8 +991,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
// Now ok to move copy.
if (CopyLeftBB) {
- DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#"
- << CopyLeftBB->getNumber() << '\t' << CopyMI);
+ DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
+ << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
// Insert new copy to CopyLeftBB.
auto InsPos = CopyLeftBB->getFirstTerminator();
@@ -985,18 +1010,16 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
// the deleted list.
ErasedInstrs.erase(NewCopyMI);
} else {
- DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
- << MBB.getNumber() << '\t' << CopyMI);
+ DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
+ << printMBBReference(MBB) << '\t' << CopyMI);
}
// Remove CopyMI.
// Note: This is fine to remove the copy before updating the live-ranges.
// While updating the live-ranges, we only look at slot indices and
// never go back to the instruction.
- LIS->RemoveMachineInstrFromMaps(CopyMI);
// Mark instructions as deleted.
- ErasedInstrs.insert(&CopyMI);
- CopyMI.eraseFromParent();
+ deleteInstr(&CopyMI);
// Update the liveness.
SmallVector<SlotIndex, 8> EndPoints;
@@ -1119,10 +1142,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
NewMI.setDebugLoc(DL);
// In a situation like the following:
- // %vreg0:subreg = instr ; DefMI, subreg = DstIdx
- // %vreg1 = copy %vreg0:subreg ; CopyMI, SrcIdx = 0
- // instead of widening %vreg1 to the register class of %vreg0 simply do:
- // %vreg1 = instr
+ // %0:subreg = instr ; DefMI, subreg = DstIdx
+ // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0
+ // instead of widening %1 to the register class of %0 simply do:
+ // %1 = instr
const TargetRegisterClass *NewRC = CP.getNewRC();
if (DstIdx != 0) {
MachineOperand &DefMO = NewMI.getOperand(0);
@@ -1202,12 +1225,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// This could happen if the rematerialization instruction is rematerializing
// more than actually is used in the register.
// An example would be:
- // vreg1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
+ // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
// ; Copying only part of the register here, but the rest is undef.
- // vreg2:sub_16bit<def, read-undef> = COPY vreg1:sub_16bit
+ // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
// ==>
// ; Materialize all the constants but only using one
- // vreg2 = LOAD_CONSTANTS 5, 8
+ // %2 = LOAD_CONSTANTS 5, 8
//
// at this point for the part that wasn't defined before we could have
// subranges missing the definition.
@@ -1230,11 +1253,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// Make sure that the subrange for resultant undef is removed
// For example:
- // vreg1:sub1<def,read-undef> = LOAD CONSTANT 1
- // vreg2<def> = COPY vreg1
+ // %1:sub1<def,read-undef> = LOAD CONSTANT 1
+ // %2 = COPY %1
// ==>
- // vreg2:sub1<def, read-undef> = LOAD CONSTANT 1
- // ; Correct but need to remove the subrange for vreg2:sub0
+ // %2:sub1<def, read-undef> = LOAD CONSTANT 1
+ // ; Correct but need to remove the subrange for %2:sub0
// ; as it is now undef
if (NewIdx != 0 && DstInt.hasSubRanges()) {
// The affected subregister segments can be removed.
@@ -1268,15 +1291,15 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// Otherwise, variables that live through may miss some
// interferences, thus creating invalid allocation.
// E.g., i386 code:
- // vreg1 = somedef ; vreg1 GR8
- // vreg2 = remat ; vreg2 GR32
- // CL = COPY vreg2.sub_8bit
- // = somedef vreg1 ; vreg1 GR8
+ // %1 = somedef ; %1 GR8
+ // %2 = remat ; %2 GR32
+ // CL = COPY %2.sub_8bit
+ // = somedef %1 ; %1 GR8
// =>
- // vreg1 = somedef ; vreg1 GR8
- // ECX<def, dead> = remat ; CL<imp-def>
- // = somedef vreg1 ; vreg1 GR8
- // vreg1 will see the inteferences with CL but not with CH since
+ // %1 = somedef ; %1 GR8
+ // dead ECX = remat ; implicit-def CL
+ // = somedef %1 ; %1 GR8
+ // %1 will see the interferences with CL but not with CH since
// no live-ranges would have been created for ECX.
// Fix that!
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
@@ -1313,6 +1336,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MachineInstr *UseMI = UseMO.getParent();
if (UseMI->isDebugValue()) {
UseMO.setReg(DstReg);
+ // Move the debug value directly after the def of the rematerialized
+ // value in DstReg.
+ MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
}
}
@@ -1326,9 +1352,9 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// ProcessImplicitDefs may leave some copies of <undef> values; it only removes
// local variables. When we have a copy like:
//
- // %vreg1 = COPY %vreg2<undef>
+ // %1 = COPY undef %2
//
- // We delete the copy and remove the corresponding value number from %vreg1.
+ // We delete the copy and remove the corresponding value number from %1.
// Any uses of that value number are marked as <undef>.
// Note that we do not query CoalescerPair here but redo isMoveInstr as the
@@ -1540,7 +1566,6 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
}
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
-
Again = false;
DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
@@ -1560,7 +1585,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
std::swap(SrcRC, DstRC);
}
if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
- CP.getNewRC())) {
+ CP.getNewRC(), *LIS)) {
DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
return false;
}
@@ -1578,8 +1603,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// Eliminate undefs.
if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) {
- LIS->RemoveMachineInstrFromMaps(*CopyMI);
- CopyMI->eraseFromParent();
+ deleteInstr(CopyMI);
return false; // Not coalescable.
}
@@ -1607,15 +1631,14 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
}
- LIS->RemoveMachineInstrFromMaps(*CopyMI);
- CopyMI->eraseFromParent();
+ deleteInstr(CopyMI);
return true;
}
// Enforce policies.
if (CP.isPhys()) {
- DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
- << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx())
+ DEBUG(dbgs() << "\tConsidering merging " << printReg(CP.getSrcReg(), TRI)
+ << " with " << printReg(CP.getDstReg(), TRI, CP.getSrcIdx())
<< '\n');
if (!canJoinPhys(CP)) {
// Before giving up coalescing, if definition of source is defined by
@@ -1637,13 +1660,13 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
dbgs() << "\tConsidering merging to "
<< TRI->getRegClassName(CP.getNewRC()) << " with ";
if (CP.getDstIdx() && CP.getSrcIdx())
- dbgs() << PrintReg(CP.getDstReg()) << " in "
+ dbgs() << printReg(CP.getDstReg()) << " in "
<< TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
- << PrintReg(CP.getSrcReg()) << " in "
+ << printReg(CP.getSrcReg()) << " in "
<< TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
else
- dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
- << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
+ dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
+ << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
});
}
@@ -1668,8 +1691,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (!CP.isPartial() && !CP.isPhys()) {
if (adjustCopiesBackFrom(CP, CopyMI) ||
removeCopyByCommutingDef(CP, CopyMI)) {
- LIS->RemoveMachineInstrFromMaps(*CopyMI);
- CopyMI->eraseFromParent();
+ deleteInstr(CopyMI);
DEBUG(dbgs() << "\tTrivial!\n");
return true;
}
@@ -1735,11 +1757,11 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
DEBUG({
- dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
- << " -> " << PrintReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
+ dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
+ << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
dbgs() << "\tResult = ";
if (CP.isPhys())
- dbgs() << PrintReg(CP.getDstReg(), TRI);
+ dbgs() << printReg(CP.getDstReg(), TRI);
else
dbgs() << LIS->getInterval(CP.getDstReg());
dbgs() << '\n';
@@ -1774,7 +1796,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
return false;
}
if (RHS.overlaps(LIS->getRegUnit(*UI))) {
- DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
+ DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI) << '\n');
return false;
}
}
@@ -1797,20 +1819,20 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
MachineInstr *CopyMI;
if (CP.isFlipped()) {
// Physreg is copied into vreg
- // %vregY = COPY %X
- // ... //< no other def of %X here
- // use %vregY
+ // %y = COPY %physreg_x
+ // ... //< no other def of %x here
+ // use %y
// =>
// ...
- // use %X
+ // use %x
CopyMI = MRI->getVRegDef(SrcReg);
} else {
// VReg is copied into physreg:
- // %vregX = def
- // ... //< no other def or use of %Y here
- // %Y = COPY %vregX
+ // %y = def
+ // ... //< no other def or use of %y here
+ // %y = COPY %physreg_x
// =>
- // %Y = def
+ // %y = def
// ...
if (!MRI->hasOneNonDBGUse(SrcReg)) {
DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
@@ -1829,7 +1851,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
if (!MRI->isConstantPhysReg(DstReg)) {
// We checked above that there are no interfering defs of the physical
- // register. However, for this case, where we intent to move up the def of
+ // register. However, for this case, where we intend to move up the def of
// the physical register, we also need to check for interfering uses.
SlotIndexes *Indexes = LIS->getSlotIndexes();
for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
@@ -1844,7 +1866,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// We're going to remove the copy which defines a physical reserved
// register, so remove its valno, etc.
- DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI)
+ DEBUG(dbgs() << "\t\tRemoving phys reg def of " << printReg(DstReg, TRI)
<< " at " << CopyRegIdx << "\n");
LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
@@ -1855,8 +1877,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
}
}
- LIS->RemoveMachineInstrFromMaps(*CopyMI);
- CopyMI->eraseFromParent();
+ deleteInstr(CopyMI);
// We don't track kills for reserved registers.
MRI->clearKillFlags(CP.getSrcReg());
@@ -1906,7 +1927,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
//
// %dst:ssub0<def,read-undef> = FOO
// %src = BAR
-// %dst:ssub1<def> = COPY %src
+// %dst:ssub1 = COPY %src
//
// The live range of %src overlaps the %dst value defined by FOO, but
// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
@@ -1921,14 +1942,15 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// is live, but never read. This can happen because we don't compute
// individual live ranges per lane.
//
-// %dst<def> = FOO
+// %dst = FOO
// %src = BAR
-// %dst:ssub1<def> = COPY %src
+// %dst:ssub1 = COPY %src
//
// This kind of interference is only resolved locally. If the clobbered
// lane value escapes the block, the join is aborted.
namespace {
+
/// Track information about values in a single virtual register about to be
/// joined. Objects of this class are always created in pairs - one for each
/// side of the CoalescerPair (or one for each lane of a side of the coalescer
@@ -1936,6 +1958,7 @@ namespace {
class JoinVals {
/// Live range we work on.
LiveRange &LR;
+
/// (Main) register we work on.
const unsigned Reg;
@@ -1943,6 +1966,7 @@ class JoinVals {
/// subregister SubIdx in the coalesced register. Either CP.DstIdx or
/// CP.SrcIdx.
const unsigned SubIdx;
+
/// The LaneMask that this liverange will occupy in the coalesced register. May
/// be smaller than the lanemask produced by SubIdx when merging subranges.
const LaneBitmask LaneMask;
@@ -1950,6 +1974,7 @@ class JoinVals {
/// This is true when joining sub register ranges, false when joining main
/// ranges.
const bool SubRangeJoin;
+
/// Whether the current LiveInterval tracks subregister liveness.
const bool TrackSubRegLiveness;
@@ -1997,7 +2022,7 @@ class JoinVals {
/// joined register, so they can be compared directly between SrcReg and
/// DstReg.
struct Val {
- ConflictResolution Resolution;
+ ConflictResolution Resolution = CR_Keep;
/// Lanes written by this def, 0 for unanalyzed values.
LaneBitmask WriteLanes;
@@ -2007,10 +2032,10 @@ class JoinVals {
LaneBitmask ValidLanes;
/// Value in LI being redefined by this def.
- VNInfo *RedefVNI;
+ VNInfo *RedefVNI = nullptr;
/// Value in the other live range that overlaps this def, if any.
- VNInfo *OtherVNI;
+ VNInfo *OtherVNI = nullptr;
/// Is this value an IMPLICIT_DEF that can be erased?
///
@@ -2023,18 +2048,16 @@ class JoinVals {
/// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
/// longer live ranges. Such IMPLICIT_DEF values should be treated like
/// normal values.
- bool ErasableImplicitDef;
+ bool ErasableImplicitDef = false;
/// True when the live range of this value will be pruned because of an
/// overlapping CR_Replace value in the other live range.
- bool Pruned;
+ bool Pruned = false;
/// True once Pruned above has been computed.
- bool PrunedComputed;
+ bool PrunedComputed = false;
- Val() : Resolution(CR_Keep), WriteLanes(), ValidLanes(),
- RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false),
- Pruned(false), PrunedComputed(false) {}
+ Val() = default;
bool isAnalyzed() const { return WriteLanes.any(); }
};
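// The Val cleanup above swaps a hand-written constructor for C++11 default
// member initializers; a standalone sketch of the idiom (the type and field
// names are illustrative only):

struct SketchVal {
  int Resolution = 0;       // default documented at the declaration
  void *RedefVNI = nullptr;
  bool Pruned = false;

  SketchVal() = default;    // no init list to keep in sync with the fields
};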
@@ -2081,8 +2104,9 @@ class JoinVals {
/// entry to TaintedVals.
///
/// Returns false if the tainted lanes extend beyond the basic block.
- bool taintExtent(unsigned, LaneBitmask, JoinVals&,
- SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> >&);
+ bool
+ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
+ SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
/// Return true if MI uses any of the given Lanes from Reg.
/// This does not include partial redefinitions of Reg.
@@ -2104,8 +2128,7 @@ public:
: LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
- TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums())
- {}
+ TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {}
/// Analyze defs in LR and compute a value mapping in NewVNInfo.
/// Returns false if any conflicts were impossible to resolve.
@@ -2149,6 +2172,7 @@ public:
/// Get the value assignments suitable for passing to LiveInterval::join.
const int *getAssignments() const { return Assignments.data(); }
};
+
} // end anonymous namespace
LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
@@ -2239,7 +2263,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
const MachineInstr *DefMI = nullptr;
if (VNI->isPHIDef()) {
// Conservatively assume that all lanes in a PHI are valid.
- LaneBitmask Lanes = SubRangeJoin ? LaneBitmask(1)
+ LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
: TRI->getSubRegIndexLaneMask(SubIdx);
V.ValidLanes = V.WriteLanes = Lanes;
} else {
@@ -2247,7 +2271,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
assert(DefMI != nullptr);
if (SubRangeJoin) {
// We don't care about the lanes when joining subregister ranges.
- V.WriteLanes = V.ValidLanes = LaneBitmask(1);
+ V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
if (DefMI->isImplicitDef()) {
V.ValidLanes = LaneBitmask::getNone();
V.ErasableImplicitDef = true;
@@ -2263,7 +2287,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
//
// This adds ssub1 to the set of valid lanes in %src:
//
- // %src:ssub1<def> = FOO
+ // %src:ssub1 = FOO
//
// This leaves only ssub1 valid, making any other lanes undef:
//
@@ -2352,7 +2376,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
if (OtherV.ErasableImplicitDef && DefMI &&
DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
- << " extends into BB#" << DefMI->getParent()->getNumber()
+ << " extends into " << printMBBReference(*DefMI->getParent())
<< ", keeping it.\n");
OtherV.ErasableImplicitDef = false;
}
@@ -2401,9 +2425,9 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
//
// 1 %dst:ssub0 = FOO <-- OtherVNI
// 2 %src = BAR <-- VNI
- // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy.
- // 4 BAZ %dst<kill>
- // 5 QUUX %src<kill>
+ // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy.
+ // 4 BAZ killed %dst
+ // 5 QUUX killed %src
//
// Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
// handles this complex value mapping.
@@ -2413,7 +2437,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// If the other live range is killed by DefMI and the live ranges are still
// overlapping, it must be because we're looking at an early clobber def:
//
- // %dst<def,early-clobber> = ASM %src<kill>
+ // %dst<def,early-clobber> = ASM killed %src
//
// In this case, it is illegal to merge the two live ranges since the early
// clobber def would clobber %src before it was read.
@@ -2463,9 +2487,9 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
- DEBUG(dbgs() << "\t\tmerge " << PrintReg(Reg) << ':' << ValNo << '@'
+ DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
<< LR.getValNumInfo(ValNo)->def << " into "
- << PrintReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
+ << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
<< V.OtherVNI->def << " --> @"
<< NewVNInfo[Assignments[ValNo]]->def << '\n');
break;
@@ -2493,7 +2517,7 @@ bool JoinVals::mapValues(JoinVals &Other) {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
computeAssignment(i, Other);
if (Vals[i].Resolution == CR_Impossible) {
- DEBUG(dbgs() << "\t\tinterference at " << PrintReg(Reg) << ':' << i
+ DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
<< '@' << LR.getValNumInfo(i)->def << '\n');
return false;
}
@@ -2503,7 +2527,7 @@ bool JoinVals::mapValues(JoinVals &Other) {
bool JoinVals::
taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
- SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> > &TaintExtent) {
+ SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
VNInfo *VNI = LR.getValNumInfo(ValNo);
MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
@@ -2516,11 +2540,11 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
// lanes escape the block.
SlotIndex End = OtherI->end;
if (End >= MBBEnd) {
- DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.Reg) << ':'
+ DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
<< OtherI->valno->id << '@' << OtherI->start << '\n');
return false;
}
- DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.Reg) << ':'
+ DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
<< OtherI->valno->id << '@' << OtherI->start
<< " to " << End << '\n');
// A dead def is not a problem.
@@ -2560,10 +2584,10 @@ bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
bool JoinVals::resolveConflicts(JoinVals &Other) {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
Val &V = Vals[i];
- assert (V.Resolution != CR_Impossible && "Unresolvable conflict");
+ assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
if (V.Resolution != CR_Unresolved)
continue;
- DEBUG(dbgs() << "\t\tconflict at " << PrintReg(Reg) << ':' << i
+ DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i
<< '@' << LR.getValNumInfo(i)->def << '\n');
if (SubRangeJoin)
return false;
@@ -2598,7 +2622,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
Indexes->getInstructionFromIndex(TaintExtent.front().first);
assert(LastMI && "Range must end at a proper instruction");
unsigned TaintNum = 0;
- for (;;) {
+ while (true) {
assert(MI != MBB->end() && "Bad LastMI");
if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
@@ -2658,13 +2682,13 @@ void JoinVals::pruneValues(JoinVals &Other,
if (!Def.isBlock()) {
if (changeInstrs) {
// Remove <def,read-undef> flags. This def is now a partial redef.
- // Also remove <def,dead> flags since the joined live range will
+ // Also remove dead flags since the joined live range will
// continue past this instruction.
for (MachineOperand &MO :
Indexes->getInstructionFromIndex(Def)->operands()) {
if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
- if (MO.getSubReg() != 0)
- MO.setIsUndef(EraseImpDef);
+ if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
+ MO.setIsUndef(false);
MO.setIsDead(false);
}
}
@@ -2674,7 +2698,7 @@ void JoinVals::pruneValues(JoinVals &Other,
if (!EraseImpDef)
EndPoints.push_back(Def);
}
- DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.Reg) << " at " << Def
+ DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
<< ": " << Other.LR << '\n');
break;
}
@@ -2686,7 +2710,7 @@ void JoinVals::pruneValues(JoinVals &Other,
// computeAssignment(), the value that was originally copied could have
// been replaced.
LIS->pruneValue(LR, Def, &EndPoints);
- DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(Reg) << " at "
+ DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
<< Def << ": " << LR << '\n');
}
break;
@@ -2994,7 +3018,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
R.LaneMask = Mask;
}
}
- DEBUG(dbgs() << "\t\tLHST = " << PrintReg(CP.getDstReg())
+ DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg())
<< ' ' << LHS << '\n');
// Determine lanemasks of RHS in the coalesced register and merge subranges.
@@ -3068,6 +3092,7 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
}
namespace {
+
/// Information concerning MBB coalescing priority.
struct MBBPriorityInfo {
MachineBasicBlock *MBB;
@@ -3077,7 +3102,8 @@ struct MBBPriorityInfo {
MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
: MBB(mbb), Depth(depth), IsSplit(issplit) {}
};
-}
+
+} // end anonymous namespace
/// C-style comparator that sorts first based on the loop depth of the basic
/// block (the unsigned), and then on the MBB number.
@@ -3194,7 +3220,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
continue;
// Check that OtherReg interfere with DstReg.
if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
- DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n');
+ DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg) << '\n');
return true;
}
}
@@ -3281,7 +3307,7 @@ void RegisterCoalescer::joinAllIntervals() {
array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
// Coalesce intervals in MBB priority order.
- unsigned CurrDepth = UINT_MAX;
+ unsigned CurrDepth = std::numeric_limits<unsigned>::max();
for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
// Try coalescing the collected local copies for deeper loops.
if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
@@ -3308,7 +3334,6 @@ void RegisterCoalescer::releaseMemory() {
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
MRI = &fn.getRegInfo();
- TM = &fn.getTarget();
const TargetSubtargetInfo &STI = fn.getSubtarget();
TRI = STI.getRegisterInfo();
TII = STI.getInstrInfo();
@@ -3349,7 +3374,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
if (MRI->reg_nodbg_empty(Reg))
continue;
if (MRI->recomputeRegClass(Reg)) {
- DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+ DEBUG(dbgs() << printReg(Reg) << " inflated to "
<< TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
++NumInflated;
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 04067a1427af..1a46f6d053e6 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -1,4 +1,4 @@
-//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===//
+//===- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,10 +17,9 @@
namespace llvm {
- class MachineInstr;
- class TargetRegisterInfo;
- class TargetRegisterClass;
- class TargetInstrInfo;
+class MachineInstr;
+class TargetRegisterClass;
+class TargetRegisterInfo;
/// A helper class for register coalescers. When deciding if
/// two registers can be coalesced, CoalescerPair can determine if a copy
@@ -30,43 +29,40 @@ namespace llvm {
/// The register that will be left after coalescing. It can be a
/// virtual or physical register.
- unsigned DstReg;
+ unsigned DstReg = 0;
/// The virtual register that will be coalesced into dstReg.
- unsigned SrcReg;
+ unsigned SrcReg = 0;
/// The sub-register index of the old DstReg in the new coalesced register.
- unsigned DstIdx;
+ unsigned DstIdx = 0;
/// The sub-register index of the old SrcReg in the new coalesced register.
- unsigned SrcIdx;
+ unsigned SrcIdx = 0;
/// True when the original copy was a partial subregister copy.
- bool Partial;
+ bool Partial = false;
/// True when both regs are virtual and newRC is constrained.
- bool CrossClass;
+ bool CrossClass = false;
/// True when DstReg and SrcReg are reversed from the original
/// copy instruction.
- bool Flipped;
+ bool Flipped = false;
/// The register class of the coalesced register, or NULL if DstReg
/// is a physreg. This register class may be a super-register of both
/// SrcReg and DstReg.
- const TargetRegisterClass *NewRC;
+ const TargetRegisterClass *NewRC = nullptr;
public:
- CoalescerPair(const TargetRegisterInfo &tri)
- : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0),
- Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {}
+ CoalescerPair(const TargetRegisterInfo &tri) : TRI(tri) {}
/// Create a CoalescerPair representing a virtreg-to-physreg copy.
/// No need to call setRegisters().
CoalescerPair(unsigned VirtReg, unsigned PhysReg,
const TargetRegisterInfo &tri)
- : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0),
- Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {}
+ : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg) {}
/// Set registers to match the copy instruction MI. Return
/// false if MI is not a coalescable copy instruction.
@@ -111,6 +107,7 @@ namespace llvm {
/// Return the register class of the coalesced register.
const TargetRegisterClass *getNewRC() const { return NewRC; }
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 88e0a3b58940..9ac810c7c723 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -17,7 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -26,14 +26,14 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -97,7 +97,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dumpRegSetPressure(MaxSetPressure, TRI);
dbgs() << "Live In: ";
for (const RegisterMaskPair &P : LiveInRegs) {
- dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
+ dbgs() << printVRegOrUnit(P.RegUnit, TRI);
if (!P.LaneMask.all())
dbgs() << ':' << PrintLaneMask(P.LaneMask);
dbgs() << ' ';
@@ -105,7 +105,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << '\n';
dbgs() << "Live Out: ";
for (const RegisterMaskPair &P : LiveOutRegs) {
- dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
+ dbgs() << printVRegOrUnit(P.RegUnit, TRI);
if (!P.LaneMask.all())
dbgs() << ':' << PrintLaneMask(P.LaneMask);
dbgs() << ' ';
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index fc5105aadbff..97967124add6 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -16,30 +16,33 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/RegisterScavenging.h"
-
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/PassSupport.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <limits>
#include <string>
+#include <utility>
using namespace llvm;
@@ -210,7 +213,7 @@ void RegScavenger::forward() {
continue;
if (!isRegUsed(Reg)) {
// Check if it's partial live: e.g.
- // D0 = insert_subreg D0<undef>, S0
+ // D0 = insert_subreg undef D0, S0
// ... D0
// The problem is the insert_subreg could be eliminated. The use of
// D0 is using a partially undef value. This is not *incorrect* since
@@ -285,8 +288,8 @@ bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
for (unsigned Reg : *RC) {
if (!isRegUsed(Reg)) {
- DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(Reg) <<
- "\n");
+ DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI)
+ << "\n");
return Reg;
}
}
@@ -460,7 +463,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
MachineBasicBlock::iterator &UseMI) {
// Find an available scavenging slot with size and alignment matching
// the requirements of the class RC.
- const MachineFunction &MF = *Before->getParent()->getParent();
+ const MachineFunction &MF = *Before->getMF();
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned NeedSize = TRI->getSpillSize(RC);
unsigned NeedAlign = TRI->getSpillAlignment(RC);
@@ -533,7 +536,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
int SPAdj) {
MachineInstr &MI = *I;
- const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineFunction &MF = *MI.getMF();
// Consider all allocatable registers in the register class initially
BitVector Candidates = TRI->getAllocatableSet(MF, RC);
@@ -558,15 +561,15 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// If we found an unused register there is no reason to spill it.
if (!isRegUsed(SReg)) {
- DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+ DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n");
return SReg;
}
ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
Scavenged.Restore = &*std::prev(UseMI);
- DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
- "\n");
+ DEBUG(dbgs() << "Scavenged register (with spill): " << printReg(SReg, TRI)
+ << "\n");
return SReg;
}
@@ -595,10 +598,10 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
Scavenged.Restore = &*std::prev(SpillBefore);
LiveUnits.removeReg(Reg);
- DEBUG(dbgs() << "Scavenged register with spill: " << PrintReg(Reg, TRI)
- << " until " << *SpillBefore);
+ DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
+ << " until " << *SpillBefore);
} else {
- DEBUG(dbgs() << "Scavenged free register: " << PrintReg(Reg, TRI) << '\n');
+ DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI) << '\n');
}
return Reg;
}
@@ -769,13 +772,16 @@ void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
}
namespace {
+
/// This class runs register scavenging independent of the PrologEpilogInserter.
/// This is used for testing.
class ScavengerTest : public MachineFunctionPass {
public:
static char ID;
+
ScavengerTest() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &MF) {
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetFrameLowering &TFL = *STI.getFrameLowering();
@@ -792,9 +798,10 @@ public:
return true;
}
};
-char ScavengerTest::ID;
} // end anonymous namespace
+char ScavengerTest::ID;
+
INITIALIZE_PASS(ScavengerTest, "scavenger-test",
"Scavenge virtual registers inside basic blocks", false, false)
diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp
index 30757f070cad..4e42deb406e1 100644
--- a/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/lib/CodeGen/RegisterUsageInfo.cpp
@@ -12,17 +12,17 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -97,7 +97,7 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
if (MachineOperand::clobbersPhysReg(&(FPRMPair->second[0]), PReg))
- OS << TRI->getName(PReg) << " ";
+ OS << printReg(PReg, TRI) << " ";
}
OS << "\n";
}
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index bd5ecbd28f29..1e1f36a35ecc 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -10,32 +10,32 @@
/// Rename independent subregisters looks for virtual registers with
/// independently used subregisters and renames them to new virtual registers.
/// Example: In the following:
-/// %vreg0:sub0<read-undef> = ...
-/// %vreg0:sub1 = ...
-/// use %vreg0:sub0
-/// %vreg0:sub0 = ...
-/// use %vreg0:sub0
-/// use %vreg0:sub1
+/// %0:sub0<read-undef> = ...
+/// %0:sub1 = ...
+/// use %0:sub0
+/// %0:sub0 = ...
+/// use %0:sub0
+/// use %0:sub1
/// sub0 and sub1 are never used together, and we have two independent sub0
/// definitions. This pass will rename to:
-/// %vreg0:sub0<read-undef> = ...
-/// %vreg1:sub1<read-undef> = ...
-/// use %vreg1:sub1
-/// %vreg2:sub1<read-undef> = ...
-/// use %vreg2:sub1
-/// use %vreg0:sub0
+/// %0:sub0<read-undef> = ...
+/// %1:sub1<read-undef> = ...
+/// use %1:sub1
+/// %2:sub1<read-undef> = ...
+/// use %2:sub1
+/// use %0:sub0
//
//===----------------------------------------------------------------------===//
#include "LiveRangeUtils.h"
#include "PHIEliminationUtils.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;
@@ -134,15 +134,15 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
SmallVector<LiveInterval*, 4> Intervals;
Intervals.push_back(&LI);
- DEBUG(dbgs() << PrintReg(Reg) << ": Found " << Classes.getNumClasses()
+ DEBUG(dbgs() << printReg(Reg) << ": Found " << Classes.getNumClasses()
<< " equivalence classes.\n");
- DEBUG(dbgs() << PrintReg(Reg) << ": Splitting into newly created:");
+ DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:");
for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses;
++I) {
unsigned NewVReg = MRI->createVirtualRegister(RegClass);
LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg);
Intervals.push_back(&NewLI);
- DEBUG(dbgs() << ' ' << PrintReg(NewVReg));
+ DEBUG(dbgs() << ' ' << printReg(NewVReg));
}
DEBUG(dbgs() << '\n');
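The header comment above describes the split in terms of equivalence classes. As a rough sketch of the underlying machinery (hypothetical, not part of this patch), llvm::IntEqClasses joins the ids of any connected def/use pair and yields one class per independently used subregister:

  #include "llvm/ADT/IntEqClasses.h"
  using namespace llvm;

  // Four "interesting" points: sub0 def, sub1 def, sub0 use, sub1 use.
  IntEqClasses Classes(4);
  Classes.join(0, 2);  // sub0 def and sub0 use are connected
  Classes.join(1, 3);  // sub1 def and sub1 use are connected
  Classes.compress();  // renumber leaders to 0..N-1
  unsigned NumClasses = Classes.getNumClasses(); // 2: sub0/sub1 independent
  // renameComponents() creates one new virtual register per extra class.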
diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp
index 01b3db43b283..f1885aa74285 100644
--- a/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -51,7 +51,7 @@ namespace {
++NumFunctionsReset;
MF.reset();
if (EmitFallbackDiag) {
- const Function &F = *MF.getFunction();
+ const Function &F = MF.getFunction();
DiagnosticInfoISelFallback DiagFallback(F);
F.getContext().diagnose(DiagFallback);
}
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index 8584a9b7c897..51233be521be 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -1,4 +1,4 @@
-//===-- SafeStack.cpp - Safe Stack Insertion ------------------------------===//
+//===- SafeStack.cpp - Safe Stack Insertion -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,37 +17,56 @@
#include "SafeStackColoring.h"
#include "SafeStackLayout.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_os_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <string>
+#include <utility>
using namespace llvm;
using namespace llvm::safestack;
@@ -255,16 +274,16 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
assert(V == UI.get());
switch (I->getOpcode()) {
- case Instruction::Load: {
+ case Instruction::Load:
if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getType()), AllocaPtr,
AllocaSize))
return false;
break;
- }
+
case Instruction::VAArg:
// "va-arg" from a pointer is safe.
break;
- case Instruction::Store: {
+ case Instruction::Store:
if (V == I->getOperand(0)) {
// Stored the pointer - conservatively assume it may be unsafe.
DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
@@ -276,11 +295,10 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
AllocaPtr, AllocaSize))
return false;
break;
- }
- case Instruction::Ret: {
+
+ case Instruction::Ret:
// Information leak.
return false;
- }
case Instruction::Call:
case Instruction::Invoke: {
@@ -372,7 +390,7 @@ void SafeStack::findInsts(Function &F,
StackRestorePoints.push_back(LP);
} else if (auto II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() == Intrinsic::gcroot)
- llvm::report_fatal_error(
+ report_fatal_error(
"gcroot intrinsic not compatible with safestack attribute");
}
}
@@ -540,7 +558,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
// Replace alloc with the new location.
replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
- /*Deref=*/false, -Offset);
+ DIExpression::NoDeref, -Offset, DIExpression::NoDeref);
Arg->replaceAllUsesWith(NewArg);
IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment());
@@ -555,7 +573,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
- replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/false, -Offset);
+ replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::NoDeref,
+ -Offset, DIExpression::NoDeref);
replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
// Replace uses of the alloca with the new location.
@@ -645,7 +664,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (AI->hasName() && isa<Instruction>(NewAI))
NewAI->takeName(AI);
- replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/false);
+ replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::NoDeref, 0,
+ DIExpression::NoDeref);
AI->replaceAllUsesWith(NewAI);
AI->eraseFromParent();
}
@@ -764,11 +784,12 @@ bool SafeStack::run() {
}
class SafeStackLegacyPass : public FunctionPass {
- const TargetMachine *TM;
+ const TargetMachine *TM = nullptr;
public:
static char ID; // Pass identification, replacement for typeid..
- SafeStackLegacyPass() : FunctionPass(ID), TM(nullptr) {
+
+ SafeStackLegacyPass() : FunctionPass(ID) {
initializeSafeStackLegacyPassPass(*PassRegistry::getPassRegistry());
}
@@ -817,9 +838,10 @@ public:
}
};
-} // anonymous namespace
+} // end anonymous namespace
char SafeStackLegacyPass::ID = 0;
+
INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE,
"Safe Stack instrumentation pass", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp
index 21f2fa497233..072e6e090e1e 100644
--- a/lib/CodeGen/SafeStackColoring.cpp
+++ b/lib/CodeGen/SafeStackColoring.cpp
@@ -1,4 +1,4 @@
-//===-- SafeStackColoring.cpp - SafeStack frame coloring -------*- C++ -*--===//
+//===- SafeStackColoring.cpp - SafeStack frame coloring -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,12 +8,25 @@
//===----------------------------------------------------------------------===//
#include "SafeStackColoring.h"
-
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/User.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <tuple>
+#include <utility>
using namespace llvm;
using namespace llvm::safestack;
diff --git a/lib/CodeGen/SafeStackColoring.h b/lib/CodeGen/SafeStackColoring.h
index 08b179ccb7f1..902e63ebeb7e 100644
--- a/lib/CodeGen/SafeStackColoring.h
+++ b/lib/CodeGen/SafeStackColoring.h
@@ -1,4 +1,4 @@
-//===-- SafeStackColoring.h - SafeStack frame coloring ---------*- C++ -*--===//
+//===- SafeStackColoring.h - SafeStack frame coloring ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,16 +10,23 @@
#ifndef LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
#define LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <utility>
namespace llvm {
-class AllocaInst;
+
+class BasicBlock;
+class Function;
+class Instruction;
namespace safestack {
+
/// Compute live ranges of allocas.
/// Live ranges are represented as sets of "interesting" instructions, which are
/// defined as instructions that may start or end an alloca's lifetime. These
@@ -35,10 +42,13 @@ class StackColoring {
struct BlockLifetimeInfo {
/// Which slots BEGINs in each basic block.
BitVector Begin;
+
/// Which slots ENDs in each basic block.
BitVector End;
+
/// Which slots are marked as LIVE_IN, coming into each basic block.
BitVector LiveIn;
+
/// Which slots are marked as LIVE_OUT, coming out of each basic block.
BitVector LiveOut;
};
@@ -48,11 +58,14 @@ public:
/// live.
struct LiveRange {
BitVector bv;
+
void SetMaximum(int size) { bv.resize(size); }
void AddRange(unsigned start, unsigned end) { bv.set(start, end); }
+
bool Overlaps(const LiveRange &Other) const {
return bv.anyCommon(Other.bv);
}
+
void Join(const LiveRange &Other) { bv |= Other.bv; }
};
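As a quick illustration of the LiveRange helpers above (a hypothetical snippet, not from this change), two allocas conflict exactly when their instruction-id bit vectors intersect:

  StackColoring::LiveRange A, B;
  A.SetMaximum(8);   // 8 interesting instructions in the function
  B.SetMaximum(8);
  A.AddRange(0, 4);  // alloca A live over instruction ids [0, 4)
  B.AddRange(2, 6);  // alloca B live over ids [2, 6)
  bool Conflict = A.Overlaps(B); // true: ids 2 and 3 are shared
  A.Join(B);         // A now covers [0, 6)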
@@ -60,13 +73,15 @@ private:
Function &F;
/// Maps active slots (per bit) for each basic block.
- typedef DenseMap<BasicBlock *, BlockLifetimeInfo> LivenessMap;
+ using LivenessMap = DenseMap<BasicBlock *, BlockLifetimeInfo>;
LivenessMap BlockLiveness;
/// Number of interesting instructions.
- int NumInst;
+ int NumInst = -1;
+
/// Numeric ids for interesting instructions.
DenseMap<Instruction *, unsigned> InstructionNumbering;
+
/// A range [Start, End) of instruction ids for each basic block.
/// Instructions inside each BB have monotonic and consecutive ids.
DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
@@ -74,6 +89,7 @@ private:
ArrayRef<AllocaInst *> Allocas;
unsigned NumAllocas;
DenseMap<AllocaInst *, unsigned> AllocaNumbering;
+
/// LiveRange for allocas.
SmallVector<LiveRange, 8> LiveRanges;
@@ -101,7 +117,7 @@ private:
public:
StackColoring(Function &F, ArrayRef<AllocaInst *> Allocas)
- : F(F), NumInst(-1), Allocas(Allocas), NumAllocas(Allocas.size()) {}
+ : F(F), Allocas(Allocas), NumAllocas(Allocas.size()) {}
void run();
void removeAllMarkers();
@@ -143,7 +159,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
return OS << R.bv;
}
-} // namespace safestack
-} // namespace llvm
+} // end namespace safestack
+
+} // end namespace llvm
#endif // LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
diff --git a/lib/CodeGen/SafeStackLayout.cpp b/lib/CodeGen/SafeStackLayout.cpp
index 7d4dbd13abf4..b1759359e46f 100644
--- a/lib/CodeGen/SafeStackLayout.cpp
+++ b/lib/CodeGen/SafeStackLayout.cpp
@@ -1,4 +1,4 @@
-//===-- SafeStackLayout.cpp - SafeStack frame layout -----------*- C++ -*--===//
+//===- SafeStackLayout.cpp - SafeStack frame layout -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,9 +8,15 @@
//===----------------------------------------------------------------------===//
#include "SafeStackLayout.h"
-
-#include "llvm/IR/Instructions.h"
+#include "SafeStackColoring.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
using namespace llvm;
using namespace llvm::safestack;
diff --git a/lib/CodeGen/SafeStackLayout.h b/lib/CodeGen/SafeStackLayout.h
index 313ed21c8869..7c1292f251f7 100644
--- a/lib/CodeGen/SafeStackLayout.h
+++ b/lib/CodeGen/SafeStackLayout.h
@@ -1,4 +1,4 @@
-//===-- SafeStackLayout.h - SafeStack frame layout -------------*- C++ -*--===//
+//===- SafeStackLayout.h - SafeStack frame layout --------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,14 @@
#define LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
#include "SafeStackColoring.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
namespace llvm {
+
+class raw_ostream;
+class Value;
+
namespace safestack {
/// Compute the layout of an unsafe stack frame.
@@ -23,10 +29,12 @@ class StackLayout {
unsigned Start;
unsigned End;
StackColoring::LiveRange Range;
+
StackRegion(unsigned Start, unsigned End,
const StackColoring::LiveRange &Range)
: Start(Start), End(End), Range(Range) {}
};
+
/// The list of current stack regions, sorted by StackRegion::Start.
SmallVector<StackRegion, 16> Regions;
@@ -35,6 +43,7 @@ class StackLayout {
unsigned Size, Alignment;
StackColoring::LiveRange Range;
};
+
SmallVector<StackObject, 8> StackObjects;
DenseMap<const Value *, unsigned> ObjectOffsets;
@@ -43,6 +52,7 @@ class StackLayout {
public:
StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {}
+
/// Add an object to the stack frame. Value pointer is opaque and used as a
/// handle to retrieve the object's offset in the frame later.
void addObject(const Value *V, unsigned Size, unsigned Alignment,
@@ -59,10 +69,12 @@ public:
/// Returns the alignment of the frame.
unsigned getFrameAlignment() { return MaxAlignment; }
+
void print(raw_ostream &OS);
};
-} // namespace safestack
-} // namespace llvm
+} // end namespace safestack
+
+} // end namespace llvm
#endif // LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
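A minimal usage sketch of this interface, assuming the computeLayout() and getObjectOffset() members declared in the unshown part of this header:

  // Hypothetical driver; LiveRange values would come from StackColoring.
  StackLayout Layout(/*StackAlignment=*/16);
  Layout.addObject(AllocaA, /*Size=*/8,  /*Alignment=*/8,  RangeA);
  Layout.addObject(AllocaB, /*Size=*/32, /*Alignment=*/16, RangeB);
  Layout.computeLayout();  // non-overlapping lifetimes may share a region
  unsigned OffA = Layout.getObjectOffset(AllocaA);
  unsigned Align = Layout.getFrameAlignment();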
diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 07b43a82ca99..cef413f9d410 100644
--- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -1,5 +1,5 @@
-//=== ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ===//
-//=== instrinsics ===//
+//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
+// intrinsics
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,26 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
using namespace llvm;
@@ -25,13 +42,15 @@ using namespace llvm;
namespace {
class ScalarizeMaskedMemIntrin : public FunctionPass {
- const TargetTransformInfo *TTI;
+ const TargetTransformInfo *TTI = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
- explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID), TTI(nullptr) {
+
+ explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID) {
initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry());
}
+
bool runOnFunction(Function &F) override;
StringRef getPassName() const override {
@@ -46,9 +65,11 @@ private:
bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
};
-} // namespace
+
+} // end anonymous namespace
char ScalarizeMaskedMemIntrin::ID = 0;
+
INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE,
"Scalarize unsupported masked memory intrinsics", false, false)
@@ -156,7 +177,6 @@ static void scalarizeMaskedLoad(CallInst *CI) {
Value *PrevPhi = UndefVal;
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
// Fill the "else" block, created in the previous iteration
//
// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
@@ -288,7 +308,6 @@ static void scalarizeMaskedStore(CallInst *CI) {
}
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
// Fill the "else" block, created in the previous iteration
//
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
@@ -408,7 +427,6 @@ static void scalarizeMaskedGather(CallInst *CI) {
Value *PrevPhi = UndefVal;
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
// Fill the "else" block, created in the previous iteration
//
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
@@ -610,13 +628,12 @@ bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
bool &ModifiedDT) {
-
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
switch (II->getIntrinsicID()) {
default:
break;
- case Intrinsic::masked_load: {
+ case Intrinsic::masked_load:
// Scalarize unsupported vector masked load
if (!TTI->isLegalMaskedLoad(CI->getType())) {
scalarizeMaskedLoad(CI);
@@ -624,24 +641,21 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
return true;
}
return false;
- }
- case Intrinsic::masked_store: {
+ case Intrinsic::masked_store:
if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
scalarizeMaskedStore(CI);
ModifiedDT = true;
return true;
}
return false;
- }
- case Intrinsic::masked_gather: {
+ case Intrinsic::masked_gather:
if (!TTI->isLegalMaskedGather(CI->getType())) {
scalarizeMaskedGather(CI);
ModifiedDT = true;
return true;
}
return false;
- }
- case Intrinsic::masked_scatter: {
+ case Intrinsic::masked_scatter:
if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
scalarizeMaskedScatter(CI);
ModifiedDT = true;
@@ -649,7 +663,6 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
}
return false;
}
- }
}
return false;
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 5e95f760aaa2..0635e8f41ee7 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -19,13 +19,13 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -80,7 +80,7 @@ raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
case Data:
OS << " Latency=" << getLatency();
if (TRI && isAssignedRegDep())
- OS << " Reg=" << PrintReg(getReg(), TRI);
+ OS << " Reg=" << printReg(getReg(), TRI);
break;
case Anti:
case Output:
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 99baa07390eb..9249fa84b38b 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -21,7 +21,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -36,6 +36,8 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
@@ -52,8 +54,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -114,16 +114,18 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
: ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
RemoveKillFlags(RemoveKillFlags),
UnknownValue(UndefValue::get(
- Type::getVoidTy(mf.getFunction()->getContext()))) {
+ Type::getVoidTy(mf.getFunction().getContext()))) {
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
SchedModel.init(ST.getSchedModel(), &ST, TII);
}
-/// If this machine instr has memory reference information and it can be tracked
-/// to a normal reference to a known object, return the Value for that object.
-static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
+/// If this machine instr has memory reference information and it can be
+/// tracked to a normal reference to a known object, return the Value
+/// for that object. This function returns false if the memory location is
+/// unknown or may alias anything.
+static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo &MFI,
UnderlyingObjectsVector &Objects,
const DataLayout &DL) {
@@ -151,7 +153,8 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
} else if (const Value *V = MMO->getValue()) {
SmallVector<Value *, 4> Objs;
- getUnderlyingObjectsForCodeGen(V, Objs, DL);
+ if (!getUnderlyingObjectsForCodeGen(V, Objs, DL))
+ return false;
for (Value *V : Objs) {
assert(isIdentifiedObject(V));
@@ -163,8 +166,12 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
return true;
};
- if (!allMMOsOkay())
+ if (!allMMOsOkay()) {
Objects.clear();
+ return false;
+ }
+
+ return true;
}
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
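The signature change above replaces an in-band sentinel (an empty Objects vector meant "unknown") with an explicit boolean, so "no objects" can no longer be confused with "unknown, may alias anything". Schematically (hypothetical caller):

  // Before: the empty vector doubled as the "unknown" signal.
  getUnderlyingObjectsForInstr(&MI, MFI, Objs, DL);
  bool Unknown = Objs.empty();

  // After: the return value carries the answer; Objs is purely an out-param.
  bool Known = getUnderlyingObjectsForInstr(&MI, MFI, Objs, DL);
  if (!Known) { /* add chain dependencies on all stores/loads */ }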
@@ -769,7 +776,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (PDiffs != nullptr)
PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
- RPTracker->recedeSkipDebugValues();
+ if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
+ RPTracker->recedeSkipDebugValues();
assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
RPTracker->recede(RegOpers);
}
@@ -860,13 +868,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Find the underlying objects for MI. The Objs vector is either
// empty, or filled with the Values of memory locations which this
- // SU depends on. An empty vector means the memory location is
- // unknown, and may alias anything.
+ // SU depends on.
UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(&MI, MFI, Objs, MF.getDataLayout());
+ bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
+ MF.getDataLayout());
if (MI.mayStore()) {
- if (Objs.empty()) {
+ if (!ObjsFound) {
// An unknown store depends on all stores and loads.
addChainDependencies(SU, Stores);
addChainDependencies(SU, NonAliasStores);
@@ -901,7 +909,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addChainDependencies(SU, Stores, UnknownValue);
}
} else { // SU is a load.
- if (Objs.empty()) {
+ if (!ObjsFound) {
// An unknown load depends on all stores.
addChainDependencies(SU, Stores);
addChainDependencies(SU, NonAliasStores);
@@ -1036,7 +1044,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
}
void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
- DEBUG(dbgs() << "Fixup kills for BB#" << MBB.getNumber() << '\n');
+ DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
LiveRegs.init(*TRI);
LiveRegs.addLiveOuts(MBB);
@@ -1348,7 +1356,7 @@ static bool hasDataSucc(const SUnit *SU) {
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
if (!IsBottomUp)
- llvm_unreachable("Top-down ILP metric is unimplemnted");
+ llvm_unreachable("Top-down ILP metric is unimplemented");
SchedDFSImpl Impl(*this);
for (const SUnit &SU : SUnits) {
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index bb6a45996f63..37c4a470bd0a 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -14,14 +14,12 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include <fstream>
using namespace llvm;
namespace llvm {
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index b3d83d5313af..b789e2d9c52c 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -15,12 +15,12 @@
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include <cassert>
using namespace llvm;
@@ -32,6 +32,7 @@ ScoreboardHazardRecognizer::ScoreboardHazardRecognizer(
const char *ParentDebugType)
: ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II),
DAG(SchedDAG) {
+ (void)DebugType;
// Determine the maximum depth of any itinerary. This determines the depth of
// the scoreboard. We always make the scoreboard at least 1 cycle deep to
// avoid dealing with the boundary condition.
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index ae9c5adb0397..fd1e5e2cfc56 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -24,7 +24,7 @@ add_llvm_library(LLVMSelectionDAG
SelectionDAGTargetInfo.cpp
StatepointLowering.cpp
TargetLowering.cpp
-
+
DEPENDS
intrinsics_gen
)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 432c86dd6f1e..f97732c1c49d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1,4 +1,4 @@
-//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,32 +16,64 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "dagcombine"
@@ -53,43 +85,41 @@ STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
-namespace {
- static cl::opt<bool>
- CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
- cl::desc("Enable DAG combiner's use of IR alias analysis"));
+static cl::opt<bool>
+CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Enable DAG combiner's use of IR alias analysis"));
- static cl::opt<bool>
- UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
- cl::desc("Enable DAG combiner's use of TBAA"));
+static cl::opt<bool>
+UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
+ cl::desc("Enable DAG combiner's use of TBAA"));
#ifndef NDEBUG
- static cl::opt<std::string>
- CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
- cl::desc("Only use DAG-combiner alias analysis in this"
- " function"));
+static cl::opt<std::string>
+CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
+ cl::desc("Only use DAG-combiner alias analysis in this"
+ " function"));
#endif
- /// Hidden option to stress test load slicing, i.e., when this option
- /// is enabled, load slicing bypasses most of its profitability guards.
- static cl::opt<bool>
- StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
- cl::desc("Bypass the profitability model of load "
- "slicing"),
- cl::init(false));
+/// Hidden option to stress test load slicing, i.e., when this option
+/// is enabled, load slicing bypasses most of its profitability guards.
+static cl::opt<bool>
+StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
+ cl::desc("Bypass the profitability model of load slicing"),
+ cl::init(false));
- static cl::opt<bool>
- MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
- cl::desc("DAG combiner may split indexing from loads"));
+static cl::opt<bool>
+ MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner may split indexing from loads"));
-//------------------------------ DAGCombiner ---------------------------------//
+namespace {
class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
- bool LegalOperations;
- bool LegalTypes;
+ bool LegalOperations = false;
+ bool LegalTypes = false;
bool ForCodeSize;
/// \brief Worklist of all of the nodes that need to be simplified.
@@ -128,6 +158,19 @@ namespace {
SDValue visit(SDNode *N);
public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
+ OptLevel(OL), AA(AA) {
+ ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
+
+ MaximumLegalStoreInBits = 0;
+ for (MVT VT : MVT::all_valuetypes())
+ if (EVT(VT).isSimple() && VT != MVT::Other &&
+ TLI.isTypeLegal(EVT(VT)) &&
+ VT.getSizeInBits() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits();
+ }
+
/// Add to the worklist making sure its instance is at the back (next to be
/// processed.)
void AddToWorklist(SDNode *N) {
@@ -285,7 +328,7 @@ namespace {
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
- SDValue visitAssertZext(SDNode *N);
+ SDValue visitAssertExt(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
@@ -348,6 +391,7 @@ namespace {
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
SDValue foldSelectOfConstants(SDNode *N);
+ SDValue foldVSelectOfConstants(SDNode *N);
SDValue foldBinOpIntoSelect(SDNode *BO);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
@@ -371,6 +415,7 @@ namespace {
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
+ SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
@@ -400,14 +445,11 @@ namespace {
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
- SDValue reduceBuildVecToTrunc(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
SDValue VecIn2, unsigned LeftIdx);
SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
- SDValue GetDemandedBits(SDValue V, const APInt &Mask);
-
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void GatherAllAliases(SDNode *N, SDValue OriginalChain,
@@ -434,12 +476,14 @@ namespace {
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
- MemOpLink(LSBaseSDNode *N, int64_t Offset)
- : MemNode(N), OffsetFromBase(Offset) {}
// Ptr to the mem node.
LSBaseSDNode *MemNode;
+
// Offset from the base ptr.
int64_t OffsetFromBase;
+
+ MemOpLink(LSBaseSDNode *N, int64_t Offset)
+ : MemNode(N), OffsetFromBase(Offset) {}
};
/// This is a helper function for visitMUL to check the profitability
@@ -450,38 +494,49 @@ namespace {
SDValue &AddNode,
SDValue &ConstNode);
-
/// This is a helper function for visitAND and visitZERO_EXTEND. Returns
/// true if the (and (load x) c) pattern matches an extload. ExtVT returns
- /// the type of the loaded value to be extended. LoadedVT returns the type
- /// of the original loaded value. NarrowLoad returns whether the load would
- /// need to be narrowed in order to match.
+ /// the type of the loaded value to be extended.
bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
- EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
- bool &NarrowLoad);
+ EVT LoadResultTy, EVT &ExtVT);
+
+ /// Helper function to calculate whether the given Load can have its
+ /// width reduced to ExtVT.
+ bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
+ EVT &ExtVT, unsigned ShAmt = 0);
+
+ /// Used by BackwardsPropagateMask to find suitable loads.
+ bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
+ SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+ ConstantSDNode *Mask, SDNode *&NodeToMask);
+ /// Attempt to propagate a given AND node back to load leaves so that they
+ /// can be combined into narrow loads.
+ bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
/// Helper function for MergeConsecutiveStores which merges the
/// component store chains.
SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores);
- /// This is a helper function for MergeConsecutiveStores. When the source
- /// elements of the consecutive stores are all constants or all extracted
- /// vector elements, try to merge them into one larger store.
- /// \return True if a merged store was created.
+ /// This is a helper function for MergeConsecutiveStores. When the
+ /// source elements of the consecutive stores are all constants or
+ /// all extracted vector elements, try to merge them into one
+ /// larger store, introducing bitcasts if necessary. \return True
+ /// if a merged store was created.
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector,
bool UseTrunc);
- /// This is a helper function for MergeConsecutiveStores.
- /// Stores that may be merged are placed in StoreNodes.
+ /// This is a helper function for MergeConsecutiveStores. Stores
+ /// that may be merged with St are placed in
+ /// StoreNodes.
void getStoreMergeCandidates(StoreSDNode *St,
SmallVectorImpl<MemOpLink> &StoreNodes);
/// Helper function for MergeConsecutiveStores. Checks if
- /// Candidate stores have indirect dependency through their
- /// operands. \return True if safe to merge
+ /// candidate stores have indirect dependency through their
+ /// operands. \return True if safe to merge.
bool checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
@@ -500,19 +555,6 @@ namespace {
SDValue distributeTruncateThroughAnd(SDNode *N);
public:
- DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
- : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
- OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
- ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
-
- MaximumLegalStoreInBits = 0;
- for (MVT VT : MVT::all_valuetypes())
- if (EVT(VT).isSimple() && VT != MVT::Other &&
- TLI.isTypeLegal(EVT(VT)) &&
- VT.getSizeInBits() >= MaximumLegalStoreInBits)
- MaximumLegalStoreInBits = VT.getSizeInBits();
- }
-
/// Runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
@@ -541,14 +583,12 @@ namespace {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
};
-}
-
-namespace {
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
+
public:
explicit WorklistRemover(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
@@ -557,7 +597,8 @@ public:
DC.removeFromWorklist(N);
}
};
-}
+
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
// TargetLowering::DAGCombinerInfo implementation
@@ -577,7 +618,6 @@ CombineTo(SDNode *N, SDValue Res, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}
-
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
@@ -873,6 +913,56 @@ static bool isAnyConstantBuildVector(const SDNode *N) {
ISD::isBuildVectorOfConstantFPSDNodes(N);
}
+// Attempt to match a unary predicate against a scalar/splat constant or
+// every element of a constant BUILD_VECTOR.
+static bool matchUnaryPredicate(SDValue Op,
+ std::function<bool(ConstantSDNode *)> Match) {
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
+ return Match(Cst);
+
+ if (ISD::BUILD_VECTOR != Op.getOpcode())
+ return false;
+
+ EVT SVT = Op.getValueType().getScalarType();
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+ if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
+ return false;
+ }
+ return true;
+}
+
+// Attempt to match a binary predicate against a pair of scalar/splat constants
+// or every element of a pair of constant BUILD_VECTORs.
+static bool matchBinaryPredicate(
+ SDValue LHS, SDValue RHS,
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
+ if (LHS.getValueType() != RHS.getValueType())
+ return false;
+
+ if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
+ if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
+ return Match(LHSCst, RHSCst);
+
+ if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
+ ISD::BUILD_VECTOR != RHS.getOpcode())
+ return false;
+
+ EVT SVT = LHS.getValueType().getScalarType();
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
+ auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
+ if (!LHSCst || !RHSCst)
+ return false;
+ if (LHSCst->getValueType(0) != SVT ||
+ LHSCst->getValueType(0) != RHSCst->getValueType(0))
+ return false;
+ if (!Match(LHSCst, RHSCst))
+ return false;
+ }
+ return true;
+}
+
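One plausible use of these helpers inside a combine (illustrative only, not part of this patch) is to test a predicate uniformly over a scalar constant or every BUILD_VECTOR element:

  // Does N1 consist entirely of (non-opaque) power-of-two constants?
  bool AllPow2 = matchUnaryPredicate(N1, [](ConstantSDNode *C) {
    return !C->isOpaque() && C->getAPIntValue().isPowerOf2();
  });

  // Is every element of LHS strictly less than its counterpart in RHS?
  bool AllLess = matchBinaryPredicate(LHS, RHS,
      [](ConstantSDNode *L, ConstantSDNode *R) {
        return L->getAPIntValue().ult(R->getAPIntValue());
      });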
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1) {
EVT VT = N0.getValueType();
@@ -1123,10 +1213,10 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
Replace0 &= !N0->hasOneUse();
Replace1 &= (N0 != N1) && !N1->hasOneUse();
- // Combine Op here so it is presreved past replacements.
+ // Combine Op here so it is preserved past replacements.
CombineTo(Op.getNode(), RV);
- // If operands have a use ordering, make sur we deal with
+ // If operands have a use ordering, make sure we deal with
// predecessor first.
if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
std::swap(N0, N1);
@@ -1473,7 +1563,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
- case ISD::AssertZext: return visitAssertZext(N);
+ case ISD::AssertSext:
+ case ISD::AssertZext: return visitAssertExt(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
@@ -1572,15 +1663,15 @@ SDValue DAGCombiner::combine(SDNode *N) {
}
}
- // If N is a commutative binary node, try commuting it to enable more
- // sdisel CSE.
+ // If N is a commutative binary node, try to eliminate it if the commuted
+ // version is already present in the DAG.
if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Constant operands are canonicalized to RHS.
- if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+ if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
SDValue Ops[] = {N1, N0};
SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
N->getFlags());
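The new N0 != N1 guard deserves a note: when both operands are the same value, the commuted operand list is identical to the original, so getNodeIfExists can return N itself and the replacement below would replace the node with itself. Roughly:

  // For N = (add x, x), Ops after the swap is still {x, x}, so
  // DAG.getNodeIfExists(ISD::ADD, VTs, {x, x}, Flags) can return N.
  // Replacing N with N is at best wasted work; the guard skips it.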
@@ -1632,7 +1723,6 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// Check each of the operands.
for (const SDValue &Op : TF->op_values()) {
-
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
@@ -1907,6 +1997,15 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
}
}
+
+ // Undo the add -> or combine to merge constant offsets from a frame index.
+ if (N0.getOpcode() == ISD::OR &&
+ isa<FrameIndexSDNode>(N0.getOperand(0)) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
+ SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
+ }
}
if (SDValue NewSel = foldBinOpIntoSelect(N))
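A small worked example of the new fold (hypothetical values): if FI is 8-byte aligned, an earlier combine may have turned (add FI, 4) into (or FI, 4); haveNoCommonBitsSet(FI, 4) then holds, and with N1 = 8:

  // (add (or FI, 4), 8)
  //   Add0 = (add 8, 4)   // constants fold to 12
  //   ->   (add FI, 12)   // offset merged back onto the frame index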
@@ -2064,7 +2163,8 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
}
// (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
- if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
+ if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
+ N1.getResNo() == 0)
return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
N0, N1.getOperand(0), N1.getOperand(2));
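The added N1.getResNo() == 0 check matters because ADDCARRY produces two results, the sum (result 0) and the carry-out (result 1); the fold is only sound when N1 is the sum. Schematically:

  // (add X, (addcarry Y, 0, C):0) -> (addcarry X, Y, C)  // sum: fold is fine
  // (add X, (addcarry Y, 0, C):1)                        // carry-out: don't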
@@ -2537,6 +2637,12 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
+ assert((!N0IsConst ||
+ ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
+ "Splat APInt should be element width");
+ assert((!N1IsConst ||
+ ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
+ "Splat APInt should be element width");
} else {
N0IsConst = isa<ConstantSDNode>(N0);
if (N0IsConst) {
@@ -2562,12 +2668,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1.isNullValue())
return N1;
- // We require a splat of the entire scalar bit width for non-contiguous
- // bit patterns.
- bool IsFullSplat =
- ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat)
+ if (N1IsConst && ConstValue1.isOneValue())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -2580,16 +2682,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
DAG.getConstant(0, DL, VT), N0);
}
// fold (mul x, (1 << c)) -> x << c
- if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
- IsFullSplat) {
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1) &&
+ (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
SDLoc DL(N);
- return DAG.getNode(ISD::SHL, DL, VT, N0,
- DAG.getConstant(ConstValue1.logBase2(), DL,
- getShiftAmountTy(N0.getValueType())));
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ AddToWorklist(Trunc.getNode());
+ return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
- IsFullSplat) {
+ if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
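The fold above now accepts any value known to be a power of two, including vector splats, by materializing the shift amount with BuildLogBase2 instead of reading it off a full-width splat constant. On a splat input (illustrative):

  // (mul x, (splat 8)) with DAG.isKnownToBeAPowerOfTwo(N1):
  //   LogBase2 = (splat 3)
  //   -> (shl x, zext-or-trunc(LogBase2) to the shift-amount type)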
@@ -2835,7 +2941,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
- AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
@@ -2906,7 +3012,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
// fold (udiv x, c) -> alternate
- AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -2965,7 +3071,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
}
}
- AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
@@ -3003,19 +3109,26 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ if (VT.isVector()) {
+ // fold (mulhs x, 0) -> 0
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N0;
+ }
+
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
// fold (mulhs x, 1) -> (sra x, size(x)-1)
- if (isOneConstant(N1)) {
- SDLoc DL(N);
+ if (isOneConstant(N1))
return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
- }
+
// fold (mulhs x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
@@ -3043,6 +3156,14 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ if (VT.isVector()) {
+ // fold (mulhu x, 0) -> 0
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N0;
+ }
+
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -3216,7 +3337,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- // fold (add c1, c2) -> c1+c2
+ // fold operation with constant operands.
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
@@ -3599,22 +3720,20 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
}
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
- EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
- bool &NarrowLoad) {
- uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
-
- if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
+ EVT LoadResultTy, EVT &ExtVT) {
+ if (!AndC->getAPIntValue().isMask())
return false;
+ unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- LoadedVT = LoadN->getMemoryVT();
+ EVT LoadedVT = LoadN->getMemoryVT();
if (ExtVT == LoadedVT &&
(!LegalOperations ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
// ZEXTLOAD will match without needing to change the size of the value being
// loaded.
- NarrowLoad = false;
return true;
}
@@ -3634,10 +3753,185 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
return false;
- NarrowLoad = true;
return true;
}
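The rewritten check leans on APInt::isMask(), which holds only for a contiguous run of low set bits, so countTrailingOnes() then yields the extended width directly. For example (hypothetical values):

  APInt AndMask(32, 0x00FF);
  assert(AndMask.isMask());                          // low 8 bits set
  unsigned ActiveBits = AndMask.countTrailingOnes(); // 8 -> ExtVT = i8
  // 0x0FF0 would fail isMask() and the transform would be rejected.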
+bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
+ EVT &ExtVT, unsigned ShAmt) {
+ // Don't transform one with multiple uses, this would require adding a new
+ // load.
+ if (!SDValue(LoadN, 0).hasOneUse())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
+ return false;
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!ExtVT.isRound())
+ return false;
+
+ // Don't change the width of a volatile load.
+ if (LoadN->isVolatile())
+ return false;
+
+ // Verify that we are actually reducing a load width here.
+ if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
+ return false;
+
+ // For the transform to be legal, the load must produce only two values
+ // (the value loaded and the chain). Don't transform a pre-increment
+ // load, for example, which produces an extra value. Otherwise the
+ // transformation is not equivalent, and the downstream logic to replace
+ // uses gets things wrong.
+ if (LoadN->getNumValues() > 2)
+ return false;
+
+ // If the load that we're shrinking is an extload and we're not just
+ // discarding the extension, we can't simply shrink the load. Bail.
+ // TODO: It would be possible to merge the extensions in some cases.
+ if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
+ LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
+ return false;
+
+ // It's not possible to generate a constant of extended or untyped type.
+ EVT PtrType = LoadN->getOperand(1).getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ return false;
+
+ return true;
+}
+
+bool DAGCombiner::SearchForAndLoads(SDNode *N,
+ SmallPtrSetImpl<LoadSDNode*> &Loads,
+ SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+ ConstantSDNode *Mask,
+ SDNode *&NodeToMask) {
+ // Recursively search the operands, looking for loads which can be
+ // narrowed.
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
+ SDValue Op = N->getOperand(i);
+
+ if (Op.getValueType().isVector())
+ return false;
+
+ // Some constants may need fixing up later if they are too large.
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
+ (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
+ NodesWithConsts.insert(N);
+ continue;
+ }
+
+ if (!Op.hasOneUse())
+ return false;
+
+ switch(Op.getOpcode()) {
+ case ISD::LOAD: {
+ auto *Load = cast<LoadSDNode>(Op);
+ EVT ExtVT;
+ if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
+ isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
+ // Only add this load if we can make it more narrow.
+ if (ExtVT.bitsLT(Load->getMemoryVT()))
+ Loads.insert(Load);
+ continue;
+ }
+ return false;
+ }
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::AssertZext: {
+ unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT VT = Op.getOpcode() == ISD::AssertZext ?
+ cast<VTSDNode>(Op.getOperand(1))->getVT() :
+ Op.getOperand(0).getValueType();
+
+ // We can accept extending nodes if the mask is wider than, or equal in
+ // width to, the original type.
+ if (ExtVT.bitsGE(VT))
+ continue;
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::AND:
+ if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
+ NodeToMask))
+ return false;
+ continue;
+ }
+
+ // Allow one node which will be masked along with any loads found.
+ if (NodeToMask)
+ return false;
+ NodeToMask = Op.getNode();
+ }
+ return true;
+}
+
+bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
+ auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Mask)
+ return false;
+
+ if (!Mask->getAPIntValue().isMask())
+ return false;
+
+ // No need to do anything if the and directly uses a load.
+ if (isa<LoadSDNode>(N->getOperand(0)))
+ return false;
+
+ SmallPtrSet<LoadSDNode*, 8> Loads;
+ SmallPtrSet<SDNode*, 2> NodesWithConsts;
+ SDNode *FixupNode = nullptr;
+ if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
+ if (Loads.size() == 0)
+ return false;
+
+ SDValue MaskOp = N->getOperand(1);
+
+ // If it exists, fix up the single node we allow in the tree that needs
+ // masking.
+ if (FixupNode) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
+ FixupNode->getValueType(0),
+ SDValue(FixupNode, 0), MaskOp);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
+ MaskOp);
+ }
+
+ // Narrow any constants that need it.
+ for (auto *LogicN : NodesWithConsts) {
+ auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
+ SDValue(C, 0), MaskOp);
+ DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
+ }
+
+ // Create narrow loads.
+ for (auto *Load : Loads) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
+ SDValue(Load, 0), MaskOp);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
+ SDValue NewLoad = ReduceLoadWidth(And.getNode());
+ assert(NewLoad &&
+ "Shouldn't be masking the load if it can't be narrowed");
+ CombineTo(Load, NewLoad, NewLoad.getValue(1));
+ }
+ DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
+ return true;
+ }
+ return false;
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3829,55 +4123,23 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
(N0.getOpcode() == ISD::ANY_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::LOAD))) {
- bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
- LoadSDNode *LN0 = HasAnyExt
- ? cast<LoadSDNode>(N0.getOperand(0))
- : cast<LoadSDNode>(N0);
- if (LN0->getExtensionType() != ISD::SEXTLOAD &&
- LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
- auto NarrowLoad = false;
- EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
- EVT ExtVT, LoadedVT;
- if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
- NarrowLoad)) {
- if (!NarrowLoad) {
- SDValue NewLoad =
- DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
- LN0->getChain(), LN0->getBasePtr(), ExtVT,
- LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(LN0, NewLoad, NewLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- } else {
- EVT PtrType = LN0->getOperand(1).getValueType();
-
- unsigned Alignment = LN0->getAlignment();
- SDValue NewPtr = LN0->getBasePtr();
-
- // For big endian targets, we need to add an offset to the pointer
- // to load the correct bytes. For little endian systems, we merely
- // need to read fewer bytes from the same pointer.
- if (DAG.getDataLayout().isBigEndian()) {
- unsigned LVTStoreBytes = LoadedVT.getStoreSize();
- unsigned EVTStoreBytes = ExtVT.getStoreSize();
- unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
- SDLoc DL(LN0);
- NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
- NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
- Alignment = MinAlign(Alignment, PtrOff);
- }
+ if (SDValue Res = ReduceLoadWidth(N)) {
+ LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
+ ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
- AddToWorklist(NewPtr.getNode());
+ AddToWorklist(N);
+ CombineTo(LN0, Res, Res.getValue(1));
+ return SDValue(N, 0);
+ }
+ }
- SDValue Load = DAG.getExtLoad(
- ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
- LN0->getPointerInfo(), ExtVT, Alignment,
- LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- AddToWorklist(N);
- CombineTo(LN0, Load, Load.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
+ if (Level >= AfterLegalizeTypes) {
+ // Attempt to propagate the AND back up to the leaves which, if they're
+ // loads, can be combined to narrow loads and the AND node can be removed.
+ // Perform after legalization so that extend nodes will already be
+ // combined into the loads.
+ if (BackwardsPropagateMask(N, DAG)) {
+ return SDValue(N, 0);
}
}
@@ -3974,7 +4236,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
return SDValue();
- // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
+ // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
bool LookPassAnd0 = false;
bool LookPassAnd1 = false;
if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
@@ -4593,20 +4855,6 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
return nullptr;
}
-// if Left + Right == Sum (constant or constant splat vector)
-static bool sumMatchConstant(SDValue Left, SDValue Right, unsigned Sum,
- SelectionDAG &DAG, const SDLoc &DL) {
- EVT ShiftVT = Left.getValueType();
- if (ShiftVT != Right.getValueType()) return false;
-
- SDValue ShiftSum = DAG.FoldConstantArithmetic(ISD::ADD, DL, ShiftVT,
- Left.getNode(), Right.getNode());
- if (!ShiftSum) return false;
-
- ConstantSDNode *CSum = isConstOrConstSplat(ShiftSum);
- return CSum && CSum->getZExtValue() == Sum;
-}
-
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
@@ -4620,6 +4868,16 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
if (!HasROTL && !HasROTR) return nullptr;
+ // Check for truncated rotate.
+ if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
+ assert(LHS.getValueType() == RHS.getValueType());
+ if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
+ SDValue(Rot, 0)).getNode();
+ }
+ }
+
// Match "(X shl/srl V1) & V2" where V2 may not be present.
SDValue LHSShift; // The shift.
SDValue LHSMask; // AND value if any.
@@ -4652,7 +4910,11 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
- if (sumMatchConstant(LHSShiftAmt, RHSShiftAmt, EltSizeInBits, DAG, DL)) {
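+ // e.g. for i32, shift amounts of 24 and 8 sum to the element width, so
+ // (or (shl x, 24), (srl x, 8)) matches rotl by 24 / rotr by 8.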
+ auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
+ };
+ if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
@@ -4712,20 +4974,22 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
}
namespace {
+
/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
// For constant zero providers Load is set to nullptr. For memory providers
// Load represents the node which loads the byte from memory.
// ByteOffset is the offset of the byte in the value produced by the load.
- LoadSDNode *Load;
- unsigned ByteOffset;
+ LoadSDNode *Load = nullptr;
+ unsigned ByteOffset = 0;
- ByteProvider() : Load(nullptr), ByteOffset(0) {}
+ ByteProvider() = default;
static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
return ByteProvider(Load, ByteOffset);
}
+
static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
bool isConstantZero() const { return !Load; }
@@ -4740,6 +5004,8 @@ private:
: Load(Load), ByteOffset(ByteOffset) {}
};
+} // end anonymous namespace
+
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
@@ -4751,9 +5017,9 @@ private:
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
-const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
- unsigned Depth,
- bool Root = false) {
+static const Optional<ByteProvider>
+calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
+ bool Root = false) {
// Typical i64 by i8 pattern requires recursion up to 8 calls depth
if (Depth == 10)
return None;
@@ -4837,7 +5103,6 @@ const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
return None;
}
-} // namespace
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
@@ -4950,7 +5215,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
Loads.insert(L);
}
- assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
+ assert(!Loads.empty() && "All the bytes of the value must be loaded from "
"memory, so there must be at least one load which produces the value");
assert(Base && "Base address of the accessed memory location must be set");
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
@@ -5373,7 +5638,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (isNullConstantOrNullSplatConstant(N0))
return N0;
// fold (shl x, c >= size(x)) -> undef
- if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
+ // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
+ auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
+ return Val->getAPIntValue().uge(OpSizeInBits);
+ };
+ if (matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (shl x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -5400,20 +5669,29 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return SDValue(N, 0);
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- SDLoc DL(N);
- APInt c1 = N0C1->getAPIntValue();
- APInt c2 = N1C->getAPIntValue();
+ if (N0.getOpcode() == ISD::SHL) {
+ auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).uge(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ return DAG.getConstant(0, SDLoc(N), VT);
- APInt Sum = c1 + c2;
- if (Sum.uge(OpSizeInBits))
- return DAG.getConstant(0, DL, VT);
-
- return DAG.getNode(
- ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
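+ // If the combined shift amount stays in range, merge into a single shift:
+ // e.g. (shl (shl x, 3), 4) on i32 --> (shl x, 7).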
+ auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).ult(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ SDLoc DL(N);
+ EVT ShiftVT = N1.getValueType();
+ SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
}
}
@@ -5527,16 +5805,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
// fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
- if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
+ N0.getNode()->hasOneUse() &&
isConstantOrConstantVector(N1, /* No Opaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
AddToWorklist(Shl0.getNode());
AddToWorklist(Shl1.getNode());
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
}
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
@@ -5579,7 +5859,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
// fold (sra x, c >= size(x)) -> undef
- if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
+ // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
+ auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
+ return Val->getAPIntValue().uge(OpSizeInBits);
+ };
+ if (matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -5603,20 +5887,31 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SRA) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- SDLoc DL(N);
- APInt c1 = N0C1->getAPIntValue();
- APInt c2 = N1C->getAPIntValue();
- zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
-
- APInt Sum = c1 + c2;
- if (Sum.uge(OpSizeInBits))
- Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
+ if (N0.getOpcode() == ISD::SRA) {
+ SDLoc DL(N);
+ EVT ShiftVT = N1.getValueType();
- return DAG.getNode(
- ISD::SRA, DL, VT, N0.getOperand(0),
- DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
+ auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).uge(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
+ DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
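+ // e.g. (sra (sra x, 20), 20) on i32 --> (sra x, 31): arithmetic shifts
+ // saturate at the sign bit instead of becoming undef.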
+
+ auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).ult(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
}
}
@@ -5647,7 +5942,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
TLI.isTruncateFree(VT, TruncVT)) {
-
SDLoc DL(N);
SDValue Amt = DAG.getConstant(ShiftAmt, DL,
getShiftAmountTy(N0.getOperand(0).getValueType()));
@@ -5697,7 +5991,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
-
// If the sign bit is known to be zero, switch this to a SRL.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
@@ -5730,7 +6023,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (isNullConstantOrNullSplatConstant(N0))
return N0;
// fold (srl x, c >= size(x)) -> undef
- if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
+ // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
+ auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
+ return Val->getAPIntValue().uge(OpSizeInBits);
+ };
+ if (matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -5745,20 +6042,29 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- SDLoc DL(N);
- APInt c1 = N0C1->getAPIntValue();
- APInt c2 = N1C->getAPIntValue();
+ if (N0.getOpcode() == ISD::SRL) {
+ auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).uge(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ return DAG.getConstant(0, SDLoc(N), VT);
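+ // e.g. (srl (srl x, 20), 20) on i32 shifts out every bit, so fold to 0.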
- APInt Sum = c1 + c2;
- if (Sum.uge(OpSizeInBits))
- return DAG.getConstant(0, DL, VT);
-
- return DAG.getNode(
- ISD::SRL, DL, VT, N0.getOperand(0),
- DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
+ auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).ult(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ SDLoc DL(N);
+ EVT ShiftVT = N1.getValueType();
+ SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
}
}
@@ -6008,7 +6314,6 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
return SDValue();
}
-
/// \brief Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
@@ -6096,7 +6401,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// For any constants that differ by 1, we can transform the select into an
// extend and add. Use a target hook because some targets may prefer to
// transform in the other direction.
- if (TLI.convertSelectOfConstantsToMath()) {
+ if (TLI.convertSelectOfConstantsToMath(VT)) {
if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
// select Cond, C1, C1-1 --> add (zext Cond), C1-1
if (VT != MVT::i1)
@@ -6371,7 +6676,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
}
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
-
if (Level >= AfterLegalizeTypes)
return SDValue();
@@ -6432,7 +6736,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
-
if (Level >= AfterLegalizeTypes)
return SDValue();
@@ -6447,7 +6750,6 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// prevents the type legalizer from unrolling SETCC into scalar comparisons
// and enables future optimizations (e.g. min/max pattern matching on X86).
if (Mask.getOpcode() == ISD::SETCC) {
-
// Check if any splitting is required.
if (TLI.getTypeAction(*DAG.getContext(), VT) !=
TargetLowering::TypeSplitVector)
@@ -6504,11 +6806,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
}
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
-
if (Level >= AfterLegalizeTypes)
return SDValue();
- MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
+ MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
SDLoc DL(N);
@@ -6581,7 +6882,6 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
}
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
-
if (Level >= AfterLegalizeTypes)
return SDValue();
@@ -6593,7 +6893,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
// and enables future optimizations (e.g. min/max pattern matching on X86).
-
if (Mask.getOpcode() == ISD::SETCC) {
EVT VT = N->getValueType(0);
@@ -6665,6 +6964,57 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
return SDValue();
}
+/// A vector select of 2 constant vectors can be simplified to math/logic to
+/// avoid a variable select instruction and possibly avoid constant loads.
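+/// e.g. vselect Cond, <2,2,2,2>, <1,1,1,1> --> add (zext Cond), <1,1,1,1>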
+SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
+ SDValue Cond = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
+ !TLI.convertSelectOfConstantsToMath(VT) ||
+ !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
+ !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
+ return SDValue();
+
+ // Check if we can use the condition value to increment/decrement a single
+ // constant value. This simplifies a select to an add and removes a constant
+ // load/materialization from the general case.
+ bool AllAddOne = true;
+ bool AllSubOne = true;
+ unsigned Elts = VT.getVectorNumElements();
+ for (unsigned i = 0; i != Elts; ++i) {
+ SDValue N1Elt = N1.getOperand(i);
+ SDValue N2Elt = N2.getOperand(i);
+ if (N1Elt.isUndef() || N2Elt.isUndef())
+ continue;
+
+ const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
+ const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
+ if (C1 != C2 + 1)
+ AllAddOne = false;
+ if (C1 != C2 - 1)
+ AllSubOne = false;
+ }
+
+ // Further simplifications for the extra-special cases where the constants are
+ // all 0 or all -1 should be implemented as folds of these patterns.
+ SDLoc DL(N);
+ if (AllAddOne || AllSubOne) {
+ // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
+ // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
+ auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+ SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
+ }
+
+ // The general case for select-of-constants:
+ // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
+ // ...but that only makes sense if a vselect is slower than 2 logic ops, so
+ // leave that to a machine-specific pass.
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -6729,6 +7079,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return CV;
}
+ if (SDValue V = foldVSelectOfConstants(N))
+ return V;
+
return SDValue();
}
@@ -7243,8 +7596,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
+ bool NoReplaceTruncAnd = !N0.hasOneUse();
bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
CombineTo(N, And);
+ // If N0 has multiple uses, change other uses as well.
+ if (NoReplaceTruncAnd) {
+ SDValue TruncAnd =
+ DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
+ CombineTo(N0.getNode(), TruncAnd);
+ }
if (NoReplaceTrunc)
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
else
@@ -7307,7 +7667,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
return SCC;
- if (!VT.isVector()) {
+ if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
EVT SetCCVT = getSetCCResultType(N00VT);
// Don't do this transform for i1 because there's a select transform
// that would reverse it.
@@ -7399,20 +7759,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
}
- // fold (zext (truncate (load x))) -> (zext (smaller load x))
- // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
- if (N0.getOpcode() == ISD::TRUNCATE) {
- if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode *oye = N0.getOperand(0).getNode();
- if (NarrowLoad.getNode() != N0.getNode()) {
- CombineTo(N0.getNode(), NarrowLoad);
- // CombineTo deleted the truncate, if needed, but not what's under it.
- AddToWorklist(oye);
- }
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
-
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
@@ -7445,7 +7791,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
AddToWorklist(Op.getNode());
- return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ // We may safely transfer the debug info describing the truncate node over
+ // to the equivalent AND operation.
+ DAG.transferDbgValues(N0, And);
+ return And;
}
}
@@ -7522,11 +7872,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (!N0.hasOneUse()) {
if (N0.getOpcode() == ISD::AND) {
auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
- auto NarrowLoad = false;
EVT LoadResultTy = AndC->getValueType(0);
- EVT ExtVT, LoadedVT;
- if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
- NarrowLoad))
+ EVT ExtVT;
+ if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT))
DoXform = false;
}
if (DoXform)
@@ -7547,8 +7895,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
+ bool NoReplaceTruncAnd = !N0.hasOneUse();
bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
CombineTo(N, And);
+ // If N0 has multiple uses, change other uses as well.
+ if (NoReplaceTruncAnd) {
+ SDValue TruncAnd =
+ DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
+ CombineTo(N0.getNode(), TruncAnd);
+ }
if (NoReplaceTrunc)
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
else
@@ -7604,10 +7959,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend.
- EVT MatchingElementType = EVT::getIntegerVT(
- *DAG.getContext(), N00VT.getScalarSizeInBits());
- EVT MatchingVectorType = EVT::getVectorVT(
- *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
+ EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
SDValue VsetCC =
DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
N0.getOperand(1), N0.getOperand(2));
@@ -7731,7 +8083,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
ISD::ANY_EXTEND);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = N0.hasOneUse();
- CombineTo(N, ExtLoad);
+ CombineTo(N, ExtLoad);
if (NoReplaceTrunc)
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
else
@@ -7769,13 +8121,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// aext(setcc) -> aext(vsetcc)
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) {
- EVT N0VT = N0.getOperand(0).getValueType();
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (getSetCCResultType(N00VT) == N0.getValueType())
+ return SDValue();
+
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N00VT.getSizeInBits())
return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
@@ -7783,7 +8138,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// elements we can use a matching integer vector type and then
// truncate/any extend
else {
- EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
+ EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
SDValue VsetCC =
DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
@@ -7804,77 +8159,47 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitAssertZext(SDNode *N) {
+SDValue DAGCombiner::visitAssertExt(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- EVT EVT = cast<VTSDNode>(N1)->getVT();
+ EVT AssertVT = cast<VTSDNode>(N1)->getVT();
- // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
- if (N0.getOpcode() == ISD::AssertZext &&
- EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
+ // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
+ if (N0.getOpcode() == Opcode &&
+ AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
return N0;
- return SDValue();
-}
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == Opcode) {
+ // We have an assert, truncate, assert sandwich. Make one stronger assert
+ // by applying the smaller of the two asserted types to the larger source
+ // value. This eliminates the later assert:
+ // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
+ // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
+ SDValue BigA = N0.getOperand(0);
+ EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+ assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
+ "Asserting zero/sign-extended bits to a type larger than the "
+ "truncated destination does not provide information");
-/// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by Mask are used. If so, return the simpler operand,
-/// otherwise return a null SDValue.
-///
-/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
-/// simplify nodes with multiple uses more aggressively.)
-SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
- switch (V.getOpcode()) {
- default: break;
- case ISD::Constant: {
- const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
- assert(CV && "Const value should be ConstSDNode.");
- const APInt &CVal = CV->getAPIntValue();
- APInt NewVal = CVal & Mask;
- if (NewVal != CVal)
- return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
- break;
+ SDLoc DL(N);
+ EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
+ SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
+ SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
+ BigA.getOperand(0), MinAssertVTVal);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
}
- case ISD::OR:
- case ISD::XOR:
- // If the LHS or RHS don't contribute bits to the or, drop them.
- if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
- return V.getOperand(1);
- if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
- return V.getOperand(0);
- break;
- case ISD::SRL:
- // Only look at single-use SRLs.
- if (!V.getNode()->hasOneUse())
- break;
- if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
- // See if we can recursively simplify the LHS.
- unsigned Amt = RHSC->getZExtValue();
- // Watch out for shift count overflow though.
- if (Amt >= Mask.getBitWidth()) break;
- APInt NewMask = Mask << Amt;
- if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
- return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
- SimplifyLHS, V.getOperand(1));
- }
- break;
- case ISD::AND: {
- // X & -1 -> X (ignoring bits which aren't demanded).
- ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
- if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
- return V.getOperand(0);
- break;
- }
- }
return SDValue();
}
/// If the result of a wider load is shifted to right of N bits and then
/// truncated to a narrower type and where N is a multiple of number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
-/// bits of new type. If the result is to be extended, also fold the extension
-/// to form a extending load.
+/// bits of new type. Also narrow the load if the result is masked with an AND
+/// to effectively produce a smaller type. If the result is to be extended, also
+/// fold the extension to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
@@ -7893,28 +8218,40 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
} else if (Opc == ISD::SRL) {
- // Another special-case: SRL is basically zero-extending a narrower value.
+ // Another special-case: SRL is basically zero-extending a narrower value,
+ // or it may be shifting a higher subword, half, or byte into the lowest
+ // bits.
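+ // e.g. (srl (load i32 p), 16) can be narrowed to a zextload i16 from p+2 on
+ // little-endian targets; the big-endian offset adjustment happens below.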
ExtType = ISD::ZEXTLOAD;
N0 = SDValue(N, 0);
- ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!N01) return SDValue();
- ExtVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() - N01->getZExtValue());
- }
- if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
- return SDValue();
- unsigned EVTBits = ExtVT.getSizeInBits();
+ auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
+ auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01 || !LN0)
+ return SDValue();
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (!ExtVT.isRound())
- return SDValue();
+ uint64_t ShiftAmt = N01->getZExtValue();
+ uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
+ if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
+ else
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - ShiftAmt);
+ } else if (Opc == ISD::AND) {
+ // An AND with a constant mask is the same as a truncate + zero-extend.
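+ // e.g. (and (load i32 p), 0xFFFF) acts as (zext (trunc (load i32 p)) to i32)
+ // and can be narrowed to (zextload i16 p).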
+ auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!AndC || !AndC->getAPIntValue().isMask())
+ return SDValue();
+
+ unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+ ExtType = ISD::ZEXTLOAD;
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ }
unsigned ShAmt = 0;
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShAmt = N01->getZExtValue();
+ unsigned EVTBits = ExtVT.getSizeInBits();
// Is the shift amount a multiple of size of VT?
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
@@ -7951,42 +8288,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
}
}
- // If we haven't found a load, we can't narrow it. Don't transform one with
- // multiple uses, this would require adding a new load.
- if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
+ // If we haven't found a load, we can't narrow it.
+ if (!isa<LoadSDNode>(N0))
return SDValue();
- // Don't change the width of a volatile load.
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (LN0->isVolatile())
- return SDValue();
-
- // Verify that we are actually reducing a load width here.
- if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
- return SDValue();
-
- // For the transform to be legal, the load must produce only two values
- // (the value loaded and the chain). Don't transform a pre-increment
- // load, for example, which produces an extra value. Otherwise the
- // transformation is not equivalent, and the downstream logic to replace
- // uses gets things wrong.
- if (LN0->getNumValues() > 2)
- return SDValue();
-
- // If the load that we're shrinking is an extload and we're not just
- // discarding the extension we can't simply shrink the load. Bail.
- // TODO: It would be possible to merge the extensions in some cases.
- if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
- LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
- return SDValue();
-
- if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
- return SDValue();
-
- EVT PtrType = N0.getOperand(1).getValueType();
-
- if (PtrType == MVT::Untyped || PtrType.isExtended())
- // It's not possible to generate a constant of extended or untyped type.
+ if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
// For big endian targets, we need to adjust the offset to the pointer to
@@ -7997,6 +8304,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
}
+ EVT PtrType = N0.getOperand(1).getValueType();
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
SDLoc DL(LN0);
@@ -8130,10 +8438,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
}
// fold (sext_inreg (extload x)) -> (sextload x)
+ // If sextload is not supported by the target, we can only do the combine
+ // when the load has one use. Doing otherwise can block folding the extload
+ // with other extends that the target does support.
if (ISD::isEXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
+ N0.hasOneUse()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@@ -8208,12 +8520,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// noop truncate
if (N0.getValueType() == N->getValueType(0))
return N0;
- // fold (truncate c1) -> c1
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
+
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+
+ // fold (truncate c1) -> c1
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+ SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
+ if (C.getNode() != N)
+ return C;
+ }
+
// fold (truncate (ext x)) -> (ext x) or (truncate x) or x
if (N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
@@ -8245,7 +8563,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// we need to be more careful about the vector instructions that we generate.
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
-
EVT VecTy = N0.getOperand(0).getValueType();
EVT ExTy = N0.getValueType();
EVT TrTy = N->getValueType(0);
@@ -8311,7 +8628,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
N0.getOperand(0).hasOneUse()) {
-
SDValue BuildVect = N0.getOperand(0);
EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
EVT TruncVecEltTy = VT.getVectorElementType();
@@ -8340,9 +8656,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Currently we only perform this optimization on scalars because vectors
// may have different active low bits.
if (!VT.isVector()) {
- if (SDValue Shorter =
- GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
- VT.getSizeInBits())))
+ APInt Mask =
+ APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
+ if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
}
@@ -8413,7 +8729,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Fold truncate of a bitcast of a vector to an extract of the low vector
// element.
//
- // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
+ // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
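+ // where idx is 0 for little-endian and num-elements - 1 for big-endian.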
if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
SDValue VecSrc = N0.getOperand(0);
EVT SrcVT = VecSrc.getValueType();
@@ -8423,8 +8739,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDLoc SL(N);
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
- VecSrc, DAG.getConstant(0, SL, IdxVT));
+ VecSrc, DAG.getConstant(Idx, SL, IdxVT));
}
}
@@ -8466,11 +8783,18 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+
+ // A BUILD_PAIR always has the least significant part in elt 0 and the most
+ // significant part in elt 1, so when combining into one large load we need
+ // to consider the endianness.
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(LD1, LD2);
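+ // e.g. for i64 = BUILD_PAIR(lo, hi), the lo part sits at the lower address
+ // only on little-endian targets, hence the swap above for big-endian.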
+
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
- unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
+ unsigned LD1Bytes = LD1VT.getStoreSize();
if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
unsigned Align = LD1->getAlignment();
@@ -8751,12 +9075,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (Op.getOpcode() == ISD::BITCAST &&
Op.getOperand(0).getValueType() == VT)
return SDValue(Op.getOperand(0));
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
return DAG.getBitcast(VT, Op);
return SDValue();
};
+ // FIXME: If either input vector is bitcast, try to convert the shuffle to
+ // the result type of this bitcast. This would eliminate at least one
+ // bitcast. See the transform in InstCombine.
SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
if (!(SV0 && SV1))
@@ -8949,7 +9276,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
- bool LookThroughFPExt = TLI.isFPExtFree(VT);
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
@@ -8979,28 +9305,31 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (LookThroughFPExt) {
- // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (isContractableFMUL(N00))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)), N1);
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (isContractableFMUL(N00) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)), N1);
}
+ }
- // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (isContractableFMUL(N10))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)), N0);
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (isContractableFMUL(N10) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)), N0);
}
}
@@ -9036,80 +9365,87 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N0));
}
- if (LookThroughFPExt) {
- // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
- // -> (fma x, y, (fma (fpext u), (fpext v), z))
- auto FoldFAddFMAFPExtFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
- };
- if (N0.getOpcode() == PreferredFusedOpcode) {
- SDValue N02 = N0.getOperand(2);
- if (N02.getOpcode() == ISD::FP_EXTEND) {
- SDValue N020 = N02.getOperand(0);
- if (isContractableFMUL(N020))
- return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
- N020.getOperand(0), N020.getOperand(1),
- N1);
+
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ auto FoldFAddFMAFPExtFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (isContractableFMUL(N020) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+ return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+ N020.getOperand(0), N020.getOperand(1),
+ N1);
}
}
+ }
- // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
- // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- auto FoldFAddFPExtFMAFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
- };
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
- SDValue N002 = N00.getOperand(2);
- if (isContractableFMUL(N002))
- return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
- N002.getOperand(0), N002.getOperand(1),
- N1);
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ auto FoldFAddFPExtFMAFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (isContractableFMUL(N002) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+ N002.getOperand(0), N002.getOperand(1),
+ N1);
}
}
+ }
- // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
- // -> (fma y, z, (fma (fpext u), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode) {
- SDValue N12 = N1.getOperand(2);
- if (N12.getOpcode() == ISD::FP_EXTEND) {
- SDValue N120 = N12.getOperand(0);
- if (isContractableFMUL(N120))
- return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
- N120.getOperand(0), N120.getOperand(1),
- N0);
+ // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
+ // -> (fma y, z, (fma (fpext u), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode) {
+ SDValue N12 = N1.getOperand(2);
+ if (N12.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N12.getOperand(0);
+ if (isContractableFMUL(N120) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+ N120.getOperand(0), N120.getOperand(1),
+ N0);
}
}
+ }
- // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
- // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == PreferredFusedOpcode) {
- SDValue N102 = N10.getOperand(2);
- if (isContractableFMUL(N102))
- return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
- N102.getOperand(0), N102.getOperand(1),
- N0);
+ // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
+ // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == PreferredFusedOpcode) {
+ SDValue N102 = N10.getOperand(2);
+ if (isContractableFMUL(N102) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+ N102.getOperand(0), N102.getOperand(1),
+ N0);
}
}
}
@@ -9151,7 +9487,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
- bool LookThroughFPExt = TLI.isFPExtFree(VT);
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
@@ -9187,79 +9522,83 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (LookThroughFPExt) {
- // fold (fsub (fpext (fmul x, y)), z)
- // -> (fma (fpext x), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (isContractableFMUL(N00))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (isContractableFMUL(N00) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
}
+ }
- // fold (fsub x, (fpext (fmul y, z)))
- // -> (fma (fneg (fpext y)), (fpext z), x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (isContractableFMUL(N10))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (isContractableFMUL(N10) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)),
+ N0);
+ }
+ }
+
+ // fold (fsub (fpext (fneg (fmul x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // us from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (isContractableFMUL(N000) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)),
- N0);
- }
-
- // fold (fsub (fpext (fneg (fmul, x, y))), z)
- // -> (fneg (fma (fpext x), (fpext y), z))
- // Note: This could be removed with appropriate canonicalization of the
- // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
- // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
- // from implementing the canonicalization in visitFSUB.
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FNEG) {
- SDValue N000 = N00.getOperand(0);
- if (isContractableFMUL(N000)) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1));
- }
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
}
}
+ }
- // fold (fsub (fneg (fpext (fmul, x, y))), z)
- // -> (fneg (fma (fpext x)), (fpext y), z)
- // Note: This could be removed with appropriate canonicalization of the
- // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
- // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
- // from implementing the canonicalization in visitFSUB.
- if (N0.getOpcode() == ISD::FNEG) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FP_EXTEND) {
- SDValue N000 = N00.getOperand(0);
- if (isContractableFMUL(N000)) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1));
- }
+ // fold (fsub (fneg (fpext (fmul x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // us from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (isContractableFMUL(N000) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
}
}
-
}
// More folding opportunities when target permits.
@@ -9298,102 +9637,108 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N21, N0));
}
- if (LookThroughFPExt) {
- // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
- // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode) {
- SDValue N02 = N0.getOperand(2);
- if (N02.getOpcode() == ISD::FP_EXTEND) {
- SDValue N020 = N02.getOperand(0);
- if (isContractableFMUL(N020))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
- }
- }
- // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
- // -> (fma (fpext x), (fpext y),
- // (fma (fpext u), (fpext v), (fneg z)))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
- SDValue N002 = N00.getOperand(2);
- if (isContractableFMUL(N002))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
- }
- }
-
- // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
- // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
- SDValue N120 = N1.getOperand(2).getOperand(0);
- if (isContractableFMUL(N120)) {
- SDValue N1200 = N120.getOperand(0);
- SDValue N1201 = N120.getOperand(1);
+ // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (isContractableFMUL(N020) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
- N1.getOperand(1),
+ N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1200)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1201),
- N0));
+ N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
}
}
+ }
- // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
- // -> (fma (fneg (fpext y)), (fpext z),
- // (fma (fneg (fpext u)), (fpext v), x))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND &&
- N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
- SDValue N100 = N1.getOperand(0).getOperand(0);
- SDValue N101 = N1.getOperand(0).getOperand(1);
- SDValue N102 = N1.getOperand(0).getOperand(2);
- if (isContractableFMUL(N102)) {
- SDValue N1020 = N102.getOperand(0);
- SDValue N1021 = N102.getOperand(1);
+ // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y),
+ // (fma (fpext u), (fpext v), (fneg z)))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (isContractableFMUL(N002) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N100)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1020)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1021),
- N0));
+ N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
}
}
}
+
+ // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
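+ // i.e. x - (y*z + fpext(u*v)) is evaluated as (-y)*z + ((-u)*v + x).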
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N1.getOperand(2).getOperand(0);
+ if (isContractableFMUL(N120) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ SDValue N1200 = N120.getOperand(0);
+ SDValue N1201 = N120.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1201),
+ N0));
+ }
+ }
+
+ // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fneg (fpext y)), (fpext z),
+ // (fma (fneg (fpext u)), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ SDValue CvtSrc = N1.getOperand(0);
+ SDValue N100 = CvtSrc.getOperand(0);
+ SDValue N101 = CvtSrc.getOperand(1);
+ SDValue N102 = CvtSrc.getOperand(2);
+ if (isContractableFMUL(N102) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
+ SDValue N1020 = N102.getOperand(0);
+ SDValue N1021 = N102.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1021),
+ N0));
+ }
+ }
}
return SDValue();
@@ -9959,6 +10304,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
+
+ // fma (fneg x), K, y -> fma x, -K, y
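+ // The negation is free here: either ConstantFP is legal, so -K can be
+ // materialized directly, or K is single-use and not a legal FP immediate,
+ // so it has to be loaded from the constant pool either way.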
+ if (N0.getOpcode() == ISD::FNEG &&
+ (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
+ return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
+ }
}
if (Options.UnsafeFPMath) {
@@ -10081,8 +10434,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
(!LegalOperations ||
// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
// backend)... we should handle this gracefully after Legalize.
- // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
- TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getConstantFP(Recip, DL, VT), Flags);
@@ -10264,7 +10617,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
- TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
@@ -10282,7 +10635,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
!VT.isVector() &&
(!LegalOperations ||
- TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
@@ -10296,7 +10649,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
if (N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
(!LegalOperations ||
- TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
@@ -10318,7 +10671,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
- TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
// If the input is a legal type, and UINT_TO_FP is not legal on this target,
@@ -10333,10 +10686,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
// The next optimizations are desirable only if SELECT_CC can be lowered.
if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
-
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations ||
- TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
@@ -10557,6 +10909,19 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
+ // fold ftrunc (known rounded int x) -> x
+ // ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
+ // likely to be generated when extracting an integer from a rounded
+ // floating-point value.
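+ // e.g. ftrunc (ffloor x) --> ffloor x, since ffloor already produces an
+ // integral value and truncation toward zero then changes nothing.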
+ switch (N0.getOpcode()) {
+ default: break;
+ case ISD::FRINT:
+ case ISD::FTRUNC:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ return N0;
+ }
+
return SDValue();
}
@@ -11160,6 +11525,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Replace the uses of Ptr with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
deleteAndRecombine(Ptr.getNode());
+ AddToWorklist(Result.getNode());
return true;
}
@@ -11445,6 +11811,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
}
namespace {
+
/// \brief Helper structure used to slice a load into smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
@@ -11462,21 +11829,19 @@ struct LoadedSlice {
struct Cost {
/// Are we optimizing for code size.
bool ForCodeSize;
+
/// Various costs.
- unsigned Loads;
- unsigned Truncates;
- unsigned CrossRegisterBanksCopies;
- unsigned ZExts;
- unsigned Shift;
+ unsigned Loads = 0;
+ unsigned Truncates = 0;
+ unsigned CrossRegisterBanksCopies = 0;
+ unsigned ZExts = 0;
+ unsigned Shift = 0;
- Cost(bool ForCodeSize = false)
- : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
- CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
+ Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
/// \brief Get the cost of one isolated slice.
Cost(const LoadedSlice &LS, bool ForCodeSize = false)
- : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
- CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
+ : ForCodeSize(ForCodeSize), Loads(1) {
EVT TruncType = LS.Inst->getValueType(0);
EVT LoadedType = LS.getLoadedType();
if (TruncType != LoadedType &&
@@ -11538,13 +11903,17 @@ struct LoadedSlice {
bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
};
+
// The last instruction that represents the slice. This should be a
// truncate instruction.
SDNode *Inst;
+
// The original load instruction.
LoadSDNode *Origin;
+
// The right shift amount in bits from the original load.
unsigned Shift;
+
// The DAG from which Origin came.
// This is used to get some contextual information about legal types, etc.
SelectionDAG *DAG;
@@ -11746,7 +12115,8 @@ struct LoadedSlice {
return true;
}
};
-}
+
+} // end anonymous namespace
/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
@@ -11804,7 +12174,6 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
// Set the beginning of the pair.
First = Second) {
-
Second = &LoadedSlices[CurrSlice];
// If First is NULL, it means we start a new pair.
@@ -11935,7 +12304,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
// will be across several bytes. We do not support that.
unsigned Width = User->getValueSizeInBits(0);
if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
- return 0;
+ return false;
// Build the slice for this chain of computations.
LoadedSlice LS(User, LD, Shift, &DAG);
@@ -12060,7 +12429,6 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
return Result;
}
-
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
@@ -12121,7 +12489,6 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
.getNode();
}
-
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
@@ -12325,7 +12692,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
// Walk all the users of the constant with which we're multiplying.
for (SDNode *Use : ConstNode->uses()) {
-
if (Use == MulNode) // This use is the one we're on right now. Skip it.
continue;
@@ -12376,6 +12742,12 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
return false;
}
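+/// Look through any number of BITCAST nodes, e.g. (bitcast (bitcast x)) -> x.
+/// This lets the store merging code below match constants, loads and vector
+/// extracts through intervening bitcasts.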
+static SDValue peekThroughBitcast(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ return V;
+}
+
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores) {
SmallVector<SDValue, 8> Chains;
@@ -12403,56 +12775,93 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
if (NumStores < 2)
return false;
- int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
-
// The latest Node in the DAG.
SDLoc DL(StoreNodes[0].MemNode);
- SDValue StoredVal;
+ int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
+ unsigned SizeInBits = NumStores * ElementSizeBits;
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+
+ EVT StoreTy;
if (UseVector) {
- bool IsVec = MemVT.isVector();
- unsigned Elts = NumStores;
- if (IsVec) {
- // When merging vector stores, get the total number of elements.
- Elts *= MemVT.getVectorNumElements();
- }
+ unsigned Elts = NumStores * NumMemElts;
// Get the type for the merged vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
- assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+ StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ } else
+ StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ SDValue StoredVal;
+ if (UseVector) {
if (IsConstantSrc) {
SmallVector<SDValue, 8> BuildVector;
- for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+ for (unsigned I = 0; I != NumStores; ++I) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
SDValue Val = St->getValue();
- if (MemVT.getScalarType().isInteger())
- if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
- Val = DAG.getConstant(
- (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
- SDLoc(CFP), MemVT);
+ // If constant is of the wrong type, convert it now.
+ if (MemVT != Val.getValueType()) {
+ Val = peekThroughBitcast(Val);
+ // Deal with constants of wrong size.
+ if (ElementSizeBits != Val.getValueSizeInBits()) {
+ EVT IntMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ if (isa<ConstantFPSDNode>(Val)) {
+ // Not clear how to truncate FP values.
+ return false;
+ } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
+ Val = DAG.getConstant(C->getAPIntValue()
+ .zextOrTrunc(Val.getValueSizeInBits())
+ .zextOrTrunc(ElementSizeBits),
+ SDLoc(C), IntMemVT);
+ }
+ // Bitcast the correctly sized value to the memory element type.
+ Val = DAG.getBitcast(MemVT, Val);
+ }
BuildVector.push_back(Val);
}
- StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
+ StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, StoreTy, BuildVector);
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue Val = St->getValue();
- // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
- if (Val.getValueType() != MemVT)
- return false;
+ SDValue Val = peekThroughBitcast(St->getValue());
+ // All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
+ // type MemVT. If the underlying value is not the correct
+ // type, but it is an extraction of an appropriate vector, we
+ // can recast Val to the correct type. This may require
+ // converting between EXTRACT_VECTOR_ELT and
+ // EXTRACT_SUBVECTOR.
+ if ((MemVT != Val.getValueType()) &&
+ (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
+ SDValue Vec = Val.getOperand(0);
+ EVT MemVTScalarTy = MemVT.getScalarType();
+ // We may need to add a bitcast here to get types to line up.
+ if (MemVTScalarTy != Vec.getValueType()) {
+ unsigned Elts = Vec.getValueType().getSizeInBits() /
+ MemVTScalarTy.getSizeInBits();
+ EVT NewVecTy =
+ EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
+ Vec = DAG.getBitcast(NewVecTy, Vec);
+ }
+ auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
+ : ISD::EXTRACT_VECTOR_ELT;
+ Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
+ }
Ops.push_back(Val);
}
// Build the extracted vector elements back into a vector.
- StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
- DL, Ty, Ops); }
+ StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, StoreTy, Ops);
+ }
} else {
// We should always use a vector store when merging extracted vector
// elements, so this path implies a store of constants.
assert(IsConstantSrc && "Merged vector elements should use vector store");
- unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
APInt StoreInt(SizeInBits, 0);
// Construct a single integer constant which is made of the smaller
@@ -12463,18 +12872,25 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
- StoreInt <<= ElementSizeBytes * 8;
+ StoreInt <<= ElementSizeBits;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
- StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
+ StoreInt |= C->getAPIntValue()
+ .zextOrTrunc(ElementSizeBits)
+ .zextOrTrunc(SizeInBits);
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
- StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
+ StoreInt |= C->getValueAPF()
+ .bitcastToAPInt()
+ .zextOrTrunc(ElementSizeBits)
+ .zextOrTrunc(SizeInBits);
+ // If fp truncation is necessary, give up for now.
+ if (MemVT.getSizeInBits() != ElementSizeBits)
+ return false;
} else {
llvm_unreachable("Invalid constant element type");
}
}
// Create the new Load and Store operations.
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
@@ -12483,7 +12899,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// make sure we use trunc store if it's necessary to be legal.
SDValue NewStore;
- if (UseVector || !UseTrunc) {
+ if (!UseTrunc) {
NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
FirstInChain->getAlignment());
@@ -12517,6 +12933,7 @@ void DAGCombiner::getStoreMergeCandidates(
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
EVT MemVT = St->getMemoryVT();
+ SDValue Val = peekThroughBitcast(St->getValue());
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
return;
@@ -12525,47 +12942,62 @@ void DAGCombiner::getStoreMergeCandidates(
if (BasePtr.getBase().isUndef())
return;
- bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
- isa<ConstantFPSDNode>(St->getValue());
- bool IsExtractVecSrc =
- (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
- bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
+ bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
+ bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+ bool IsLoadSrc = isa<LoadSDNode>(Val);
BaseIndexOffset LBasePtr;
// Match on loadbaseptr if relevant.
- if (IsLoadSrc)
- LBasePtr = BaseIndexOffset::match(
- cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
-
+ EVT LoadVT;
+ if (IsLoadSrc) {
+ auto *Ld = cast<LoadSDNode>(Val);
+ LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+ LoadVT = Ld->getMemoryVT();
+ // Load and store should be the same type.
+ if (MemVT != LoadVT)
+ return;
+ }
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
if (Other->isVolatile() || Other->isIndexed())
return false;
- // We can merge constant floats to equivalent integers
- if (Other->getMemoryVT() != MemVT)
- if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
- isa<ConstantFPSDNode>(Other->getValue())))
- return false;
+ SDValue Val = peekThroughBitcast(Other->getValue());
+ // Allow merging constants of different types as integers.
+ bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
+ : Other->getMemoryVT() != MemVT;
if (IsLoadSrc) {
+ if (NoTypeMatch)
+ return false;
// The Load's Base Ptr must also match
- if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
+ if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
+ if (LoadVT != OtherLd->getMemoryVT())
+ return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
return false;
}
- if (IsConstantSrc)
- if (!(isa<ConstantSDNode>(Other->getValue()) ||
- isa<ConstantFPSDNode>(Other->getValue())))
+ if (IsConstantSrc) {
+ if (NoTypeMatch)
return false;
- if (IsExtractVecSrc)
- if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
+ if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
+ return false;
+ }
+ if (IsExtractVecSrc) {
+ // Do not merge truncated stores here.
+ if (Other->isTruncatingStore())
return false;
+ if (!MemVT.bitsEq(Val.getValueType()))
+ return false;
+ if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+ Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
+ }
Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
+
+ // We are looking for a root node that is an ancestor of all mergeable
+ // stores. We search up through a load, to our root and then down
+ // through all children. For instance, we will find Store{1,2,3} if
@@ -12612,10 +13044,8 @@ void DAGCombiner::getStoreMergeCandidates(
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
-
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
-
// FIXME: We should be able to truncate a full search of
// predecessors by doing a BFS and keeping tabs on the originating
// stores that worklist nodes come from in a similar way to
@@ -12648,12 +13078,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
return false;
EVT MemVT = St->getMemoryVT();
- int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
return false;
- bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
// This function cannot currently deal with non-byte-sized memory sizes.
@@ -12665,7 +13096,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Perform an early exit check. Do not bother looking at stored values that
// are not constants, loads, or extracted vector elements.
- SDValue StoredVal = St->getValue();
+ SDValue StoredVal = peekThroughBitcast(St->getValue());
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
@@ -12675,12 +13106,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
return false;
- // Don't merge vectors into wider vectors if the source data comes from loads.
- // TODO: This restriction can be lifted by using logic similar to the
- // ExtractVecSrc case.
- if (MemVT.isVector() && IsLoadSrc)
- return false;
-
SmallVector<MemOpLink, 8> StoreNodes;
// Find potential store merge candidates by searching through chain sub-DAG
getStoreMergeCandidates(St, StoreNodes);
@@ -12759,19 +13184,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
unsigned LastLegalVectorType = 1;
bool LastIntegerTrunc = false;
bool NonZero = false;
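+ // Remember where the first zero that follows a non-zero value occurs: if a
+ // merge attempt fails we may drop leading candidates, but never past this
+ // point, since a sequence starting at that zero may still merge cheaply.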
+ unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue StoredVal = ST->getValue();
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
- NonZero |= !C->isNullValue();
- } else if (ConstantFPSDNode *C =
- dyn_cast<ConstantFPSDNode>(StoredVal)) {
- NonZero |= !C->getConstantFPValue()->isNullValue();
- } else {
- // Non-constant.
- break;
+ bool IsElementZero = false;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
+ IsElementZero = C->isNullValue();
+ else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
+ IsElementZero = C->getConstantFPValue()->isNullValue();
+ if (IsElementZero) {
+ if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
+ FirstZeroAfterNonZero = i;
}
+ NonZero |= !IsElementZero;
// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
@@ -12791,8 +13217,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
IsFast) {
LastIntegerTrunc = true;
LastLegalType = i + 1;
@@ -12806,13 +13232,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
!NoVectors) {
// Find a legal type for the vector store.
- unsigned Elts = i + 1;
- if (MemVT.isVector()) {
- // When merging vector stores, get the total number of elements.
- Elts *= MemVT.getVectorNumElements();
- }
+ unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- if (TLI.isTypeLegal(Ty) &&
+ if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
@@ -12821,23 +13243,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
}
}
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+
// Check if we found a legal type that creates a meaningful merge.
- if (LastLegalType < 2 && LastLegalVectorType < 2) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have is if the alignment has
+ // improved or we've dropped a non-zero value. Drop as many
+ // candidates as we can here.
+ unsigned NumSkip = 1;
+ while (
+ (NumSkip < NumConsecutiveStores) &&
+ (NumSkip < FirstZeroAfterNonZero) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
+ NumSkip++;
+ }
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
continue;
}
- bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
- unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
-
bool Merged = MergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
- if (!Merged) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- continue;
- }
+ RV |= Merged;
+
// Remove merged stores for next iteration.
- RV = true;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
continue;
}
@@ -12849,25 +13282,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned NumStoresToMerge = 1;
- bool IsVec = MemVT.isVector();
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- unsigned StoreValOpcode = St->getValue().getOpcode();
+ SDValue StVal = peekThroughBitcast(St->getValue());
// This restriction could be loosened.
// Bail out if any stored values are not elements extracted from a
// vector. It should be possible to handle mixed sources, but load
// sources need more careful handling (see the block of code below that
// handles consecutive loads).
- if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
- StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
+ if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+ StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return RV;
// Find a legal type for the vector store.
- unsigned Elts = i + 1;
- if (IsVec) {
- // When merging vector stores, get the total number of elements.
- Elts *= MemVT.getVectorNumElements();
- }
+ unsigned Elts = (i + 1) * NumMemElts;
EVT Ty =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
bool IsFast;
@@ -12879,6 +13307,23 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
NumStoresToMerge = i + 1;
}
+ // Check if we found a legal vector type that creates a meaningful merge.
+ if (NumStoresToMerge < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have is if the alignment has
+ // improved. Drop as many candidates as we can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < NumConsecutiveStores) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ continue;
+ }
+
bool Merged = MergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumStoresToMerge, false, true, false);
if (!Merged) {
@@ -12905,7 +13350,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
BaseIndexOffset LdBasePtr;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+ SDValue Val = peekThroughBitcast(St->getValue());
+ LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
if (!Ld)
break;
@@ -12917,10 +13363,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (Ld->isVolatile() || Ld->isIndexed())
break;
- // We do not accept ext loads.
- if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
- break;
-
// The stored memory type must be the same.
if (Ld->getMemoryVT() != MemVT)
break;
@@ -12986,7 +13428,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
isDereferenceable = false;
// Find a legal type for the vector store.
- EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+
bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
@@ -13023,8 +13467,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
FirstLoadAlign, &IsFastLd) &&
@@ -13047,7 +13491,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
NumElem = std::min(LastLegalType, NumElem);
if (NumElem < 2) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have is if the alignment of either
+ // the load or store has improved. Drop as many candidates as we
+ // can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < LoadNodes.size()) &&
+ (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
continue;
}
@@ -13055,7 +13511,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// to memory.
EVT JointMemOpVT;
if (UseVectorTy) {
- JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
+ // Find a legal type for the vector store.
+ unsigned Elts = NumElem * NumMemElts;
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
} else {
unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
@@ -13104,12 +13562,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
SDValue(NewLoad.getNode(), 1));
}
- // Replace the all stores with the new store.
- for (unsigned i = 0; i < NumElem; ++i)
+ // Replace all the stores with the new store. Recursively remove the
+ // corresponding values if they are no longer used.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ SDValue Val = StoreNodes[i].MemNode->getOperand(1);
CombineTo(StoreNodes[i].MemNode, NewStore);
+ if (Val.getNode()->use_empty())
+ recursivelyDeleteUnusedNodes(Val.getNode());
+ }
+
RV = true;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- continue;
}
return RV;
}
@@ -13284,7 +13747,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
- SDValue Shorter = GetDemandedBits(
+ SDValue Shorter = DAG.GetDemandedBits(
Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
ST->getMemoryVT().getScalarSizeInBits()));
AddToWorklist(Value.getNode());
@@ -13356,11 +13819,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Ptr, ST->getMemoryVT(), ST->getMemOperand());
}
- // Only perform this optimization before the types are legal, because we
- // don't want to perform this optimization on every DAGCombine invocation.
- if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
- : !LegalTypes) {
- for (;;) {
+ // Always perform this optimization before types are legal. If the target
+ // prefers, also try this after legalization to catch stores that were created
+ // by intrinsics or other nodes.
+ if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
+ while (true) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
@@ -13499,6 +13962,60 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
return St1;
}
+/// Convert a disguised subvector insertion into a shuffle:
+/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
+/// bitcast(shuffle (bitcast V), (extended X), Mask)
+/// Note: We do not use an insert_subvector node because that requires a legal
+/// subvector type.
+SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+ SDValue InsertVal = N->getOperand(1);
+ if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
+ !InsertVal.getOperand(0).getValueType().isVector())
+ return SDValue();
+
+ SDValue SubVec = InsertVal.getOperand(0);
+ SDValue DestVec = N->getOperand(0);
+ EVT SubVecVT = SubVec.getValueType();
+ EVT VT = DestVec.getValueType();
+ unsigned NumSrcElts = SubVecVT.getVectorNumElements();
+ unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
+ unsigned NumMaskVals = ExtendRatio * NumSrcElts;
+
+ // Step 1: Create a shuffle mask that implements this insert operation. The
+ // vector that we are inserting into will be operand 0 of the shuffle, so
+ // those elements are just 'i'. The inserted subvector is in the first
+ // positions of operand 1 of the shuffle. Example:
+ // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
+ SmallVector<int, 16> Mask(NumMaskVals);
+ for (unsigned i = 0; i != NumMaskVals; ++i) {
+ if (i / NumSrcElts == InsIndex)
+ Mask[i] = (i % NumSrcElts) + NumMaskVals;
+ else
+ Mask[i] = i;
+ }
+
+ // Bail out if the target cannot handle the shuffle we want to create.
+ EVT SubVecEltVT = SubVecVT.getVectorElementType();
+ EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
+ if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
+ return SDValue();
+
+ // Step 2: Create a wide vector from the inserted source vector by appending
+ // undefined elements. This is the same size as our destination vector.
+ SDLoc DL(N);
+ SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
+ ConcatOps[0] = SubVec;
+ SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
+
+ // Step 3: Shuffle in the padded subvector.
+ SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
+ SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
+ AddToWorklist(PaddedSubV.getNode());
+ AddToWorklist(DestVecBC.getNode());
+ AddToWorklist(Shuf.getNode());
+ return DAG.getBitcast(VT, Shuf);
+}
+
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
@@ -13511,10 +14028,20 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
EVT VT = InVec.getValueType();
- // Check that we know which element is being inserted
- if (!isa<ConstantSDNode>(EltNo))
+ // Remove redundant insertions:
+ // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
+ return InVec;
+
+ // We must know which element is being inserted for folds below here.
+ auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
+ if (!IndexC)
return SDValue();
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ unsigned Elt = IndexC->getZExtValue();
+
+ if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
+ return Shuf;
// Canonicalize insert_vector_elt dag nodes.
// Example:
@@ -13692,9 +14219,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// converts.
}
- // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
+ // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
+ bool isLE = DAG.getDataLayout().isLittleEndian();
+ unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
- ConstEltNo->isNullValue() && VT.isInteger()) {
+ ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
SDValue BCSrc = InVec.getOperand(0);
if (BCSrc.getValueType().isScalarInteger())
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
@@ -13748,7 +14277,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// FIXME: We should handle recursing on other vector shuffles and
// scalar_to_vector here as well.
- if (!LegalOperations) {
+ if (!LegalOperations ||
+ // FIXME: Should really be just isOperationLegalOrCustom.
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
+ TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
@@ -14054,10 +14586,18 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
EVT InVT1 = VecIn1.getValueType();
EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
- unsigned Vec2Offset = InVT1.getVectorNumElements();
+ unsigned Vec2Offset = 0;
unsigned NumElems = VT.getVectorNumElements();
unsigned ShuffleNumElems = NumElems;
+ // If both input vectors are extracted from the same base
+ // vector, we do not need the extra addend (Vec2Offset) while
+ // computing the shuffle mask.
+ if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
+ !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
+ !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
+ Vec2Offset = InVT1.getVectorNumElements();
+
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
if (InVT1 != VT || InVT2 != VT) {
@@ -14072,7 +14612,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
VecIn2 = SDValue();
} else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
- if (!TLI.isExtractSubvectorCheap(VT, NumElems))
+ if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
return SDValue();
if (!VecIn2.getNode()) {
@@ -14204,7 +14744,6 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
-
SDValue ExtractedFromVec = Op.getOperand(0);
// All inputs must have the same element type as the output.
@@ -14227,6 +14766,50 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
if (VecIn.size() < 2)
return SDValue();
+ // If all the operands of BUILD_VECTOR extract from the same
+ // vector, then split the vector efficiently based on the maximum
+ // vector access index and adjust VectorMask and
+ // VecIn accordingly.
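+ // e.g. for a 4-element build vector whose extract indices reach up to 27,
+ // the source is split into two 16-element subvectors (elements 0-15 and
+ // 16-31) so the final shuffle operates on narrower types.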
+ if (VecIn.size() == 2) {
+ unsigned MaxIndex = 0;
+ unsigned NearestPow2 = 0;
+ SDValue Vec = VecIn.back();
+ EVT InVT = Vec.getValueType();
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SmallVector<unsigned, 8> IndexVec(NumElems, 0);
+
+ for (unsigned i = 0; i < NumElems; i++) {
+ if (VectorMask[i] <= 0)
+ continue;
+ unsigned Index = N->getOperand(i).getConstantOperandVal(1);
+ IndexVec[i] = Index;
+ MaxIndex = std::max(MaxIndex, Index);
+ }
+
+ NearestPow2 = PowerOf2Ceil(MaxIndex);
+ if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
+ NumElems * 2 < NearestPow2) {
+ unsigned SplitSize = NearestPow2 / 2;
+ EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), SplitSize);
+ if (TLI.isTypeLegal(SplitVT)) {
+ SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+ DAG.getConstant(SplitSize, DL, IdxTy));
+ SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+ DAG.getConstant(0, DL, IdxTy));
+ VecIn.pop_back();
+ VecIn.push_back(VecIn1);
+ VecIn.push_back(VecIn2);
+
+ for (unsigned i = 0; i < NumElems; i++) {
+ if (VectorMask[i] <= 0)
+ continue;
+ VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
+ }
+ }
+ }
+ }
+
// TODO: We want to sort the vectors by descending length, so that adjacent
// pairs have similar length, and the longer vector is always first in the
// pair.
@@ -14315,77 +14898,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
}
}
-
return Shuffles[0];
}
-// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
-// operations which can be matched to a truncate.
-SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) {
- // TODO: Add support for big-endian.
- if (DAG.getDataLayout().isBigEndian())
- return SDValue();
- if (N->getNumOperands() < 2)
- return SDValue();
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
- unsigned NumElems = N->getNumOperands();
-
- if (!isTypeLegal(VT))
- return SDValue();
-
- // If the input is something other than an EXTRACT_VECTOR_ELT with a constant
- // index, bail out.
- // TODO: Allow undef elements in some cases?
- if (any_of(N->ops(), [VT](SDValue Op) {
- return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(Op.getOperand(1)) ||
- Op.getValueType() != VT.getVectorElementType();
- }))
- return SDValue();
-
- // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index
- auto GetExtractIdx = [](SDValue Extract) {
- return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue();
- };
-
- // The first BUILD_VECTOR operand must be an an extract from index zero
- // (assuming no undef and little-endian).
- if (GetExtractIdx(N->getOperand(0)) != 0)
- return SDValue();
-
- // Compute the stride from the first index.
- int Stride = GetExtractIdx(N->getOperand(1));
- SDValue ExtractedFromVec = N->getOperand(0).getOperand(0);
-
- // Proceed only if the stride and the types can be matched to a truncate.
- if ((Stride == 1 || !isPowerOf2_32(Stride)) ||
- (ExtractedFromVec.getValueType().getVectorNumElements() !=
- Stride * NumElems) ||
- (VT.getScalarSizeInBits() * Stride > 64))
- return SDValue();
-
- // Check remaining operands are consistent with the computed stride.
- for (unsigned i = 1; i != NumElems; ++i) {
- SDValue Op = N->getOperand(i);
-
- if ((Op.getOperand(0) != ExtractedFromVec) ||
- (GetExtractIdx(Op) != Stride * i))
- return SDValue();
- }
-
- // All checks were ok, construct the truncate.
- LLVMContext &Ctx = *DAG.getContext();
- EVT NewVT = VT.getVectorVT(
- Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems);
- EVT TruncVT =
- VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
-
- SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec);
- Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res);
- return DAG.getBitcast(VT, Res);
-}
-
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
@@ -14428,10 +14943,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
return V;
- if (TLI.isDesirableToCombineBuildVectorToTruncate())
- if (SDValue V = reduceBuildVecToTrunc(N))
- return V;
-
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
@@ -14514,8 +15025,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
for (SDValue Op : N->ops()) {
// Peek through any bitcast.
- while (Op.getOpcode() == ISD::BITCAST)
- Op = Op.getOperand(0);
+ Op = peekThroughBitcast(Op);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (Op.isUndef()) {
@@ -14534,8 +15044,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
EVT ExtVT = ExtVec.getValueType();
// Peek through any bitcast.
- while (ExtVec.getOpcode() == ISD::BITCAST)
- ExtVec = ExtVec.getOperand(0);
+ ExtVec = peekThroughBitcast(ExtVec);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (ExtVec.isUndef()) {
@@ -14760,9 +15269,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// We are looking for an optionally bitcasted wide vector binary operator
// feeding an extract subvector.
- SDValue BinOp = Extract->getOperand(0);
- if (BinOp.getOpcode() == ISD::BITCAST)
- BinOp = BinOp.getOperand(0);
+ SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
// TODO: The motivating case for this transform is an x86 AVX1 target. That
// target has temptingly almost legal versions of bitwise logic ops in 256-bit
@@ -14786,13 +15293,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
// Peek through bitcasts of the binary operator operands if needed.
- SDValue LHS = BinOp.getOperand(0);
- if (LHS.getOpcode() == ISD::BITCAST)
- LHS = LHS.getOperand(0);
-
- SDValue RHS = BinOp.getOperand(1);
- if (RHS.getOpcode() == ISD::BITCAST)
- RHS = RHS.getOperand(0);
+ SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
+ SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
// We need at least one concatenation operation of a binop operand to make
// this transform worthwhile. The concat must double the input vector sizes.
@@ -14891,8 +15393,34 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
// Skip bitcasting
- if (V->getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
+ V = peekThroughBitcast(V);
+
+ // If the input is a build vector, try to make a smaller build vector.
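+ // e.g. extracting the upper v2i32 half of (v4i32 build_vector a, b, c, d)
+ // at index 2 becomes (v2i32 build_vector c, d).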
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ EVT InVT = V->getValueType(0);
+ unsigned ExtractSize = NVT.getSizeInBits();
+ unsigned EltSize = InVT.getScalarSizeInBits();
+ // Only do this if we won't split any elements.
+ if (ExtractSize % EltSize == 0) {
+ unsigned NumElems = ExtractSize / EltSize;
+ EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), NumElems);
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) &&
+ (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
+ unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
+ EltSize;
+
+ // Extract the pieces from the original build_vector.
+ SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
+ makeArrayRef(V->op_begin() + IdxVal,
+ NumElems));
+ return DAG.getBitcast(NVT, BuildVec);
+ }
+ }
+ }
+ }
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
// Handle only simple case where vector being inserted and vector
@@ -15013,6 +15541,37 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
}
+static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
+ SDValue N1, SelectionDAG &DAG) {
+ auto isUndefElt = [](SDValue V, int Idx) {
+ // TODO - handle more cases as required.
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Idx).isUndef();
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Idx != 0) || V.getOperand(0).isUndef();
+ return false;
+ };
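+ // e.g. if N0 = build_vector (a, undef, c, d), any mask element selecting
+ // lane 1 of N0 can itself be replaced by undef (-1).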
+
+ EVT VT = SVN->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) ||
+ ((int)NumElts <= Idx && isUndefElt(N1, Idx - NumElts))) {
+ Changed = true;
+ Idx = -1;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
+
+ return SDValue();
+}
+
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
@@ -15091,7 +15650,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
-// fold shuffles if the resulting BUILD_VECTOR would have duplicate
+// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
@@ -15103,6 +15662,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
if (!N0->hasOneUse() || !N1->hasOneUse())
return SDValue();
+
// If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
// discussed above.
if (!N1.isUndef()) {
@@ -15114,6 +15674,15 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
return SDValue();
}
+ // If both inputs are splats of the same value then we can safely merge this
+ // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
+ bool IsSplat = false;
+ auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ if (BV0 && BV1)
+ if (SDValue Splat0 = BV0->getSplatValue())
+ IsSplat = (Splat0 == BV1->getSplatValue());
+
SmallVector<SDValue, 8> Ops;
SmallSet<SDValue, 16> DuplicateOps;
for (int M : SVN->getMask()) {
@@ -15124,23 +15693,25 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
if (S.getOpcode() == ISD::BUILD_VECTOR) {
Op = S.getOperand(Idx);
} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- if (Idx == 0)
- Op = S.getOperand(0);
+ assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
+ Op = S.getOperand(0);
} else {
// Operand can't be combined - bail out.
return SDValue();
}
}
- // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
- // fine, but it's likely to generate low-quality code if the target can't
- // reconstruct an appropriate shuffle.
+ // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
+ // generating a splat; semantically, this is fine, but it's likely to
+ // generate low-quality code if the target can't reconstruct an appropriate
+ // shuffle.
if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
- if (!DuplicateOps.insert(Op).second)
+ if (!IsSplat && !DuplicateOps.insert(Op).second)
return SDValue();
Ops.push_back(Op);
}
+
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
EVT SVT = VT.getScalarType();
@@ -15162,7 +15733,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
const TargetLowering &TLI,
- bool LegalOperations) {
+ bool LegalOperations,
+ bool LegalTypes) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
@@ -15190,14 +15762,18 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
// Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
// power-of-2 extensions as they are the most likely.
for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+ // Check for non-power-of-2 vector sizes
+ if (NumElts % Scale != 0)
+ continue;
if (!isAnyExtend(Scale))
continue;
EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
- if (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
- return DAG.getBitcast(VT,
+ if (!LegalTypes || TLI.isTypeLegal(OutVT))
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
+ return DAG.getBitcast(VT,
DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
}
@@ -15218,9 +15794,7 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
if (!VT.isInteger() || IsBigEndian)
return SDValue();
- SDValue N0 = SVN->getOperand(0);
- while (N0.getOpcode() == ISD::BITCAST)
- N0 = N0.getOperand(0);
+ SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
unsigned Opcode = N0.getOpcode();
if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
@@ -15316,6 +15890,84 @@ static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
NewMask);
}
+/// If the shuffle mask is taking exactly one element from the first vector
+/// operand and passing through all other elements from the second vector
+/// operand, return the index of the mask element that is choosing an element
+/// from the first operand. Otherwise, return -1.
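+/// e.g. for a 4-element mask <0, 5, 6, 7>, lane 0 comes from operand 0 and
+/// lanes 1-3 pass through from operand 1, so this returns 0; for
+/// <0, 1, 6, 7> two lanes come from operand 0 and this returns -1.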
+static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
+ int MaskSize = Mask.size();
+ int EltFromOp0 = -1;
+ // TODO: This does not match if there are undef elements in the shuffle mask.
+ // Should we ignore undefs in the shuffle mask instead? The trade-off is
+ // removing an instruction (a shuffle), but losing the knowledge that some
+ // vector lanes are not needed.
+ for (int i = 0; i != MaskSize; ++i) {
+ if (Mask[i] >= 0 && Mask[i] < MaskSize) {
+ // We're looking for a shuffle of exactly one element from operand 0.
+ if (EltFromOp0 != -1)
+ return -1;
+ EltFromOp0 = i;
+ } else if (Mask[i] != i + MaskSize) {
+ // Nothing from operand 1 can change lanes.
+ return -1;
+ }
+ }
+ return EltFromOp0;
+}
+
+/// If a shuffle inserts exactly one element from a source vector operand into
+/// another vector operand and we can access the specified element as a scalar,
+/// then we can eliminate the shuffle.
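+/// e.g. shuffle (insertelt v1, x, 0), v2, <0, 5, 6, 7> --> insertelt v2, x, 0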
+static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ // First, check if we are taking one element of a vector and shuffling that
+ // element into another vector.
+ ArrayRef<int> Mask = Shuf->getMask();
+ SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
+ SDValue Op0 = Shuf->getOperand(0);
+ SDValue Op1 = Shuf->getOperand(1);
+ int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
+ if (ShufOp0Index == -1) {
+ // Commute mask and check again.
+ ShuffleVectorSDNode::commuteMask(CommutedMask);
+ ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
+ if (ShufOp0Index == -1)
+ return SDValue();
+ // Commute operands to match the commuted shuffle mask.
+ std::swap(Op0, Op1);
+ Mask = CommutedMask;
+ }
+
+ // The shuffle inserts exactly one element from operand 0 into operand 1.
+ // Now see if we can access that element as a scalar via a real insert element
+ // instruction.
+ // TODO: We can try harder to locate the element as a scalar. Examples: it
+ // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
+ assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
+ "Shuffle mask value must be from operand 0");
+ if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ return SDValue();
+
+ auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
+ if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
+ return SDValue();
+
+ // There's an existing insertelement with constant insertion index, so we
+ // don't need to check the legality/profitability of a replacement operation
+ // that differs at most in the constant value. The target should be able to
+ // lower any of those in a similar way. If not, legalization will expand this
+ // to a scalar-to-vector plus shuffle.
+ //
+ // Note that the shuffle may move the scalar from the position that the insert
+ // element used. Therefore, our new insert element occurs at the shuffle's
+ // mask index value, not the insert's index value.
+ // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
+ SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
+ Op0.getOperand(2).getValueType());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
+ Op1, Op0.getOperand(1), NewInsIndex);
+}
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -15362,6 +16014,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
+ // Simplify shuffle mask if a referenced element is UNDEF.
+ if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
+ return V;
+
+ if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
+ return InsElt;
+
// A shuffle of a single vector that is a splat can always be folded.
if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
if (N1->isUndef() && N0Shuf->isSplat())
@@ -15426,7 +16085,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return S;
// Match shuffles that can be converted to any_vector_extend_in_reg.
- if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
+ if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
return V;
// Combine "truncate_vector_in_reg" style shuffles.
@@ -15486,7 +16145,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (TLI.isTypeLegal(ScaleVT) &&
0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
-
int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
@@ -15661,23 +16319,46 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
// Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
- // with a VECTOR_SHUFFLE.
+ // with a VECTOR_SHUFFLE and possible truncate.
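+ // e.g. (illustrative): (v4i32 scalar_to_vector (extract_vector_elt V:v4i32, 1))
+ //      --> vector_shuffle<1,u,u,u> V, undef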
if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue InVec = InVal->getOperand(0);
SDValue EltNo = InVal->getOperand(1);
-
- // FIXME: We could support implicit truncation if the shuffle can be
- // scaled to a smaller vector scalar type.
- ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
- if (C0 && VT == InVec.getValueType() &&
- VT.getScalarType() == InVal.getValueType()) {
- SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
+ auto InVecT = InVec.getValueType();
+ if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
+ SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
int Elt = C0->getZExtValue();
NewMask[0] = Elt;
-
- if (TLI.isShuffleMaskLegal(NewMask, VT))
- return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
- NewMask);
+ SDValue Val;
+ // If we have an implicit truncate, do the truncate here as long as the
+ // truncated scalar type is legal; if it is not, skip this combine.
+ if (VT.getScalarType() != InVal.getValueType() &&
+ InVal.getValueType().isScalarInteger() &&
+ isTypeLegal(VT.getScalarType())) {
+ Val =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
+ }
+ if (VT.getScalarType() == InVecT.getScalarType() &&
+ VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
+ TLI.isShuffleMaskLegal(NewMask, VT)) {
+ Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
+ DAG.getUNDEF(InVecT), NewMask);
+ // If the initial vector is the correct size, this shuffle is a
+ // valid result.
+ if (VT == InVecT)
+ return Val;
+ // If not, we must truncate the vector.
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
+ EVT SubVT =
+ EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
+ VT.getVectorNumElements());
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
+ ZeroIdx);
+ return Val;
+ }
+ }
}
}
@@ -15694,12 +16375,47 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N1.isUndef())
return N0;
+ // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
+ // us to pull BITCASTs from input to output.
+ if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
+ if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
+
// If this is an insert of an extracted vector into an undef vector, we can
// just use the input to the extract.
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
return N1.getOperand(0);
+ // If we are inserting a bitcast value into an undef, with the same
+ // number of elements, just use the bitcast input of the extract.
+ // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
+ // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
+ if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
+ N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOperand(0).getOperand(1) == N2 &&
+ N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
+ VT.getVectorNumElements()) {
+ return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
+ }
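+ // e.g. (illustrative types): insert_subvector undef:v4i32,
+ //        (v2i32 bitcast (extract_subvector X:v4f32, 0)), 0
+ //      --> (v4i32 bitcast X), since X already has VT's element count.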
+
+ // If both N0 and N1 are bitcast values on which insert_subvector
+ // would make sense, pull the bitcast through.
+ // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
+ // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
+ if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
+ SDValue CN0 = N0.getOperand(0);
+ SDValue CN1 = N1.getOperand(0);
+ if (CN0.getValueType().getVectorElementType() ==
+ CN1.getValueType().getVectorElementType() &&
+ CN0.getValueType().getVectorNumElements() ==
+ VT.getVectorNumElements()) {
+ SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
+ CN0.getValueType(), CN0, CN1, N2);
+ return DAG.getBitcast(VT, NewINSERT);
+ }
+ }
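+ // e.g. (illustrative types): with N0 = (v4i32 bitcast X:v4f32) and
+ // N1 = (v2i32 bitcast Y:v2f32), this produces
+ // (v4i32 bitcast (insert_subvector X, Y, N2)).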
+
// Combine INSERT_SUBVECTORs where we are inserting to the same index.
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
// --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
@@ -15779,7 +16495,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
+ SDValue RHS = peekThroughBitcast(N->getOperand(1));
SDLoc DL(N);
// Make sure we're not running after operation legalization where it
@@ -15790,9 +16506,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
if (N->getOpcode() != ISD::AND)
return SDValue();
- if (RHS.getOpcode() == ISD::BITCAST)
- RHS = RHS.getOperand(0);
-
if (RHS.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
@@ -15945,7 +16658,6 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
-
// fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
// The select + setcc is redundant, because fsqrt returns NaN for X < 0.
if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
@@ -16418,7 +17130,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
- if (DAG.getMachineFunction().getFunction()->optForMinSize())
+ if (DAG.getMachineFunction().getFunction().optForMinSize())
return SDValue();
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
@@ -16429,7 +17141,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
if (C->isNullValue())
return SDValue();
- std::vector<SDNode*> Built;
+ std::vector<SDNode *> Built;
SDValue S =
TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
@@ -16464,7 +17176,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
- if (DAG.getMachineFunction().getFunction()->optForMinSize())
+ if (DAG.getMachineFunction().getFunction().optForMinSize())
return SDValue();
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
@@ -16475,7 +17187,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
if (C->isNullValue())
return SDValue();
- std::vector<SDNode*> Built;
+ std::vector<SDNode *> Built;
SDValue S =
TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
@@ -16760,8 +17472,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
if (Op1->isInvariant() && Op0->writeMem())
return false;
- unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
- unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
+ unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
+ unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
// Check for BaseIndexOffset matching.
BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
@@ -16957,7 +17669,11 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
- SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
+ if (OptLevel == CodeGenOpt::None)
+ return OldChain;
+
+ // Ops for replacing token factor.
+ SmallVector<SDValue, 8> Aliases;
// Accumulate all the aliases to this node.
GatherAllAliases(N, OldChain, Aliases);
@@ -16987,6 +17703,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// to go from a partially-merged state to the desired final
// fully-merged state.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index b2599b2e17f1..d3c94b5f9e6b 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -63,6 +63,9 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -98,11 +101,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -168,8 +168,7 @@ bool FastISel::hasTrivialKill(const Value *V) {
// No-op casts are trivially coalesced by fast-isel.
if (const auto *Cast = dyn_cast<CastInst>(I))
- if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) &&
- !hasTrivialKill(Cast->getOperand(0)))
+ if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0)))
return false;
// Even the value might have only one use in the LLVM IR, it is possible that
@@ -1133,6 +1132,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::lifetime_end:
// The donothing intrinsic does, well, nothing.
case Intrinsic::donothing:
+ // Neither does the sideeffect intrinsic.
+ case Intrinsic::sideeffect:
// Neither does the assume intrinsic; it's also OK not to codegen its operand.
case Intrinsic::assume:
return true;
@@ -1187,7 +1188,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// into an indirect DBG_VALUE.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
- Op->getReg(), 0, DI->getVariable(), DI->getExpression());
+ Op->getReg(), DI->getVariable(), DI->getExpression());
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE))
@@ -1212,35 +1213,32 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(0U)
- .addImm(DI->getOffset())
- .addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
+ DI->getVariable(), DI->getExpression());
} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI)
- .addImm(DI->getOffset())
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue())
- .addImm(DI->getOffset())
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF)
- .addImm(DI->getOffset())
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (unsigned Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
- bool IsIndirect = DI->getOffset() != 0;
+ bool IsIndirect = false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
- DI->getOffset(), DI->getVariable(), DI->getExpression());
+ DI->getVariable(), DI->getExpression());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index b736037d71dd..c7cdb49203b1 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -17,11 +17,14 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
@@ -32,12 +35,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index b96c96f0b4df..cc9b41b4b487 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -21,14 +21,14 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "instr-emitter"
@@ -673,7 +673,6 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, unsigned> &VRBaseMap) {
- uint64_t Offset = SD->getOffset();
MDNode *Var = SD->getVariable();
MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
@@ -685,7 +684,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
.addFrameIndex(SD->getFrameIx())
- .addImm(Offset)
+ .addImm(0)
.addMetadata(Var)
.addMetadata(Expr);
}
@@ -727,11 +726,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// Indirect addressing is indicated by an Imm as the second parameter.
if (SD->isIndirect())
- MIB.addImm(Offset);
- else {
- assert(Offset == 0 && "direct value cannot have an offset");
+ MIB.addImm(0U);
+ else
MIB.addReg(0U, RegState::Debug);
- }
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
@@ -938,10 +935,14 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
break;
}
- case ISD::EH_LABEL: {
- MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
+ case ISD::EH_LABEL:
+ case ISD::ANNOTATION_LABEL: {
+ unsigned Opc = (Node->getOpcode() == ISD::EH_LABEL)
+ ? TargetOpcode::EH_LABEL
+ : TargetOpcode::ANNOTATION_LABEL;
+ MCSymbol *S = cast<LabelSDNode>(Node)->getLabel();
BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
- TII->get(TargetOpcode::EH_LABEL)).addSym(S);
+ TII->get(Opc)).addSym(S);
break;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7e4bc3ccb5d3..bb1dc17b7a1b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1,4 +1,4 @@
-//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,37 +11,65 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "legalizedag"
namespace {
-struct FloatSignAsInt;
+/// Keeps track of state when getting the sign of a floating-point value as an
+/// integer.
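+/// For instance, on a target whose widest legal integer type is i32, only the
+/// part of an f64 that contains the sign bit is converted to an integer (which
+/// half depends on endianness), and SignBit/SignMask record where that bit
+/// lives within IntValue.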
+struct FloatSignAsInt {
+ EVT FloatVT;
+ SDValue Chain;
+ SDValue FloatPtr;
+ SDValue IntPtr;
+ MachinePointerInfo IntPointerInfo;
+ MachinePointerInfo FloatPointerInfo;
+ SDValue IntValue;
+ APInt SignMask;
+ uint8_t SignBit;
+};
//===----------------------------------------------------------------------===//
/// This takes an arbitrary SelectionDAG as input and
@@ -54,7 +82,6 @@ struct FloatSignAsInt;
/// as part of its processing. For example, if a target does not support a
/// 'setcc' instruction efficiently, but does support 'brcc' instruction, this
/// will attempt merge setcc and brc instructions into brcc's.
-///
class SelectionDAGLegalize {
const TargetMachine &TM;
const TargetLowering &TLI;
@@ -165,11 +192,13 @@ private:
public:
// Node replacement helpers
+
void ReplacedNode(SDNode *N) {
LegalizedNodes.erase(N);
if (UpdatedNodes)
UpdatedNodes->insert(N);
}
+
void ReplaceNode(SDNode *Old, SDNode *New) {
DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
dbgs() << " with: "; New->dump(&DAG));
@@ -182,6 +211,7 @@ public:
UpdatedNodes->insert(New);
ReplacedNode(Old);
}
+
void ReplaceNode(SDValue Old, SDValue New) {
DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
dbgs() << " with: "; New->dump(&DAG));
@@ -191,6 +221,7 @@ public:
UpdatedNodes->insert(New.getNode());
ReplacedNode(Old.getNode());
}
+
void ReplaceNode(SDNode *Old, const SDValue *New) {
DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
@@ -205,7 +236,8 @@ public:
ReplacedNode(Old);
}
};
-}
+
+} // end anonymous namespace
/// Return a vector shuffle operation which
/// performs the same shuffle in terms of order of result bytes, but on a type
@@ -376,6 +408,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
}
SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ DEBUG(dbgs() << "Optimizing float store operations\n");
// Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
// FIXME: We shouldn't do this for TargetConstantFP's.
// FIXME: move this to the DAG Combiner! Note that we can't regress due
@@ -434,172 +467,184 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
}
void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
- StoreSDNode *ST = cast<StoreSDNode>(Node);
- SDValue Chain = ST->getChain();
- SDValue Ptr = ST->getBasePtr();
- SDLoc dl(Node);
-
- unsigned Alignment = ST->getAlignment();
- MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
- AAMDNodes AAInfo = ST->getAAInfo();
-
- if (!ST->isTruncatingStore()) {
- if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
- ReplaceNode(ST, OptStore);
- return;
- }
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDLoc dl(Node);
- {
- SDValue Value = ST->getValue();
- MVT VT = Value.getSimpleValueType();
- switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal: {
- // If this is an unaligned store and the target doesn't support it,
- // expand it.
- EVT MemVT = ST->getMemoryVT();
- unsigned AS = ST->getAddressSpace();
- unsigned Align = ST->getAlignment();
- const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
- SDValue Result = TLI.expandUnalignedStore(ST, DAG);
- ReplaceNode(SDValue(ST, 0), Result);
- }
- break;
- }
- case TargetLowering::Custom: {
- SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res && Res != SDValue(Node, 0))
- ReplaceNode(SDValue(Node, 0), Res);
- return;
- }
- case TargetLowering::Promote: {
- MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
- assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
- "Can only promote stores to same size type");
- Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
- SDValue Result =
- DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- Alignment, MMOFlags, AAInfo);
- ReplaceNode(SDValue(Node, 0), Result);
- break;
- }
- }
- return;
- }
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ if (!ST->isTruncatingStore()) {
+ DEBUG(dbgs() << "Legalizing store operation\n");
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ ReplaceNode(ST, OptStore);
+ return;
+ }
+
+ SDValue Value = ST->getValue();
+ MVT VT = Value.getSimpleValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ EVT MemVT = ST->getMemoryVT();
+ unsigned AS = ST->getAddressSpace();
+ unsigned Align = ST->getAlignment();
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ } else
+ DEBUG(dbgs() << "Legal store\n");
+ break;
+ }
+ case TargetLowering::Custom: {
+ DEBUG(dbgs() << "Trying custom lowering\n");
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res && Res != SDValue(Node, 0))
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote stores to same size type");
+ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ return;
+ }
+
+ DEBUG(dbgs() << "Legalizing truncating store operations\n");
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ auto &DL = DAG.getDataLayout();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
+ Value = DAG.getZeroExtendInReg(Value, dl, StVT);
+ SDValue Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
+ Alignment, MMOFlags, AAInfo);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (DL.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ RoundVT, Alignment, MMOFlags, AAInfo);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(RoundWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
+ Hi = DAG.getTruncStore(
+ Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
} else {
- SDValue Value = ST->getValue();
-
- EVT StVT = ST->getMemoryVT();
- unsigned StWidth = StVT.getSizeInBits();
- auto &DL = DAG.getDataLayout();
-
- if (StWidth != StVT.getStoreSizeInBits()) {
- // Promote to a byte-sized store with upper bits zero if not
- // storing an integral number of bytes. For example, promote
- // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
- StVT.getStoreSizeInBits());
- Value = DAG.getZeroExtendInReg(Value, dl, StVT);
- SDValue Result =
- DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
- Alignment, MMOFlags, AAInfo);
- ReplaceNode(SDValue(Node, 0), Result);
- } else if (StWidth & (StWidth - 1)) {
- // If not storing a power-of-2 number of bits, expand as two stores.
- assert(!StVT.isVector() && "Unsupported truncstore!");
- unsigned RoundWidth = 1 << Log2_32(StWidth);
- assert(RoundWidth < StWidth);
- unsigned ExtraWidth = StWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Store size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi;
- unsigned IncrementSize;
-
- if (DL.isLittleEndian()) {
- // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
- // Store the bottom RoundWidth bits.
- Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- RoundVT, Alignment, MMOFlags, AAInfo);
-
- // Store the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl,
- Ptr.getValueType()));
- Hi = DAG.getNode(
- ISD::SRL, dl, Value.getValueType(), Value,
- DAG.getConstant(RoundWidth, dl,
- TLI.getShiftAmountTy(Value.getValueType(), DL)));
- Hi = DAG.getTruncStore(
- Chain, dl, Hi, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
- } else {
- // Big endian - avoid unaligned stores.
- // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
- // Store the top RoundWidth bits.
- Hi = DAG.getNode(
- ISD::SRL, dl, Value.getValueType(), Value,
- DAG.getConstant(ExtraWidth, dl,
- TLI.getShiftAmountTy(Value.getValueType(), DL)));
- Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
- RoundVT, Alignment, MMOFlags, AAInfo);
-
- // Store the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl,
- Ptr.getValueType()));
- Lo = DAG.getTruncStore(
- Chain, dl, Value, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
- }
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(ExtraWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
+ RoundVT, Alignment, MMOFlags, AAInfo);
- // The order of the stores doesn't matter.
- SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- ReplaceNode(SDValue(Node, 0), Result);
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Lo = DAG.getTruncStore(
+ Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ }
+
+ // The order of the stores doesn't matter.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else {
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ EVT MemVT = ST->getMemoryVT();
+ unsigned AS = ST->getAddressSpace();
+ unsigned Align = ST->getAlignment();
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ }
+ break;
+ }
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res && Res != SDValue(Node, 0))
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Expand:
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
+
+ SDValue Result;
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ if (TLI.isTypeLegal(StVT)) {
+ Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
+ Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
} else {
- switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal: {
- EVT MemVT = ST->getMemoryVT();
- unsigned AS = ST->getAddressSpace();
- unsigned Align = ST->getAlignment();
- // If this is an unaligned store and the target doesn't support it,
- // expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
- SDValue Result = TLI.expandUnalignedStore(ST, DAG);
- ReplaceNode(SDValue(ST, 0), Result);
- }
- break;
- }
- case TargetLowering::Custom: {
- SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res && Res != SDValue(Node, 0))
- ReplaceNode(SDValue(Node, 0), Res);
- return;
- }
- case TargetLowering::Expand:
- assert(!StVT.isVector() &&
- "Vector Stores are handled in LegalizeVectorOps");
-
- // TRUNCSTORE:i16 i32 -> STORE i16
- assert(TLI.isTypeLegal(StVT) &&
- "Do not know how to expand this store!");
- Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
- SDValue Result =
- DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- Alignment, MMOFlags, AAInfo);
- ReplaceNode(SDValue(Node, 0), Result);
- break;
- }
+ // The in-memory type isn't legal. Truncate to the type it would promote
+ // to, and then do a truncstore.
+ Value = DAG.getNode(ISD::TRUNCATE, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), StVT),
+ Value);
+ Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ StVT, Alignment, MMOFlags, AAInfo);
}
+
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
}
+ }
}
void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
@@ -611,6 +656,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
+ DEBUG(dbgs() << "Legalizing non-extending load operation\n");
MVT VT = Node->getSimpleValueType(0);
SDValue RVal = SDValue(Node, 0);
SDValue RChain = SDValue(Node, 1);
@@ -629,13 +675,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
}
break;
}
- case TargetLowering::Custom: {
+ case TargetLowering::Custom:
if (SDValue Res = TLI.LowerOperation(RVal, DAG)) {
RVal = Res;
RChain = Res.getValue(1);
}
break;
- }
+
case TargetLowering::Promote: {
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
@@ -660,6 +706,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
return;
}
+ DEBUG(dbgs() << "Legalizing extending load operation\n");
EVT SrcVT = LD->getMemoryVT();
unsigned SrcWidth = SrcVT.getSizeInBits();
unsigned Alignment = LD->getAlignment();
@@ -795,7 +842,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
case TargetLowering::Custom:
isCustom = true;
LLVM_FALLTHROUGH;
- case TargetLowering::Legal: {
+ case TargetLowering::Legal:
Value = SDValue(Node, 0);
Chain = SDValue(Node, 1);
@@ -816,8 +863,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
}
}
break;
- }
- case TargetLowering::Expand:
+
+ case TargetLowering::Expand: {
EVT DestVT = Node->getValueType(0);
if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
// If the source type is not legal, see if there is a legal extload to
@@ -883,6 +930,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Result.getValue(1);
break;
}
+ }
}
// Since loads produce two values, make sure to remember that we legalized
@@ -907,6 +955,7 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
@@ -932,7 +981,9 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
- if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ // Allow illegal target nodes and illegal registers.
+ if (Node->getOpcode() == ISD::TargetConstant ||
+ Node->getOpcode() == ISD::Register)
return;
#ifndef NDEBUG
@@ -946,7 +997,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
TargetLowering::TypeLegal ||
TLI.isTypeLegal(Op.getValueType()) ||
- Op.getOpcode() == ISD::TargetConstant) &&
+ Op.getOpcode() == ISD::TargetConstant ||
+ Op.getOpcode() == ISD::Register) &&
"Unexpected illegal type!");
#endif
@@ -983,11 +1035,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
break;
}
- case ISD::ATOMIC_STORE: {
+ case ISD::ATOMIC_STORE:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(2).getValueType());
break;
- }
case ISD::SELECT_CC:
case ISD::SETCC:
case ISD::BR_CC: {
@@ -1072,6 +1123,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
break;
case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:
case ISD::STRICT_FSIN:
@@ -1090,7 +1142,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(),
Node->getValueType(0));
break;
-
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -1141,8 +1192,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (SAO != Op2)
NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO);
}
+ break;
}
- break;
}
if (NewNode != Node) {
@@ -1151,8 +1202,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
switch (Action) {
case TargetLowering::Legal:
+ DEBUG(dbgs() << "Legal node: nothing to do\n");
return;
- case TargetLowering::Custom: {
+ case TargetLowering::Custom:
+ DEBUG(dbgs() << "Trying custom legalization\n");
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
@@ -1160,6 +1213,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
if (Node->getNumValues() == 1) {
+ DEBUG(dbgs() << "Successfully custom legalized node\n");
// We can just directly replace this node with the lowered value.
ReplaceNode(SDValue(Node, 0), Res);
return;
@@ -1168,11 +1222,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
SmallVector<SDValue, 8> ResultVals;
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
ResultVals.push_back(Res.getValue(i));
+ DEBUG(dbgs() << "Successfully custom legalized node\n");
ReplaceNode(Node, ResultVals.data());
return;
}
+ DEBUG(dbgs() << "Could not custom legalize node\n");
LLVM_FALLTHROUGH;
- }
case TargetLowering::Expand:
if (ExpandNode(Node))
return;
@@ -1198,13 +1253,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::CALLSEQ_START:
case ISD::CALLSEQ_END:
break;
- case ISD::LOAD: {
+ case ISD::LOAD:
return LegalizeLoadOps(Node);
- }
- case ISD::STORE: {
+ case ISD::STORE:
return LegalizeStoreOps(Node);
}
- }
}
SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
@@ -1240,7 +1293,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// If the index is dependent on the store we will introduce a cycle when
// creating the load (the load uses the index, and by replacing the chain
// we will make the index dependent on the load). Also, the store might be
- // dependent on the extractelement and introduce a cycle when creating
+ // dependent on the extractelement and introduce a cycle when creating
// the load.
if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
ST->hasPredecessor(Op.getNode()))
@@ -1361,22 +1414,6 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo);
}
-namespace {
-/// Keeps track of state when getting the sign of a floating-point value as an
-/// integer.
-struct FloatSignAsInt {
- EVT FloatVT;
- SDValue Chain;
- SDValue FloatPtr;
- SDValue IntPtr;
- MachinePointerInfo IntPointerInfo;
- MachinePointerInfo FloatPointerInfo;
- SDValue IntValue;
- APInt SignMask;
- uint8_t SignBit;
-};
-}
-
/// Bitcast a floating-point value to an integer value. Only bitcast the part
/// containing the sign bit if the target has no integer value capable of
/// holding all bits of the floating-point value.
@@ -1753,8 +1790,8 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
// We do this in two phases; first to check the legality of the shuffles,
// and next, assuming that all shuffles are legal, to create the new nodes.
for (int Phase = 0; Phase < 2; ++Phase) {
- SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals,
- NewIntermedVals;
+ SmallVector<std::pair<SDValue, SmallVector<int, 16>>, 16> IntermedVals,
+ NewIntermedVals;
for (unsigned i = 0; i < NumElems; ++i) {
SDValue V = Node->getOperand(i);
if (V.isUndef())
@@ -1977,10 +2014,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
// isTailCall may be true since the callee does not reference caller stack
// frame. Check if it's in the right position and that the return types match.
SDValue TCChain = InChain;
- const Function *F = DAG.getMachineFunction().getFunction();
+ const Function &F = DAG.getMachineFunction().getFunction();
bool isTailCall =
TLI.isInTailCallPosition(DAG, Node, TCChain) &&
- (RetTy == F->getReturnType() || F->getReturnType()->isVoidTy());
+ (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
if (isTailCall)
InChain = TCChain;
@@ -1996,10 +2033,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
- if (!CallInfo.second.getNode())
+ if (!CallInfo.second.getNode()) {
+ DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump());
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
+ }
+ DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump());
return CallInfo.first;
}
@@ -2285,9 +2325,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
EVT DestVT,
const SDLoc &dl) {
// TODO: Should any fast-math-flags be set for the created nodes?
-
+ DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
- // simple 32-bit [signed|unsigned] integer to float/double expansion
+ DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
+ "expansion\n");
// Get the stack frame index of an 8-byte buffer.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
@@ -2352,6 +2393,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// and in all alternate rounding modes.
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+ DEBUG(dbgs() << "Converting unsigned i64 to f64\n");
SDValue TwoP52 =
DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64);
SDValue TwoP84PlusTwoP52 =
@@ -2372,9 +2414,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
}
- // Implementation of unsigned i64 to f32.
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+ DEBUG(dbgs() << "Converting unsigned i64 to f32\n");
// For unsigned conversions, convert them to signed conversions using the
// algorithm from the x86_64 __floatundidf in compiler_rt.
if (!isSigned) {
@@ -2498,7 +2540,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
unsigned OpToUse = 0;
// Scan for the appropriate larger type to use.
- while (1) {
+ while (true) {
NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
assert(NewInTy.isInteger() && "Ran out of possibilities!");
@@ -2539,7 +2581,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
unsigned OpToUse = 0;
// Scan for the appropriate larger type to use.
- while (1) {
+ while (true) {
NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
assert(NewOutTy.isInteger() && "Ran out of possibilities!");
@@ -2559,7 +2601,6 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
// Otherwise, try a larger type.
}
-
// Okay, we found the operation and type to use.
SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
@@ -2745,7 +2786,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
case ISD::CTLZ: {
EVT VT = Op.getValueType();
- unsigned len = VT.getSizeInBits();
+ unsigned Len = VT.getSizeInBits();
if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
EVT SetCCVT = getSetCCResultType(VT);
@@ -2753,7 +2794,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
- DAG.getConstant(len, dl, VT), CTLZ);
+ DAG.getConstant(Len, dl, VT), CTLZ);
}
// for now, we do this:
@@ -2766,7 +2807,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
//
// Ref: "Hacker's Delight" by Henry Warren
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) {
SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
Op = DAG.getNode(ISD::OR, dl, VT, Op,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
@@ -2778,11 +2819,22 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// This trivially expands to CTTZ.
return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
case ISD::CTTZ: {
+ EVT VT = Op.getValueType();
+ unsigned Len = VT.getSizeInBits();
+
+ if (TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT = getSetCCResultType(VT);
+ SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(Len, dl, VT), CTTZ);
+ }
+
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
// { return 32 - nlz(~x & (x-1)); }
// Ref: "Hacker's Delight" by Henry Warren
- EVT VT = Op.getValueType();
SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNOT(dl, Op, VT),
DAG.getNode(ISD::SUB, dl, VT, Op,
@@ -2799,6 +2851,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
}
bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+ DEBUG(dbgs() << "Trying to expand node\n");
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2983,8 +3036,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// NOTE: we could fall back on load/store here too for targets without
// SRA. However, it is doubtful that any exist.
EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- if (VT.isVector())
- ShiftAmountTy = VT;
unsigned BitsDiff = VT.getScalarSizeInBits() -
ExtraVT.getScalarSizeInBits();
SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy);
@@ -3062,10 +3113,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::INSERT_SUBVECTOR:
Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
break;
- case ISD::CONCAT_VECTORS: {
+ case ISD::CONCAT_VECTORS:
Results.push_back(ExpandVectorBuildThroughStack(Node));
break;
- }
case ISD::SCALAR_TO_VECTOR:
Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
break;
@@ -3083,14 +3133,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
if (!TLI.isTypeLegal(EltVT)) {
-
EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
// BUILD_VECTOR operands are allowed to be wider than the element type.
// But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept
// it.
if (NewEltVT.bitsLT(EltVT)) {
-
// Convert shuffle node.
// If original node was v4i64 and the new EltVT is i32,
// cast operands to v8i32 and re-build the mask.
@@ -3261,6 +3309,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
case ISD::FP_TO_FP16:
+ DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
SDValue Op = Node->getOperand(0);
MVT SVT = Op.getSimpleValueType();
@@ -3457,7 +3506,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
// Sub:
// Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
- //
SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
@@ -3666,10 +3714,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp2.getOperand(0), Tmp2.getOperand(1),
Node->getOperand(2));
} else {
- // We test only the i1 bit. Skip the AND if UNDEF.
- Tmp3 = (Tmp2.isUndef()) ? Tmp2 :
- DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
- DAG.getConstant(1, dl, Tmp2.getValueType()));
+ // We test only the i1 bit. Skip the AND if UNDEF or an AND with constant 1.
+ if (Tmp2.isUndef() ||
+ (Tmp2.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Tmp2.getOperand(1)) &&
+ cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1))
+ Tmp3 = Tmp2;
+ else
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, dl, Tmp2.getValueType()));
Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
DAG.getCondCode(ISD::SETNE), Tmp3,
DAG.getConstant(0, dl, Tmp3.getValueType()),
@@ -3865,17 +3918,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
// Replace the original node with the legalized result.
- if (Results.empty())
+ if (Results.empty()) {
+ DEBUG(dbgs() << "Cannot expand node\n");
return false;
+ }
+ DEBUG(dbgs() << "Succesfully expanded node\n");
ReplaceNode(Node, Results.data());
return true;
}
void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
+ DEBUG(dbgs() << "Trying to convert node to libcall\n");
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
- SDValue Tmp1, Tmp2, Tmp3, Tmp4;
unsigned Opc = Node->getOpcode();
switch (Opc) {
case ISD::ATOMIC_FENCE: {
@@ -4057,6 +4113,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::REM_PPCF128));
break;
case ISD::FMA:
+ case ISD::STRICT_FMA:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
@@ -4126,8 +4183,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
}
// Replace the original node with the legalized result.
- if (!Results.empty())
+ if (!Results.empty()) {
+ DEBUG(dbgs() << "Successfully converted node to libcall\n");
ReplaceNode(Node, Results.data());
+ } else
+ DEBUG(dbgs() << "Could not convert node to libcall\n");
}
// Determine the vector type to use in place of an original scalar element when
@@ -4141,6 +4201,7 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI,
}
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ DEBUG(dbgs() << "Trying to promote node\n");
SmallVector<SDValue, 8> Results;
MVT OVT = Node->getSimpleValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
@@ -4369,7 +4430,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FREM:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FPOW: {
+ case ISD::FPOW:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
@@ -4377,8 +4438,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
- }
- case ISD::FMA: {
+ case ISD::FMA:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2));
@@ -4387,7 +4447,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
DAG.getIntPtrConstant(0, dl)));
break;
- }
case ISD::FCOPYSIGN:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
@@ -4419,13 +4478,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FLOG10:
case ISD::FABS:
case ISD::FEXP:
- case ISD::FEXP2: {
+ case ISD::FEXP2:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp2, DAG.getIntPtrConstant(0, dl)));
break;
- }
case ISD::BUILD_VECTOR: {
MVT EltVT = OVT.getVectorElementType();
MVT NewEltVT = NVT.getVectorElementType();
@@ -4579,8 +4637,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
// Replace the original node with the legalized result.
- if (!Results.empty())
+ if (!Results.empty()) {
+ DEBUG(dbgs() << "Successfully promoted node\n");
ReplaceNode(Node, Results.data());
+ } else
+ DEBUG(dbgs() << "Could not promote node\n");
}
/// This is the entry point for the file.
@@ -4602,7 +4663,7 @@ void SelectionDAG::Legalize() {
// nodes with their original operands intact. Legalization can produce
// new nodes which may themselves need to be legalized. Iterate until all
// nodes have been legalized.
- for (;;) {
+ while (true) {
bool AnyLegalized = false;
for (auto NI = allnodes_end(); NI != allnodes_begin();) {
--NI;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 75fec7bd1d48..29f0bb475b08 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -40,8 +40,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
- if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ DEBUG(dbgs() << "Node has been custom expanded, done\n");
return;
+ }
switch (N->getOpcode()) {
default:
@@ -568,10 +570,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
SDValue Mask = N->getOperand(0);
- EVT OpTy = N->getOperand(1).getValueType();
- // Promote all the way up to the canonical SetCC type.
- Mask = PromoteTargetBoolean(Mask, OpTy);
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, SDLoc(N),
@@ -773,7 +772,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
- llvm_unreachable("Not implemented");
+
+ // We need to sign-extend the operands so the carry value computed by the
+ // wide operation will be equivalent to the carry value computed by the
+ // narrow operation.
+  // An ADDCARRY can generate a carry only if one of the operands has its
+  // most significant bit set. Sign extension propagates the most significant
+  // bit into the higher bits, which means the extra bit that the narrow
+  // addition would need (i.e. the carry) will be propagated through the higher
+  // bits of the wide addition.
+  // A SUBCARRY can generate a borrow only if LHS < RHS, and this property is
+  // preserved by sign extension.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+
+ EVT ValueVTs[] = {LHS.getValueType(), N->getValueType(1)};
+
+ // Do the arithmetic in the wide type.
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs),
+ LHS, RHS, N->getOperand(2));
+
+ // Update the users of the original carry/borrow value.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ return SDValue(Res.getNode(), 0);
}
SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
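
The sign-extension argument in the comment above can be checked exhaustively
for the smallest interesting promotion, i8 widened to i16. A minimal
standalone sketch (plain C++, no LLVM APIs, not part of the patch):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      for (int A = 0; A < 256; ++A)
        for (int B = 0; B < 256; ++B)
          for (int Cin = 0; Cin < 2; ++Cin) {
            // Sign-extend the 8-bit operands to 16 bits by hand.
            uint16_t WA = (A & 0x80) ? (uint16_t)(0xFF00 | A) : (uint16_t)A;
            uint16_t WB = (B & 0x80) ? (uint16_t)(0xFF00 | B) : (uint16_t)B;
            // ADDCARRY: the narrow carry-out (at bit 8) matches the wide one.
            bool NarrowCarry = (A + B + Cin) > 0xFF;
            bool WideCarry = ((uint32_t)WA + WB + Cin) > 0xFFFF;
            assert(NarrowCarry == WideCarry);
            // SUBCARRY: the narrow borrow matches the wide one.
            bool NarrowBorrow = A < B + Cin;
            bool WideBorrow = (uint32_t)WA < (uint32_t)WB + Cin;
            assert(NarrowBorrow == WideBorrow);
          }
      puts("carry/borrow preserved for all 8-bit inputs");
    }
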
@@ -885,8 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
- if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ DEBUG(dbgs() << "Node has been custom lowered, done\n");
return false;
+ }
switch (N->getOpcode()) {
default:
@@ -1206,24 +1230,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
// When the data operand has illegal type, we should legalize the data
// operand first. The mask will be promoted/split/widened according to
// the data operand type.
- if (TLI.isTypeLegal(DataVT))
+ if (TLI.isTypeLegal(DataVT)) {
Mask = PromoteTargetBoolean(Mask, DataVT);
- else {
- if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
- return PromoteIntOp_MSTORE(N, 3);
-
- else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
- return WidenVecOp_MSTORE(N, 3);
-
- else {
- assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
- return SplitVecOp_MSTORE(N, 3);
- }
+ // Update in place.
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[2] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+
+ if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
+ return PromoteIntOp_MSTORE(N, 3);
+ if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
+ return WidenVecOp_MSTORE(N, 3);
+  assert(getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
+ return SplitVecOp_MSTORE(N, 3);
} else { // Data operand
assert(OpNo == 3 && "Unexpected operand for promotion");
DataOp = GetPromotedInteger(DataOp);
- Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
TruncateStore = true;
}
@@ -1250,6 +1273,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
// The Mask
EVT DataVT = N->getValueType(0);
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else if (OpNo == 4) {
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
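
The same index-promotion rule appears in this gather hunk and the scatter
hunk below: the index feeds address arithmetic, so a negative narrow index
must stay negative after promotion. A small standalone illustration (plain
C++, names invented for the example):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t Data[8] = {0, 1, 2, 3, 4, 5, 6, 7};
      int32_t *Base = &Data[4];
      int16_t NarrowIdx = -2;                      // valid: Base[-2] is Data[2]
      int32_t SExt = (int32_t)NarrowIdx;           // -2, still in bounds
      int32_t ZExt = (int32_t)(uint16_t)NarrowIdx; // 65534, far out of bounds
      printf("sext: Base[%d] = %d\n", SExt, Base[SExt]);
      printf("zext: Base[%d] would read far past the array\n", ZExt);
    }
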
@@ -1270,6 +1296,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
// The Mask
EVT DataVT = N->getValue().getValueType();
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else if (OpNo == 4) {
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
@@ -3224,8 +3253,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Hi = DAG.getTruncStore(
Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
@@ -3260,8 +3288,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -3462,7 +3489,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
- EVT InElemTy = OutVT.getVectorElementType();
EVT OutElemTy = NOutVT.getVectorElementType();
unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
@@ -3471,15 +3497,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
+  // If the input type is legal and we can promote it to a legal type with the
+  // same number of elements, go ahead and do that to create a new concat.
+ if (getTypeAction(N->getOperand(0).getValueType()) ==
+ TargetLowering::TypeLegal) {
+ EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem);
+ if (TLI.isTypeLegal(InPromotedTy)) {
+ SmallVector<SDValue, 8> Ops(NumOperands);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy,
+ N->getOperand(i));
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops);
+ }
+ }
+
// Take the elements from the first vector.
SmallVector<SDValue, 8> Ops(NumOutElem);
for (unsigned i = 0; i < NumOperands; ++i) {
SDValue Op = N->getOperand(i);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteInteger)
+ Op = GetPromotedInteger(Op);
+ EVT SclrTy = Op.getValueType().getVectorElementType();
+ assert(NumElem == Op.getValueType().getVectorNumElements() &&
+ "Unexpected number of elements");
+
for (unsigned j = 0; j < NumElem; ++j) {
SDValue Ext = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, InElemTy, Op,
+ ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy);
}
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 001eed9fb8f6..b60d7bca498a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -14,7 +14,9 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/CommandLine.h"
@@ -222,15 +224,21 @@ bool DAGTypeLegalizer::run() {
assert(N->getNodeId() == ReadyToProcess &&
"Node should be ready if on worklist!");
- if (IgnoreNodeResults(N))
+ DEBUG(dbgs() << "Legalizing node: "; N->dump());
+ if (IgnoreNodeResults(N)) {
+ DEBUG(dbgs() << "Ignoring node results\n");
goto ScanOperands;
+ }
// Scan the values produced by the node, checking to see if any result
// types are illegal.
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
EVT ResultVT = N->getValueType(i);
+ DEBUG(dbgs() << "Analyzing result type: " <<
+ ResultVT.getEVTString() << "\n");
switch (getTypeAction(ResultVT)) {
case TargetLowering::TypeLegal:
+ DEBUG(dbgs() << "Legal result type\n");
break;
// The following calls must take care of *all* of the node's results,
// not just the illegal result they were passed (this includes results
@@ -287,9 +295,12 @@ ScanOperands:
if (IgnoreNodeResults(N->getOperand(i).getNode()))
continue;
- EVT OpVT = N->getOperand(i).getValueType();
+ const auto Op = N->getOperand(i);
+ DEBUG(dbgs() << "Analyzing operand: "; Op.dump());
+ EVT OpVT = Op.getValueType();
switch (getTypeAction(OpVT)) {
case TargetLowering::TypeLegal:
+ DEBUG(dbgs() << "Legal operand\n");
continue;
// The following calls must either replace all of the node's results
// using ReplaceValueWith, and return "false"; or update the node's
@@ -832,6 +843,18 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
AnalyzeNewValue(Lo);
AnalyzeNewValue(Hi);
+ // Transfer debug values. Don't invalidate the source debug value until it's
+ // been transferred to the high and low bits.
+ if (DAG.getDataLayout().isBigEndian()) {
+ DAG.transferDbgValues(Op, Hi, 0, Hi.getValueSizeInBits(), false);
+ DAG.transferDbgValues(Op, Lo, Hi.getValueSizeInBits(),
+ Lo.getValueSizeInBits());
+ } else {
+ DAG.transferDbgValues(Op, Lo, 0, Lo.getValueSizeInBits(), false);
+ DAG.transferDbgValues(Op, Hi, Lo.getValueSizeInBits(),
+ Hi.getValueSizeInBits());
+ }
+
// Remember that this is the result of the node.
std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
assert(!Entry.first.getNode() && "Node already expanded");
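
The two transferDbgValues branches describe the same split with different bit
offsets: the half holding the value's low bits covers the variable's first
bits on little-endian targets and its last bits on big-endian ones. A
standalone sketch of the (offset, size) pairs implied by the calls above (the
Fragment struct is invented for illustration):

    #include <cstdio>
    #include <initializer_list>

    struct Fragment { unsigned OffsetInBits, SizeInBits; };

    int main() {
      unsigned LoBits = 32, HiBits = 32; // e.g. an i64 expanded into two i32s
      for (bool BigEndian : {false, true}) {
        Fragment Lo, Hi;
        if (BigEndian) {
          Hi = {0, HiBits};       // Hi covers the variable's first bits
          Lo = {HiBits, LoBits};
        } else {
          Lo = {0, LoBits};       // Lo covers the variable's first bits
          Hi = {LoBits, HiBits};
        }
        printf("%s: Lo@[%u,%u) Hi@[%u,%u)\n", BigEndian ? "BE" : "LE",
               Lo.OffsetInBits, Lo.OffsetInBits + Lo.SizeInBits,
               Hi.OffsetInBits, Hi.OffsetInBits + Hi.SizeInBits);
      }
    }
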
@@ -1002,8 +1025,13 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
// Update the widening map.
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
- for (unsigned i = 0, e = Results.size(); i != e; ++i)
- SetWidenedVector(SDValue(N, i), Results[i]);
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ // If this is a chain output just replace it.
+ if (Results[i].getValueType() == MVT::Other)
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ else
+ SetWidenedVector(SDValue(N, i), Results[i]);
+ }
return true;
}
@@ -1117,23 +1145,6 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
}
-/// Widen the given target boolean to a target boolean of the given type.
-/// The boolean vector is widened and then promoted to match the target boolean
-/// type of the given ValVT.
-SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
- bool WithZeroes) {
- SDLoc dl(Bool);
- EVT BoolVT = Bool.getValueType();
-
- assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() &&
- TLI.isTypeLegal(ValVT) &&
- "Unexpected types in WidenTargetBoolean");
- EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(),
- ValVT.getVectorNumElements());
- Bool = ModifyToType(Bool, WideVT, WithZeroes);
- return PromoteTargetBoolean(Bool, ValVT);
-}
-
/// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
EVT LoVT, EVT HiVT,
@@ -1142,9 +1153,14 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
Op.getValueSizeInBits() && "Invalid integer splitting!");
Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+ unsigned ReqShiftAmountInBits =
+ Log2_32_Ceil(Op.getValueType().getSizeInBits());
+ MVT ShiftAmountTy =
+ TLI.getScalarShiftAmountTy(DAG.getDataLayout(), Op.getValueType());
+ if (ReqShiftAmountInBits > ShiftAmountTy.getSizeInBits())
+ ShiftAmountTy = MVT::getIntegerVT(NextPowerOf2(ReqShiftAmountInBits));
Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
- DAG.getConstant(LoVT.getSizeInBits(), dl,
- TLI.getPointerTy(DAG.getDataLayout())));
+ DAG.getConstant(LoVT.getSizeInBits(), dl, ShiftAmountTy));
Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
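
The new shift-amount logic guards against a target whose scalar shift-amount
type is too narrow to hold LoVT.getSizeInBits(): shifting an N-bit value
needs ceil(log2(N)) amount bits, and a narrower type would truncate the
amount and corrupt the SRL. A standalone sketch with simplified stand-ins for
llvm::Log2_32_Ceil and llvm::NextPowerOf2 (assumed to agree with them for the
values shown):

    #include <cstdio>

    static unsigned log2Ceil(unsigned V) {     // stand-in for Log2_32_Ceil
      unsigned Bits = 0;
      while ((1ull << Bits) < V)
        ++Bits;
      return Bits;
    }
    static unsigned nextPowerOf2(unsigned V) { // stand-in for NextPowerOf2
      unsigned P = 1;
      while (P <= V)
        P <<= 1;
      return P;
    }

    int main() {
      unsigned OpBits = 512;            // splitting an i512 into two i256s
      unsigned Shift = 256;             // LoVT.getSizeInBits()
      unsigned Req = log2Ceil(OpBits);  // 9 bits to represent amounts 0..511
      unsigned ShiftTyBits = 8;         // a target with i8 shift amounts
      if (Req > ShiftTyBits)
        ShiftTyBits = nextPowerOf2(Req);
      printf("need %u bits, use i%u\n", Req, ShiftTyBits); // need 9, use i16
      printf("256 truncated to 8 bits = %u (a no-op shift)\n", Shift & 0xFFu);
    }
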
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c46d1b04804c..64cb80e0d853 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -18,9 +18,9 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -89,7 +89,8 @@ private:
/// Pretend all of this node's results are legal.
bool IgnoreNodeResults(SDNode *N) const {
- return N->getOpcode() == ISD::TargetConstant;
+ return N->getOpcode() == ISD::TargetConstant ||
+ N->getOpcode() == ISD::Register;
}
/// For integer nodes that are below legal width, this map indicates what
@@ -182,10 +183,6 @@ private:
SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
- /// Modify Bit Vector to match SetCC result type of ValVT.
- /// The bit vector is widened with zeroes when WithZeroes is true.
- SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false);
-
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
@@ -400,18 +397,22 @@ private:
/// Given an operand Op of Float type, returns the integer it was converted
/// to when Op is not supported in target HW.
/// The integer contains exactly the same bits as Op - only the type changed.
- /// For example, if Op is an f32 which was softened to an i32, then this method
- /// returns an i32, the bits of which coincide with those of Op.
+ /// For example, if Op is an f32 which was softened to an i32, then this
+ /// method returns an i32, the bits of which coincide with those of Op.
/// If the Op can be efficiently supported in target HW or the operand must
/// stay in a register, the Op is not converted to an integer.
/// In that case, the given op is returned.
SDValue GetSoftenedFloat(SDValue Op) {
- SDValue &SoftenedOp = SoftenedFloats[Op];
- if (!SoftenedOp.getNode() &&
- isSimpleLegalType(Op.getValueType()))
+ auto Iter = SoftenedFloats.find(Op);
+ if (Iter == SoftenedFloats.end()) {
+ assert(isSimpleLegalType(Op.getValueType()) &&
+ "Operand wasn't converted to integer?");
return Op;
+ }
+
+ SDValue &SoftenedOp = Iter->second;
+ assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?");
RemapValue(SoftenedOp);
- assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
return SoftenedOp;
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
@@ -618,7 +619,6 @@ private:
SDValue ScalarizeVecRes_SETCC(SDNode *N);
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
- SDValue ScalarizeVecRes_VSETCC(SDNode *N);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -727,7 +727,6 @@ private:
SDValue WidenVecRes_SETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
- SDValue WidenVecRes_VSETCC(SDNode* N);
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index f3306151d864..993465ae9dc2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -484,8 +484,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment,
St->getMemOperand()->getFlags(), AAInfo);
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
St->getPointerInfo().getWithOffset(IncrementSize),
MinAlign(Alignment, IncrementSize),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 9355dbe77f94..74970ab5792c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1,4 +1,4 @@
-//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -27,15 +27,34 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
namespace {
+
class VectorLegalizer {
SelectionDAG& DAG;
const TargetLowering &TLI;
- bool Changed; // Keep track of whether anything changed
+ bool Changed = false; // Keep track of whether anything changed
/// For nodes that are of legal width, and that have more than one use, this
/// map indicates what regularized operand to use. This allows us to avoid
@@ -128,12 +147,15 @@ class VectorLegalizer {
SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
public:
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
+
/// \brief Begin legalizing the vector operations in the DAG.
bool Run();
- VectorLegalizer(SelectionDAG& dag) :
- DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
};
+} // end anonymous namespace
+
bool VectorLegalizer::Run() {
// Before we start legalizing vector nodes, check if there are any vectors.
bool HasVectors = false;
@@ -475,10 +497,10 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
"Can't promote a vector with multiple results!");
EVT VT = Op.getValueType();
- EVT NewVT;
+ EVT NewVT = VT;
unsigned NewOpc;
- while (1) {
- NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+ while (true) {
+ NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
NewOpc = ISD::FP_TO_SINT;
@@ -490,12 +512,19 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
}
}
- SDLoc loc(Op);
- SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0));
- return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted);
+ SDLoc dl(Op);
+ SDValue Promoted = DAG.getNode(NewOpc, dl, NewVT, Op.getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+  // (e.g. because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, NewVT, Promoted,
+ DAG.getValueType(VT.getScalarType()));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
}
-
SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
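
The comment's claim about AssertSext/AssertZext can be restated in plain C++:
for any input where the narrow conversion is defined at all, converting to the
wider integer and truncating back yields the same bits, so asserting that the
wide result fits loses nothing. A small demonstration (names invented; think
of an FP_TO_SINT whose integer elements were promoted from i8 to i32):

    #include <cstdint>
    #include <cstdio>

    int main() {
      float In[] = {-128.0f, -1.5f, 0.0f, 42.9f, 127.0f};
      for (float F : In) {
        int8_t Narrow = (int8_t)F;   // the original narrow conversion
        int32_t Wide = (int32_t)F;   // the promoted conversion
        int8_t Trunc = (int8_t)Wide; // the TRUNCATE back to the narrow type
        printf("%8.2f -> narrow %4d, wide-then-truncate %4d\n", F, Narrow, Trunc);
      }
      // (int8_t)300.0f is undefined in C++ just as the out-of-range
      // FP_TO_SINT is undefined in the DAG, so the assert is safe there too.
    }
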
@@ -503,7 +532,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
EVT SrcEltVT = SrcVT.getScalarType();
unsigned NumElem = SrcVT.getVectorNumElements();
-
SDValue NewChain;
SDValue Value;
if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
@@ -534,7 +562,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
unsigned Offset = 0;
unsigned RemainingBytes = SrcVT.getStoreSize();
SmallVector<SDValue, 8> LoadVals;
-
while (RemainingBytes > 0) {
SDValue ScalarLoad;
unsigned LoadBytes = WideBytes;
@@ -560,9 +587,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
RemainingBytes -= LoadBytes;
Offset += LoadBytes;
- BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getConstant(LoadBytes, dl,
- BasePTR.getValueType()));
+
+ BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
LoadVals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -1117,8 +1143,6 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
return DAG.getBuildVector(VT, dl, Ops);
}
-}
-
bool SelectionDAG::LegalizeVectors() {
return VectorLegalizer(*this).Run();
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6aa3270883f0..8f2320f52a0f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -243,7 +243,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
// For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
// are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
// legal and was not scalarized.
- // See the similar logic in ScalarizeVecRes_VSETCC
+ // See the similar logic in ScalarizeVecRes_SETCC
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Op = GetScalarizedVector(Op);
} else {
@@ -307,7 +307,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
SDLoc DL(N);
// The vselect result and true/value operands need scalarizing, but it's
// not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
- // See the similar logic in ScalarizeVecRes_VSETCC
+ // See the similar logic in ScalarizeVecRes_SETCC
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Cond = GetScalarizedVector(Cond);
} else {
@@ -380,21 +380,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
N->getOperand(4));
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
- assert(N->getValueType(0).isVector() ==
- N->getOperand(0).getValueType().isVector() &&
- "Scalar/Vector type mismatch");
-
- if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N);
-
- SDValue LHS = GetScalarizedVector(N->getOperand(0));
- SDValue RHS = GetScalarizedVector(N->getOperand(1));
- SDLoc DL(N);
-
- // Turn it into a scalar SETCC.
- return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
-}
-
SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
}
@@ -408,7 +393,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
return GetScalarizedVector(N->getOperand(Op));
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operand types must be vectors");
@@ -461,7 +446,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to scalarize this operator's operand!");
+ report_fatal_error("Do not know how to scalarize this operator's "
+ "operand!\n");
case ISD::BITCAST:
Res = ScalarizeVecOp_BITCAST(N);
break;
@@ -1068,34 +1054,57 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
if (CustomLowerNode(N, N->getValueType(0), true))
return;
- // Spill the vector to the stack.
+ // Make the vector elements byte-addressable if they aren't already.
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
+ if (VecVT.getScalarSizeInBits() < 8) {
+ EltVT = MVT::i8;
+ VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ VecVT.getVectorNumElements());
+ Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
+ // Extend the element type to match if needed.
+ if (EltVT.bitsGT(Elt.getValueType()))
+ Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt);
+ }
+
+ // Spill the vector to the stack.
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
- Store =
- DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT);
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr,
+ MachinePointerInfo::getUnknownStack(MF), EltVT);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
// Load the Lo part from the stack slot.
- Lo =
- DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
+ Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo);
// Increment the pointer to the other part.
- unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
+ unsigned IncrementSize = LoVT.getSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getConstant(IncrementSize, dl,
StackPtr.getValueType()));
// Load the Hi part from the stack slot.
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ Hi = DAG.getLoad(HiVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize),
MinAlign(Alignment, IncrementSize));
+
+ // If we adjusted the original type, we need to truncate the results.
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ if (LoVT != Lo.getValueType())
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo);
+ if (HiVT != Hi.getValueType())
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
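
The "byte-addressable" step exists because this path round-trips the vector
through a stack slot, and a stack slot can only be indexed at byte
granularity: with i1 elements there is no byte offset that addresses element
5, while widening to i8 gives every element its own byte. A standalone model
of the spill/insert/reload sequence (plain C++, names invented):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      // A v8i1 vector, widened to v8i8 before being spilled to the "stack".
      uint8_t Stack[8] = {1, 0, 1, 1, 0, 0, 1, 0};
      unsigned Idx = 5;
      Stack[Idx] = 1;            // the truncating store of the new element
      uint8_t Lo[4], Hi[4];      // the two halves loaded back from the slot
      memcpy(Lo, Stack, 4);
      memcpy(Hi, Stack + 4, 4);  // pointer advanced by LoVT's size in bytes
      for (unsigned i = 0; i < 4; ++i)
        printf("Lo[%u]=%u Hi[%u]=%u\n", i, Lo[i], i, Hi[i]);
    }
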
@@ -1130,8 +1139,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
Alignment, MMOFlags, AAInfo);
@@ -1283,10 +1291,19 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc DL(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
- // Split the input.
+ // If the input also splits, handle it directly. Otherwise split it by hand.
SDValue LL, LH, RL, RH;
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+ if (getTypeAction(N->getOperand(0).getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), LL, LH);
+ else
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+
+ if (getTypeAction(N->getOperand(1).getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(1), RL, RH);
+ else
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
@@ -1753,30 +1770,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Make the vector elements byte-addressable if they aren't already.
SDLoc dl(N);
EVT EltVT = VecVT.getVectorElementType();
- if (EltVT.getSizeInBits() < 8) {
- SmallVector<SDValue, 4> ElementOps;
- for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) {
- ElementOps.push_back(DAG.getAnyExtOrTrunc(
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec,
- DAG.getConstant(i, dl, MVT::i8)),
- dl, MVT::i8));
- }
-
+ if (VecVT.getScalarSizeInBits() < 8) {
EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VecVT.getVectorNumElements());
- Vec = DAG.getBuildVector(VecVT, dl, ElementOps);
+ Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
}
// Store the vector to the stack.
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
// Load back the required element.
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
- return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
- MachinePointerInfo(), EltVT);
+ return DAG.getExtLoad(
+ ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
}
SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
@@ -1886,9 +1898,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
- MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType());
- MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType());
-
// if Alignment is equal to the vector size,
// take the half of it for the second part
unsigned SecondHalfAlignment =
@@ -1955,7 +1964,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
- SDValue Lo, Hi;
+ SDValue Lo;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
@@ -1970,13 +1979,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi};
- Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO);
-
- // Build a factor node to remember that this store is independent of the
- // other one.
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+  // The order of the scatter operations after the split is well defined: the
+  // "Hi" part comes after the "Lo" part, so the two operations must be chained
+  // one after the other.
+ SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
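
The chaining change above replaces an unordered TokenFactor of the two
half-scatters with a serial chain, since the halves may write overlapping
addresses and the unsplit operation processes its lanes in order. The same
point in scalar form (invented data; lane 2 is written by both halves):

    #include <cstdio>

    int main() {
      int Mem[4] = {};
      int IdxLo[2] = {0, 2}, DataLo[2] = {1, 2};
      int IdxHi[2] = {2, 3}, DataHi[2] = {3, 4}; // index 2 collides with Lo
      for (int i = 0; i < 2; ++i) Mem[IdxLo[i]] = DataLo[i]; // Lo first
      for (int i = 0; i < 2; ++i) Mem[IdxHi[i]] = DataHi[i]; // then Hi
      printf("%d %d %d %d\n", Mem[0], Mem[1], Mem[2], Mem[3]); // 1 0 3 4
    }
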
@@ -2007,8 +2015,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
if (isTruncating)
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
@@ -2919,30 +2926,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
ISD::LoadExtType ExtType = N->getExtensionType();
SDLoc dl(N);
- if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
- Mask = GetWidenedVector(Mask);
- else {
- EVT BoolVT = getSetCCResultType(WidenVT);
-
- // We can't use ModifyToType() because we should fill the mask with
- // zeroes
- unsigned WidenNumElts = BoolVT.getVectorNumElements();
- unsigned MaskNumElts = MaskVT.getVectorNumElements();
-
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
- Ops[0] = Mask;
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = ZeroVal;
-
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
- }
+  // The mask should be widened as well.
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
Mask, Src0, N->getMemoryVT(),
N->getMemOperand(), ExtType,
- N->isExpandingLoad());
+ N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
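
ModifyToType is called here with the fill-with-zeroes flag so that the lanes
the widened mask gains beyond the original width read as inactive; a masked
load must not touch memory in lanes the original operation never had. A
scalar model of that rule (plain C++, invented names):

    #include <cstdio>

    int main() {
      bool Mask4[4] = {true, false, true, true}; // the original v4i1 mask
      bool Mask8[8] = {};                        // widened v8i1, zero-filled
      for (int i = 0; i < 4; ++i)
        Mask8[i] = Mask4[i];
      int Memory[8] = {10, 11, 12, 13, 14, 15, 16, 17};
      int Result[8] = {};
      for (int i = 0; i < 8; ++i)
        if (Mask8[i])          // lanes 4..7 stay off: no spurious accesses
          Result[i] = Memory[i];
      for (int i = 0; i < 8; ++i)
        printf("%d ", Result[i]);
      printf("\n");
    }
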
@@ -2953,12 +2946,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
SDValue Src0 = GetWidenedVector(N->getValue());
unsigned NumElts = WideVT.getVectorNumElements();
SDLoc dl(N);
// The mask should be widened as well.
- Mask = WidenTargetBoolean(Mask, WideVT, true);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
// Widen the Index operand
SDValue Index = N->getIndex();
@@ -3032,7 +3029,7 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
// Make a new Mask node, with a legal result VT.
SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0; i < InMask->getNumOperands(); ++i)
+ for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
Ops.push_back(InMask->getOperand(i));
SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
@@ -3065,12 +3062,9 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
} else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
EVT SubVT = Mask->getValueType(0);
- SmallVector<SDValue, 16> SubConcatOps(NumSubVecs);
- SubConcatOps[0] = Mask;
- for (unsigned i = 1; i < NumSubVecs; ++i)
- SubConcatOps[i] = DAG.getUNDEF(SubVT);
- Mask =
- DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubConcatOps);
+ SmallVector<SDValue, 16> SubOps(NumSubVecs, DAG.getUNDEF(SubVT));
+ SubOps[0] = Mask;
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps);
}
assert((Mask->getValueType(0) == ToMaskVT) &&
@@ -3105,7 +3099,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
// If this is a split VSELECT that was previously handled, do
// nothing.
- if (Cond->getValueType(0).getScalarSizeInBits() != 1)
+ EVT CondVT = Cond->getValueType(0);
+ if (CondVT.getScalarSizeInBits() != 1)
return SDValue();
EVT VSelVT = N->getValueType(0);
@@ -3129,6 +3124,14 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
if (SetCCResVT.getScalarSizeInBits() == 1)
return SDValue();
+ } else if (CondVT.getScalarType() == MVT::i1) {
+ // If there is support for an i1 vector mask (or only scalar i1 conditions),
+ // don't touch.
+ while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
+ CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);
+
+ if (CondVT.getScalarType() == MVT::i1)
+ return SDValue();
}
// Get the VT and operands for VSELECT, and widen if needed.
@@ -3236,19 +3239,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
N->getOperand(1), InOp1, InOp2, N->getOperand(4));
}
-SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
- assert(N->getValueType(0).isVector() ==
- N->getOperand(0).getValueType().isVector() &&
- "Scalar/Vector type mismatch");
- if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N);
-
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue InOp1 = GetWidenedVector(N->getOperand(0));
- SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT,
- InOp1, InOp2, N->getOperand(2));
-}
-
SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getUNDEF(WidenVT);
@@ -3279,7 +3269,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
}
-SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operands must be vectors");
@@ -3556,6 +3546,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 3 && "Can widen only data operand of mstore");
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
EVT MaskVT = Mask.getValueType();
@@ -3564,25 +3555,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
SDValue WideVal = GetWidenedVector(StVal);
SDLoc dl(N);
- if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
- Mask = GetWidenedVector(Mask);
- else {
- // The mask should be widened as well.
- EVT BoolVT = getSetCCResultType(WideVal.getValueType());
- // We can't use ModifyToType() because we should fill the mask with
- // zeroes.
- unsigned WidenNumElts = BoolVT.getVectorNumElements();
- unsigned MaskNumElts = MaskVT.getVectorNumElements();
-
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
- Ops[0] = Mask;
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = ZeroVal;
+ // The mask should be widened as well.
+ EVT WideVT = WideVal.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
- }
assert(Mask.getValueType().getVectorNumElements() ==
WideVal.getValueType().getVectorNumElements() &&
"Mask and data vectors should have the same number of elements");
@@ -3596,15 +3575,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue DataOp = MSC->getValue();
SDValue Mask = MSC->getMask();
+ EVT MaskVT = Mask.getValueType();
// Widen the value.
SDValue WideVal = GetWidenedVector(DataOp);
EVT WideVT = WideVal.getValueType();
- unsigned NumElts = WideVal.getValueType().getVectorNumElements();
+ unsigned NumElts = WideVT.getVectorNumElements();
SDLoc dl(N);
// The mask should be widened as well.
- Mask = WidenTargetBoolean(Mask, WideVT, true);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(), NumElts);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
// Widen index.
SDValue Index = MSC->getIndex();
@@ -3806,8 +3788,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
while (LdWidth > 0) {
unsigned Increment = NewVTWidth / 8;
Offset += Increment;
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Increment, dl, BasePtr.getValueType()));
+ BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
SDValue L;
if (LdWidth < NewVTWidth) {
@@ -3839,7 +3820,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
}
LdOps.push_back(L);
-
+ LdOp = L;
LdWidth -= NewVTWidth;
}
@@ -3929,10 +3910,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
- SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
- BasePtr,
- DAG.getConstant(Offset, dl,
- BasePtr.getValueType()));
+ SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
Align, MMOFlags, AAInfo);
@@ -3987,9 +3965,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StWidth -= NewVTWidth;
Offset += Increment;
Idx += NumVTElts;
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Increment, dl,
- BasePtr.getValueType()));
+
+ BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
} while (StWidth != 0 && StWidth >= NewVTWidth);
} else {
// Cast the vector to the scalar type we can store.
@@ -4008,9 +3985,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MinAlign(Align, Offset), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Increment, dl,
- BasePtr.getValueType()));
+ BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
} while (StWidth != 0 && StWidth >= NewVTWidth);
// Restore index back to be relative to the original widen element type.
Idx = Idx * NewVTWidth / ValEltWidth;
@@ -4053,10 +4028,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
MMOFlags, AAInfo));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
- SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
- BasePtr,
- DAG.getConstant(Offset, dl,
- BasePtr.getValueType()));
+ SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
SDValue EOp = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index a21b4c733254..379f0dcef513 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -22,12 +22,12 @@
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 237d541b4cb9..cf92907a8b5f 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -20,32 +20,31 @@
namespace llvm {
-class MDNode;
+class DIVariable;
+class DIExpression;
class SDNode;
class Value;
-/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// Holds the information from a dbg_value node through SDISel.
/// We do not use SDValue here to avoid including its header.
-
class SDDbgValue {
public:
enum DbgValueKind {
- SDNODE = 0, // value is the result of an expression
- CONST = 1, // value is a constant
- FRAMEIX = 2 // value is contents of a stack location
+ SDNODE = 0, ///< Value is the result of an expression.
+ CONST = 1, ///< Value is a constant.
+ FRAMEIX = 2 ///< Value is contents of a stack location.
};
private:
union {
struct {
- SDNode *Node; // valid for expressions
- unsigned ResNo; // valid for expressions
+ SDNode *Node; ///< Valid for expressions.
+ unsigned ResNo; ///< Valid for expressions.
} s;
- const Value *Const; // valid for constants
- unsigned FrameIx; // valid for stack objects
+ const Value *Const; ///< Valid for constants.
+ unsigned FrameIx; ///< Valid for stack objects.
} u;
- MDNode *Var;
- MDNode *Expr;
- uint64_t Offset;
+ DIVariable *Var;
+ DIExpression *Expr;
DebugLoc DL;
unsigned Order;
enum DbgValueKind kind;
@@ -53,71 +52,65 @@ private:
bool Invalid = false;
public:
- // Constructor for non-constants.
- SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir,
- uint64_t off, DebugLoc dl, unsigned O)
- : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
- IsIndirect(indir) {
+ /// Constructor for non-constants.
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, unsigned R,
+ bool indir, DebugLoc dl, unsigned O)
+ : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(indir) {
kind = SDNODE;
u.s.Node = N;
u.s.ResNo = R;
}
- // Constructor for constants.
- SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off,
- DebugLoc dl, unsigned O)
- : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
- IsIndirect(false) {
+ /// Constructor for constants.
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, const Value *C, DebugLoc dl,
+ unsigned O)
+ : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) {
kind = CONST;
u.Const = C;
}
- // Constructor for frame indices.
- SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl,
+ /// Constructor for frame indices.
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, DebugLoc dl,
unsigned O)
- : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
- IsIndirect(false) {
+ : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) {
kind = FRAMEIX;
u.FrameIx = FI;
}
- // Returns the kind.
+ /// Returns the kind.
DbgValueKind getKind() const { return kind; }
- // Returns the MDNode pointer for the variable.
- MDNode *getVariable() const { return Var; }
+ /// Returns the DIVariable pointer for the variable.
+ DIVariable *getVariable() const { return Var; }
- // Returns the MDNode pointer for the expression.
- MDNode *getExpression() const { return Expr; }
+ /// Returns the DIExpression pointer for the expression.
+ DIExpression *getExpression() const { return Expr; }
-  // Returns the SDNode* for a register ref
+  /// Returns the SDNode* for a register ref.
SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; }
-  // Returns the ResNo for a register ref
+  /// Returns the ResNo for a register ref.
unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; }
-  // Returns the Value* for a constant
+  /// Returns the Value* for a constant.
const Value *getConst() const { assert (kind==CONST); return u.Const; }
-  // Returns the FrameIx for a stack object
+  /// Returns the FrameIx for a stack object.
unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
- // Returns whether this is an indirect value.
+ /// Returns whether this is an indirect value.
bool isIndirect() const { return IsIndirect; }
- // Returns the offset.
- uint64_t getOffset() const { return Offset; }
-
- // Returns the DebugLoc.
+ /// Returns the DebugLoc.
DebugLoc getDebugLoc() const { return DL; }
- // Returns the SDNodeOrder. This is the order of the preceding node in the
- // input.
+ /// Returns the SDNodeOrder. This is the order of the preceding node in the
+ /// input.
unsigned getOrder() const { return Order; }
- // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
- // property. A SDDbgValue is invalid if the SDNode that produces the value is
- // deleted.
+ /// setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+  /// property. An SDDbgValue is invalid if the SDNode that produces the value is
+ /// deleted.
void setIsInvalidated() { Invalid = true; }
bool isInvalidated() const { return Invalid; }
};
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 137994093277..698e14453d1d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -18,13 +18,13 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 70b1fa77a099..49f304c8cc86 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1,4 +1,4 @@
-//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,23 +16,47 @@
//===----------------------------------------------------------------------===//
#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <climits>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
@@ -46,6 +70,7 @@ static RegisterScheduler
burrListDAGScheduler("list-burr",
"Bottom-up register reduction list scheduling",
createBURRListDAGScheduler);
+
static RegisterScheduler
sourceListDAGScheduler("source",
"Similar to list-burr but schedules in source "
@@ -105,6 +130,7 @@ static cl::opt<unsigned> AvgIPC(
cl::desc("Average inst/cycle when no target itinerary exists."));
namespace {
+
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
@@ -112,7 +138,6 @@ namespace {
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
/// NeedLatency - True if the scheduler will make use of latency information.
- ///
bool NeedLatency;
/// AvailableQueue - The priority queue to use for the available SUnits.
@@ -122,13 +147,13 @@ private:
/// been issued, but their results are not ready yet (due to the latency of
/// the operation). Once the operands becomes available, the instruction is
/// added to the AvailableQueue.
- std::vector<SUnit*> PendingQueue;
+ std::vector<SUnit *> PendingQueue;
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
/// CurCycle - The current scheduler state corresponds to this cycle.
- unsigned CurCycle;
+ unsigned CurCycle = 0;
/// MinAvailableCycle - Cycle of the soonest available instruction.
unsigned MinAvailableCycle;
@@ -147,7 +172,9 @@ private:
// Collect interferences between physical register use/defs.
// Each interference is an SUnit and set of physical registers.
SmallVector<SUnit*, 4> Interferences;
- typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
+
+ using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
+
LRegsMapT LRegsMap;
/// Topo - A topological ordering for SUnits which permits fast IsReachable
@@ -163,9 +190,8 @@ public:
SchedulingPriorityQueue *availqueue,
CodeGenOpt::Level OptLevel)
: ScheduleDAGSDNodes(mf),
- NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+ NeedLatency(needlatency), AvailableQueue(availqueue),
Topo(SUnits, nullptr) {
-
const TargetSubtargetInfo &STI = mf.getSubtarget();
if (DisableSchedCycles || !NeedLatency)
HazardRec = new ScheduleHazardRecognizer();
@@ -267,6 +293,7 @@ private:
return !NeedLatency;
}
};
+
} // end anonymous namespace
/// GetCostForDef - Looks up the register class and cost for a given definition.
@@ -319,13 +346,13 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
- DEBUG(dbgs()
- << "********** List Scheduling BB#" << BB->getNumber()
- << " '" << BB->getName() << "' **********\n");
+ DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
CurCycle = 0;
IssueCount = 0;
- MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
+ MinAvailableCycle =
+ DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
NumLiveRegs = 0;
// Allocate slots for each physical register, plus one for a special register
// to track the virtual resource of a calling sequence.
@@ -409,7 +436,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
unsigned NestLevel,
const TargetInstrInfo *TII) {
SDNode *N = Outer;
- for (;;) {
+ while (true) {
if (N == Inner)
return true;
// For a TokenFactor, examine each operand. There may be multiple ways
@@ -456,7 +483,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
const TargetInstrInfo *TII) {
- for (;;) {
+ while (true) {
// For a TokenFactor, examine each operand. There may be multiple ways
// to get to the CALLSEQ_BEGIN, but we need to find the path with the
// most nesting in order to ensure that we find the corresponding match.
@@ -550,6 +577,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
unsigned NestLevel = 0;
unsigned MaxNest = 0;
SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+ assert(N && "Must find call sequence start");
SUnit *Def = &SUnits[N->getNodeId()];
CallSeqEndForStart[Def] = SU;
@@ -571,7 +599,7 @@ void ScheduleDAGRRList::ReleasePending() {
// If the available queue is empty, it is safe to reset MinAvailableCycle.
if (AvailableQueue->empty())
- MinAvailableCycle = UINT_MAX;
+ MinAvailableCycle = std::numeric_limits<unsigned>::max();
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
@@ -791,7 +819,8 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
AvailableQueue->remove(PredSU);
}
- assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumSuccsLeft will overflow!");
++PredSU->NumSuccsLeft;
}
@@ -821,9 +850,13 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
SUNode = SUNode->getGluedNode()) {
if (SUNode->isMachineOpcode() &&
SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+ SUnit *SeqEnd = CallSeqEndForStart[SU];
+ assert(SeqEnd && "Call sequence start/end must be known");
+ assert(!LiveRegDefs[CallResource]);
+ assert(!LiveRegGens[CallResource]);
++NumLiveRegs;
LiveRegDefs[CallResource] = SU;
- LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+ LiveRegGens[CallResource] = SeqEnd;
}
}
@@ -835,6 +868,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
if (SUNode->isMachineOpcode() &&
SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[CallResource]);
+ assert(LiveRegGens[CallResource]);
--NumLiveRegs;
LiveRegDefs[CallResource] = nullptr;
LiveRegGens[CallResource] = nullptr;
@@ -891,7 +926,7 @@ void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
if (LookAhead == 0)
return;
- std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+ std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
unsigned HazardCycle = (*I)->getHeight();
for (auto E = Sequence.end(); I != E; ++I) {
SUnit *SU = *I;
@@ -1319,8 +1354,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
// If we're in the middle of scheduling a call, don't begin scheduling
// another call. Also, don't allow any physical registers to be live across
// the call.
- if ((Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) ||
- (Node->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
+ if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
// Check the special calling-sequence resource.
unsigned CallResource = TRI->getNumRegs();
if (LiveRegDefs[CallResource]) {
@@ -1390,27 +1424,32 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
- while (CurSU) {
- SmallVector<unsigned, 4> LRegs;
- if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
- break;
- DEBUG(dbgs() << " Interfering reg " <<
- (LRegs[0] == TRI->getNumRegs() ? "CallResource"
- : TRI->getName(LRegs[0]))
- << " SU #" << CurSU->NodeNum << '\n');
- std::pair<LRegsMapT::iterator, bool> LRegsPair =
- LRegsMap.insert(std::make_pair(CurSU, LRegs));
- if (LRegsPair.second) {
- CurSU->isPending = true; // This SU is not in AvailableQueue right now.
- Interferences.push_back(CurSU);
- }
- else {
- assert(CurSU->isPending && "Interferences are pending");
- // Update the interference with current live regs.
- LRegsPair.first->second = LRegs;
+ auto FindAvailableNode = [&]() {
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ DEBUG(dbgs() << " Interfering reg ";
+ if (LRegs[0] == TRI->getNumRegs())
+ dbgs() << "CallResource";
+ else
+ dbgs() << printReg(LRegs[0], TRI);
+ dbgs() << " SU #" << CurSU->NodeNum << '\n');
+ std::pair<LRegsMapT::iterator, bool> LRegsPair =
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ if (LRegsPair.second) {
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ }
+ else {
+ assert(CurSU->isPending && "Interferences are pending");
+ // Update the interference with current live regs.
+ LRegsPair.first->second = LRegs;
+ }
+ CurSU = AvailableQueue->pop();
}
- CurSU = AvailableQueue->pop();
- }
+ };
+ FindAvailableNode();
if (CurSU)
return CurSU;
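// Why the scan above became a lambda: after the backtracking path below
// unschedules nodes, the queue must be re-scanned with the same interference
// bookkeeping. A rough sketch, with canBacktrack() and backtrack() as
// hypothetical stand-ins for the real logic:
//   FindAvailableNode();            // first scan, defers interfering SUnits
//   if (!CurSU && canBacktrack()) {
//     backtrack();                  // unschedule to free the live register
//     FindAvailableNode();          // re-scan with updated LiveRegDefs/Gens
//   }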
@@ -1423,7 +1462,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// Try unscheduling up to the point where it's safe to schedule
// this node.
SUnit *BtSU = nullptr;
- unsigned LiveCycle = UINT_MAX;
+ unsigned LiveCycle = std::numeric_limits<unsigned>::max();
for (unsigned Reg : LRegs) {
if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
BtSU = LiveRegGens[Reg];
@@ -1447,13 +1486,16 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// If one or more successors has been unscheduled, then the current
// node is no longer available.
- if (!TrySU->isAvailable || !TrySU->NodeQueueId)
+ if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
+ DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
CurSU = AvailableQueue->pop();
- else {
+ } else {
+ DEBUG(dbgs() << "TrySU available\n");
// Available and in AvailableQueue
AvailableQueue->remove(TrySU);
CurSU = TrySU;
}
+ FindAvailableNode();
// Interferences has been mutated. We must break.
break;
}
@@ -1540,7 +1582,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
while (AvailableQueue->empty() && !PendingQueue.empty()) {
// Advance the cycle to free resources. Skip ahead to the next ready SU.
- assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+ assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
+ "MinAvailableCycle uninitialized");
AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
}
}
@@ -1553,17 +1596,11 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
#endif
}
-//===----------------------------------------------------------------------===//
-// RegReductionPriorityQueue Definition
-//===----------------------------------------------------------------------===//
-//
-// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
-// to reduce register pressure.
-//
namespace {
+
class RegReductionPQBase;
-struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+struct queue_sort {
bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};
@@ -1571,6 +1608,7 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
template<class SF>
struct reverse_sort : public queue_sort {
SF &SortFunc;
+
reverse_sort(SF &sf) : SortFunc(sf) {}
bool operator()(SUnit* left, SUnit* right) const {
@@ -1590,6 +1628,7 @@ struct bu_ls_rr_sort : public queue_sort {
};
RegReductionPQBase *SPQ;
+
bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
bool operator()(SUnit* left, SUnit* right) const;
@@ -1603,8 +1642,8 @@ struct src_ls_rr_sort : public queue_sort {
};
RegReductionPQBase *SPQ;
- src_ls_rr_sort(RegReductionPQBase *spq)
- : SPQ(spq) {}
+
+ src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
bool operator()(SUnit* left, SUnit* right) const;
};
@@ -1617,8 +1656,8 @@ struct hybrid_ls_rr_sort : public queue_sort {
};
RegReductionPQBase *SPQ;
- hybrid_ls_rr_sort(RegReductionPQBase *spq)
- : SPQ(spq) {}
+
+ hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
bool isReady(SUnit *SU, unsigned CurCycle) const;
@@ -1634,8 +1673,8 @@ struct ilp_ls_rr_sort : public queue_sort {
};
RegReductionPQBase *SPQ;
- ilp_ls_rr_sort(RegReductionPQBase *spq)
- : SPQ(spq) {}
+
+ ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
bool isReady(SUnit *SU, unsigned CurCycle) const;
@@ -1644,8 +1683,8 @@ struct ilp_ls_rr_sort : public queue_sort {
class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
- std::vector<SUnit*> Queue;
- unsigned CurQueueId;
+ std::vector<SUnit *> Queue;
+ unsigned CurQueueId = 0;
bool TracksRegPressure;
bool SrcOrder;
@@ -1656,13 +1695,12 @@ protected:
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const TargetLowering *TLI;
- ScheduleDAGRRList *scheduleDAG;
+ ScheduleDAGRRList *scheduleDAG = nullptr;
// SethiUllmanNumbers - The SethiUllman number for each node.
std::vector<unsigned> SethiUllmanNumbers;
/// RegPressure - Tracking current reg pressure per register class.
- ///
std::vector<unsigned> RegPressure;
/// RegLimit - Tracking the number of allocatable registers per register
@@ -1677,9 +1715,8 @@ public:
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
const TargetLowering *tli)
- : SchedulingPriorityQueue(hasReadyFilter),
- CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
- MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) {
+ : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
+ SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
if (TracksRegPressure) {
unsigned NumRC = TRI->getNumRegClasses();
RegLimit.resize(NumRC);
@@ -1730,7 +1767,7 @@ public:
void remove(SUnit *SU) override {
assert(!Queue.empty() && "Queue is empty!");
assert(SU->NodeQueueId != 0 && "Not in queue!");
- std::vector<SUnit *>::iterator I = find(Queue, SU);
+ std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
Queue.pop_back();
@@ -1759,7 +1796,7 @@ protected:
};
template<class SF>
-static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
+static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
std::vector<SUnit *>::iterator Best = Q.begin();
for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
if (Picker(*Best, *I))
@@ -1772,7 +1809,7 @@ static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
}
template<class SF>
-SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
#ifndef NDEBUG
if (DAG->StressSched) {
reverse_sort<SF> RPicker(Picker);
@@ -1783,6 +1820,13 @@ SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
return popFromQueueImpl(Q, Picker);
}
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Definition
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {
SF Picker;
@@ -1815,7 +1859,7 @@ public:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
// Emulate pop() without clobbering NodeQueueIds.
- std::vector<SUnit*> DumpQueue = Queue;
+ std::vector<SUnit *> DumpQueue = Queue;
SF DumpPicker = Picker;
while (!DumpQueue.empty()) {
SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
@@ -1826,17 +1870,11 @@ public:
#endif
};
-typedef RegReductionPriorityQueue<bu_ls_rr_sort>
-BURegReductionPriorityQueue;
-
-typedef RegReductionPriorityQueue<src_ls_rr_sort>
-SrcRegReductionPriorityQueue;
+using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
+using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
+using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
+using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
-typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
-HybridBURRPriorityQueue;
-
-typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
-ILPBURRPriorityQueue;
} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -2855,7 +2893,6 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
-///
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// Visit all the nodes in topological order, working top-down.
for (SUnit &SU : *SUnits) {
@@ -3022,7 +3059,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
@@ -3036,7 +3073,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
return SD;
}
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
@@ -3050,7 +3087,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
return SD;
}
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
@@ -3066,7 +3103,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
return SD;
}
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 3c8526ebb702..c09b47af26a6 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -23,14 +23,14 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
@@ -709,18 +709,17 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
// source order number as N.
MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
- ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
- for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
- if (DVs[i]->isInvalidated())
+ for (auto DV : DAG->GetDbgValues(N)) {
+ if (DV->isInvalidated())
continue;
- unsigned DVOrder = DVs[i]->getOrder();
+ unsigned DVOrder = DV->getOrder();
if (!Order || DVOrder == Order) {
- MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
if (DbgMI) {
- Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ Orders.push_back({DVOrder, DbgMI});
BB->insert(InsertPos, DbgMI);
}
- DVs[i]->setIsInvalidated();
+ DV->setIsInvalidated();
}
}
}
@@ -742,16 +741,17 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
}
MachineBasicBlock *BB = Emitter.getBlock();
- if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
+ auto IP = Emitter.getInsertPos();
+ if (IP == BB->begin() || BB->back().isPHI() ||
// Fast-isel may have inserted some instructions, in which case the
// BB->back().isPHI() test will not fire when we want it to.
- std::prev(Emitter.getInsertPos())->isPHI()) {
+ std::prev(IP)->isPHI()) {
// Did not insert any instruction.
- Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr));
+ Orders.push_back({Order, (MachineInstr *)nullptr});
return;
}
- Orders.push_back(std::make_pair(Order, &*std::prev(Emitter.getInsertPos())));
+ Orders.push_back({Order, &*std::prev(IP)});
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
@@ -856,8 +856,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
// Sort the source order instructions and use the order to insert debug
- // values.
- std::sort(Orders.begin(), Orders.end(), less_first());
+ // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
+ // regardless of the host's implementation of std::sort.
+ std::stable_sort(Orders.begin(), Orders.end(), less_first());
+ std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
+ [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
+ return LHS->getOrder() < RHS->getOrder();
+ });
SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
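// Why stable_sort matters here: several SDDbgValues can carry the same order,
// and std::sort breaks such ties in a host-dependent way. A self-contained
// illustration (Entry and Ex are assumed example values):
//   using Entry = std::pair<unsigned, const char *>;
//   std::vector<Entry> Ex = {{2, "a"}, {1, "x"}, {2, "b"}};
//   std::stable_sort(Ex.begin(), Ex.end(),
//                    [](const Entry &L, const Entry &R) { return L.first < R.first; });
//   // Guaranteed result: {1,"x"}, {2,"a"}, {2,"b"}; "a" stays ahead of "b".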
@@ -869,10 +874,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Insert all SDDbgValue's whose order(s) are before "Order".
if (!MI)
continue;
- for (; DI != DE &&
- (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+ for (; DI != DE; ++DI) {
+ if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
+ break;
if ((*DI)->isInvalidated())
continue;
+
MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
if (DbgMI) {
if (!LastOrder)
@@ -891,11 +898,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Add trailing DbgValue's before the terminator. FIXME: May want to add
// some of them before one or more conditional branches?
SmallVector<MachineInstr*, 8> DbgMIs;
- while (DI != DE) {
- if (!(*DI)->isInvalidated())
- if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
- DbgMIs.push_back(DbgMI);
- ++DI;
+ for (; DI != DE; ++DI) {
+ if ((*DI)->isInvalidated())
+ continue;
+ assert((*DI)->getOrder() >= LastOrder &&
+ "emitting DBG_VALUE out of order");
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
}
MachineBasicBlock *InsertBB = Emitter.getBlock();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 631cb34717c4..07b46b9183ab 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -25,13 +25,13 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>
using namespace llvm;
@@ -93,9 +93,8 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGVLIW::Schedule() {
- DEBUG(dbgs()
- << "********** List Scheduling BB#" << BB->getNumber()
- << " '" << BB->getName() << "' **********\n");
+ DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
// Build the scheduling graph.
BuildSchedGraph(AA);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 16f425dc7969..12a21e74079e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -37,6 +37,9 @@
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -59,11 +62,8 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -87,6 +87,15 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+#define DEBUG_TYPE "selectiondag"
+
+static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
+ DEBUG(
+ dbgs() << Msg;
+ V.getNode()->dump(G);
+ );
+}
+
//===----------------------------------------------------------------------===//
// ConstantFPSDNode Class
//===----------------------------------------------------------------------===//
@@ -116,8 +125,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
// ISD Namespace
//===----------------------------------------------------------------------===//
-bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
- bool AllowShrink) {
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
auto *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
@@ -126,10 +134,9 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
unsigned SplatBitSize;
bool HasUndefs;
unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
- unsigned MinSplatBits = AllowShrink ? 0 : EltSize;
return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
- MinSplatBits) &&
- EltSize >= SplatBitSize;
+ EltSize) &&
+ EltSize == SplatBitSize;
}
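// A hedged reading of the tightened check: requesting MinSplatBits == EltSize
// and then insisting EltSize == SplatBitSize means the splat must be
// expressible at exactly the element width. For example (assumed types),
// v4i32 <1,1,1,1> yields SplatBitSize == 32 and is accepted, while a value
// only splatted as repeating 16-bit halves (SplatBitSize == 16) is now
// rejected rather than shrunk.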
// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
@@ -895,12 +902,14 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
}
void SelectionDAG::init(MachineFunction &NewMF,
- OptimizationRemarkEmitter &NewORE) {
+ OptimizationRemarkEmitter &NewORE,
+ Pass *PassPtr) {
MF = &NewMF;
+ SDAGISelPass = PassPtr;
ORE = &NewORE;
TLI = getSubtarget().getTargetLowering();
TSI = getSubtarget().getSelectionDAGInfo();
- Context = &MF->getFunction()->getContext();
+ Context = &MF->getFunction().getContext();
}
SelectionDAG::~SelectionDAG() {
@@ -1018,7 +1027,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
assert(!VT.isVector() &&
"getZeroExtendInReg should use the vector element type instead of "
"the vector type!");
- if (Op.getValueType() == VT) return Op;
+ if (Op.getValueType().getScalarType() == VT) return Op;
unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt Imm = APInt::getLowBitsSet(BitWidth,
VT.getSizeInBits());
@@ -1156,7 +1165,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
- return getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
+
+ SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
+ return V;
}
assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
@@ -1176,11 +1187,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
+ NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this);
}
SDValue Result(N, 0);
if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
+
return Result;
}
@@ -1222,6 +1235,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
SDValue Result(N, 0);
if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
+ NewSDValueDbgMsg(Result, "Creating fp constant: ", this);
return Result;
}
@@ -1317,7 +1331,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = MF->getFunction()->optForSize()
+ Alignment = MF->getFunction().optForSize()
? getDataLayout().getABITypeAlignment(C->getType())
: getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -1471,7 +1485,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
// Validate that all indices in Mask are within the range of the elements
// input to the shuffle.
int NElts = Mask.size();
- assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
+ assert(llvm::all_of(Mask,
+ [&](int M) { return M < (NElts * 2) && M >= -1; }) &&
"Index out of range");
// Copy the mask so we can do any needed cleanup.
@@ -1622,7 +1637,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
@@ -1665,15 +1682,20 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root,
MCSymbol *Label) {
+ return getLabelNode(ISD::EH_LABEL, dl, Root, Label);
+}
+
+SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,
+ SDValue Root, MCSymbol *Label) {
FoldingSetNodeID ID;
SDValue Ops[] = { Root };
- AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops);
+ AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops);
ID.AddPointer(Label);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<EHLabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
+ auto *N = newSDNode<LabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -1955,6 +1977,69 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
return SDValue();
}
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by Mask are used.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
+ switch (V.getOpcode()) {
+ default:
+ break;
+ case ISD::Constant: {
+ const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+ assert(CV && "Const value should be ConstSDNode.");
+ const APInt &CVal = CV->getAPIntValue();
+ APInt NewVal = CVal & Mask;
+ if (NewVal != CVal)
+ return getConstant(NewVal, SDLoc(V), V.getValueType());
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
+ // If the LHS or RHS don't contribute bits to the OR, drop them.
+ if (MaskedValueIsZero(V.getOperand(0), Mask))
+ return V.getOperand(1);
+ if (MaskedValueIsZero(V.getOperand(1), Mask))
+ return V.getOperand(0);
+ break;
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= Mask.getBitWidth())
+ break;
+ APInt NewMask = Mask << Amt;
+ if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
+ return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
+ V.getOperand(1));
+ }
+ break;
+ case ISD::AND: {
+ // X & -1 -> X (ignoring bits which aren't demanded).
+ ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
+ if (AndVal && Mask.isSubsetOf(AndVal->getAPIntValue()))
+ return V.getOperand(0);
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ SDValue Src = V.getOperand(0);
+ unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
+ // Being conservative here - only peek through if we demand only bits in the
+ // non-extended source (even though the extended bits are technically undef).
+ if (Mask.getActiveBits() > SrcBitWidth)
+ break;
+ APInt SrcMask = Mask.trunc(SrcBitWidth);
+ if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask))
+ return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
+ break;
+ }
+ }
+ return SDValue();
+}
+
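// Hedged usage sketch for GetDemandedBits (DAG and Op are assumed values):
// if only the low byte of (x | 0xFF00) is demanded, the OR contributes no
// demanded bits and the call folds straight to x via the ISD::OR case above.
//   APInt Demanded(16, 0x00FF);
//   if (SDValue Simplified = DAG.GetDemandedBits(Op, Demanded))
//     Op = Simplified;  // (x | 0xFF00) under mask 0x00FF becomes x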
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
@@ -1972,6 +2057,30 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
return Mask.isSubsetOf(Known.Zero);
}
+/// Helper function that checks whether a node is a constant or a build
+/// vector that splats one constant across at least the demanded elements.
+static ConstantSDNode *isConstOrDemandedConstSplat(SDValue N,
+ const APInt &DemandedElts) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+ if (N.getOpcode() != ISD::BUILD_VECTOR)
+ return nullptr;
+ EVT VT = N.getValueType();
+ ConstantSDNode *Cst = nullptr;
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(DemandedElts.getBitWidth() == NumElts && "Unexpected vector size");
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(i));
+ if (!C || (Cst && Cst->getAPIntValue() != C->getAPIntValue()) ||
+ C->getValueType(0) != VT.getScalarType())
+ return nullptr;
+ Cst = C;
+ }
+ return Cst;
+}
+
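// Hedged example of what the helper accepts: lanes outside DemandedElts may
// differ without defeating the splat test.
//   N = build_vector <i32 3, i32 7, i32 3, i32 3>
//   isConstOrDemandedConstSplat(N, APInt(4, 0b1101)) -> ConstantSDNode(3)
//   isConstOrDemandedConstSplat(N, APInt(4, 0b1111)) -> nullptr (lane 1 differs)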
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
/// is less than the element bit-width of the shift node, return it.
static const APInt *getValidShiftAmountConstant(SDValue V) {
@@ -2005,6 +2114,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
unsigned BitWidth = Op.getScalarValueSizeInBits();
Known = KnownBits(BitWidth); // Don't know anything.
+
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ // We know all of the bits for a constant!
+ Known.One = C->getAPIntValue();
+ Known.Zero = ~Known.One;
+ return;
+ }
+ if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) {
+ // We know all of the bits for a constant fp!
+ Known.One = C->getValueAPF().bitcastToAPInt();
+ Known.Zero = ~Known.One;
+ return;
+ }
+
if (Depth == 6)
return; // Limit search depth.
@@ -2016,11 +2139,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
- case ISD::Constant:
- // We know all of the bits for a constant!
- Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
- Known.Zero = ~Known.One;
- break;
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded vector element.
assert(NumElts == Op.getValueType().getVectorNumElements() &&
@@ -2045,7 +2163,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero &= Known2.Zero;
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
}
break;
@@ -2083,7 +2201,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero &= Known2.Zero;
}
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
@@ -2109,11 +2227,45 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero &= Known2.Zero;
}
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
}
break;
}
+ case ISD::INSERT_SUBVECTOR: {
+ // If we know the element index, demand any elements from the subvector and
+ // the remainder from the src it's inserted into; otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ if (!!DemandedSubElts) {
+ computeKnownBits(Sub, Known, DemandedSubElts, Depth + 1);
+ if (Known.isUnknown())
+ break; // early-out.
+ }
+ APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
+ APInt DemandedSrcElts = DemandedElts & ~SubMask;
+ if (!!DemandedSrcElts) {
+ computeKnownBits(Src, Known2, DemandedSrcElts, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ } else {
+ computeKnownBits(Sub, Known, Depth + 1);
+ if (Known.isUnknown())
+ break; // early-out.
+ computeKnownBits(Src, Known2, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ break;
+ }
case ISD::EXTRACT_SUBVECTOR: {
// If we know the element index, just demand that subvector elements,
// otherwise demand them all.
@@ -2132,10 +2284,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::BITCAST: {
SDValue N0 = Op.getOperand(0);
- unsigned SubBitWidth = N0.getScalarValueSizeInBits();
+ EVT SubVT = N0.getValueType();
+ unsigned SubBitWidth = SubVT.getScalarSizeInBits();
- // Ignore bitcasts from floating point.
- if (!N0.getValueType().isInteger())
+ // Ignore bitcasts from unsupported types.
+ if (!(SubVT.isInteger() || SubVT.isFloatingPoint()))
break;
// Fast handling of 'identity' bitcasts.
@@ -2193,7 +2346,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
}
}
@@ -2264,22 +2417,23 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::SELECT:
- computeKnownBits(Op.getOperand(2), Known, Depth+1);
+ case ISD::VSELECT:
+ computeKnownBits(Op.getOperand(2), Known, DemandedElts, Depth+1);
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
- computeKnownBits(Op.getOperand(1), Known2, Depth+1);
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
- computeKnownBits(Op.getOperand(3), Known, Depth+1);
+ computeKnownBits(Op.getOperand(3), Known, DemandedElts, Depth+1);
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
- computeKnownBits(Op.getOperand(2), Known2, Depth+1);
+ computeKnownBits(Op.getOperand(2), Known2, DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
@@ -2308,35 +2462,49 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
case ISD::SHL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- Known.Zero <<= *ShAmt;
- Known.One <<= *ShAmt;
+ unsigned Shift = ShAmt->getZExtValue();
+ Known.Zero <<= Shift;
+ Known.One <<= Shift;
// Low bits are known zero.
- Known.Zero.setLowBits(ShAmt->getZExtValue());
+ Known.Zero.setLowBits(Shift);
}
break;
case ISD::SRL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- Known.Zero.lshrInPlace(*ShAmt);
- Known.One.lshrInPlace(*ShAmt);
+ unsigned Shift = ShAmt->getZExtValue();
+ Known.Zero.lshrInPlace(Shift);
+ Known.One.lshrInPlace(Shift);
// High bits are known zero.
- Known.Zero.setHighBits(ShAmt->getZExtValue());
+ Known.Zero.setHighBits(Shift);
+ } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) {
+ // If the shift amount is a vector of constants see if we can bound
+ // the number of upper zero bits.
+ unsigned ShiftAmountMin = BitWidth;
+ for (unsigned i = 0; i != BV->getNumOperands(); ++i) {
+ if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) {
+ const APInt &ShAmt = C->getAPIntValue();
+ if (ShAmt.ult(BitWidth)) {
+ ShiftAmountMin = std::min<unsigned>(ShiftAmountMin,
+ ShAmt.getZExtValue());
+ continue;
+ }
+ }
+ // Don't know anything.
+ ShiftAmountMin = 0;
+ break;
+ }
+
+ Known.Zero.setHighBits(ShiftAmountMin);
}
break;
case ISD::SRA:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- Known.Zero.lshrInPlace(*ShAmt);
- Known.One.lshrInPlace(*ShAmt);
- // If we know the value of the sign bit, then we know it is copied across
- // the high bits by the shift amount.
- APInt SignMask = APInt::getSignMask(BitWidth);
- SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask.
- if (Known.Zero.intersects(SignMask)) {
- Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero.
- } else if (Known.One.intersects(SignMask)) {
- Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one.
- }
+ unsigned Shift = ShAmt->getZExtValue();
+ // Sign extend known zero/one bit (else is unknown).
+ Known.Zero.ashrInPlace(Shift);
+ Known.One.ashrInPlace(Shift);
}
break;
case ISD::SIGN_EXTEND_INREG: {
@@ -2414,49 +2582,33 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::ZERO_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
- Known = Known.trunc(InBits);
- computeKnownBits(Op.getOperand(0), Known,
- DemandedElts.zext(InVT.getVectorNumElements()),
- Depth + 1);
+ APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
+ computeKnownBits(Op.getOperand(0), Known, InDemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InBits);
+ Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
break;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
- Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InBits);
+ Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
break;
}
// TODO ISD::SIGN_EXTEND_VECTOR_INREG
case ISD::SIGN_EXTEND: {
- EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
-
- Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
-
// If the sign bit is known to be zero or one, then sext will extend
// it to the top bits, else it will just zext.
Known = Known.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
- EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
- Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, Depth+1);
Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
- EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
- Known = Known.zext(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known = Known.trunc(BitWidth);
break;
@@ -2755,7 +2907,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
computeKnownBits(Op.getOperand(0), Known, DemandedElts,
Depth + 1);
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
Known.Zero &= Known2.Zero;
@@ -2764,11 +2916,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
- if (unsigned Align = InferPtrAlignment(Op)) {
- // The low bits are known zero if the pointer is aligned.
- Known.Zero.setLowBits(Log2_32(Align));
- break;
- }
+ TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
break;
default:
@@ -2783,7 +2931,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
}
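// A worked instance of the new vector-SRL bound above (assumed lane values):
// for srl <2 x i8> %v, <i8 3, i8 5>, every constant lane shifts by at least
// min(3, 5) == 3, so three high bits per lane are known zero even though the
// exact shift differs lane by lane:
//   KnownBits Known(8);
//   Known.Zero.setHighBits(3);  // Known.Zero == 0xE0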
SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
@@ -2873,12 +3021,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned Depth) const {
EVT VT = Op.getValueType();
- assert(VT.isInteger() && "Invalid VT!");
+ assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!");
unsigned VTBits = VT.getScalarSizeInBits();
unsigned NumElts = DemandedElts.getBitWidth();
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ const APInt &Val = C->getAPIntValue();
+ return Val.getNumSignBits();
+ }
+
if (Depth == 6)
return 1; // Limit search depth.
@@ -2894,11 +3047,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
return VTBits-Tmp;
- case ISD::Constant: {
- const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
- return Val.getNumSignBits();
- }
-
case ISD::BUILD_VECTOR:
Tmp = VTBits;
for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
@@ -2952,32 +3100,63 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
}
+ case ISD::BITCAST: {
+ SDValue N0 = Op.getOperand(0);
+ EVT SrcVT = N0.getValueType();
+ unsigned SrcBits = SrcVT.getScalarSizeInBits();
+
+ // Ignore bitcasts from unsupported types.
+ if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint()))
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ if (VTBits == SrcBits)
+ return ComputeNumSignBits(N0, DemandedElts, Depth + 1);
+
+ // Bitcast 'large element' scalar/vector to 'small element' vector.
+ // TODO: Handle cases other than 'sign splat' when we have a use case.
+ // Requires handling of DemandedElts and Endianness.
+ if ((SrcBits % VTBits) == 0) {
+ assert(Op.getValueType().isVector() && "Expected bitcast to vector");
+ Tmp = ComputeNumSignBits(N0, Depth + 1);
+ if (Tmp == SrcBits)
+ return VTBits;
+ }
+ break;
+ }
+
case ISD::SIGN_EXTEND:
- case ISD::SIGN_EXTEND_VECTOR_INREG:
Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
- return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
-
+ return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp;
case ISD::SIGN_EXTEND_INREG:
// Max of the input and what this extends.
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
Tmp = VTBits-Tmp+1;
-
- Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
return std::max(Tmp, Tmp2);
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
+ Tmp = VTBits - SrcVT.getScalarSizeInBits();
+ return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
+ }
case ISD::SRA:
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
// SRA X, C -> adds C sign bits.
- if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
+ if (ConstantSDNode *C =
+ isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
APInt ShiftVal = C->getAPIntValue();
ShiftVal += Tmp;
Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
}
return Tmp;
case ISD::SHL:
- if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
+ if (ConstantSDNode *C =
+ isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
// shl destroys sign bits.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
if (C->getAPIntValue().uge(VTBits) || // Bad shift.
C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out.
return Tmp - C->getZExtValue();
@@ -2987,9 +3166,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::OR:
case ISD::XOR: // NOT is handled here.
// Logical binary ops preserve the number of sign bits at the worst.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
if (Tmp != 1) {
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1);
FirstAnswer = std::min(Tmp, Tmp2);
// We computed what we know about the sign bits as our first
// answer. Now proceed to the generic code that uses
@@ -2998,15 +3177,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::SELECT:
- Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ case ISD::VSELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1);
if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1);
return std::min(Tmp, Tmp2);
case ISD::SELECT_CC:
- Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1);
if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth+1);
return std::min(Tmp, Tmp2);
+
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -3041,16 +3222,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::ROTL:
case ISD::ROTR:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+ unsigned RotAmt = C->getAPIntValue().urem(VTBits);
// Handle rotate right by N like a rotate left by 32-N.
if (Op.getOpcode() == ISD::ROTR)
- RotAmt = (VTBits-RotAmt) & (VTBits-1);
+ RotAmt = (VTBits - RotAmt) % VTBits;
// If we aren't rotating out all of the known-in sign bits, return the
// number that are left. This handles rotl(sext(x), 1) for example.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
- if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
}
break;
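// Hedged aside on the urem change above: masking with (VTBits - 1) is only
// correct when VTBits is a power of two. For an i24 rotate by 25,
// 25 & 23 == 17 (wrong) while 25 % 24 == 1 (correct), so urem also covers
// non-power-of-two widths.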
case ISD::ADD:
@@ -3391,7 +3572,9 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
? DAG.getZExtOrTrunc(Op, DL, SVT)
: DAG.getSExtOrTrunc(Op, DL, SVT);
- return DAG.getBuildVector(VT, DL, Elts);
+ SDValue V = DAG.getBuildVector(VT, DL, Elts);
+ NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG);
+ return V;
}
/// Gets or creates the specified node.
@@ -3407,7 +3590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -3768,7 +3953,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
@@ -3906,18 +4093,31 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
EVT SVT = VT.getScalarType();
+ EVT LegalSVT = SVT;
+ if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+ if (LegalSVT.bitsLT(SVT))
+ return SDValue();
+ }
SmallVector<SDValue, 4> Outputs;
for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
SDValue V1 = BV1->getOperand(I);
SDValue V2 = BV2->getOperand(I);
- // Avoid BUILD_VECTOR nodes that perform implicit truncation.
- // FIXME: This is valid and could be handled by truncation.
+ if (SVT.isInteger()) {
+ if (V1->getValueType(0).bitsGT(SVT))
+ V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
+ if (V2->getValueType(0).bitsGT(SVT))
+ V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+ }
+
if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
return SDValue();
// Fold one vector element.
SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
+ if (LegalSVT != SVT)
+ ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
// Scalar folding only succeeded if the result is a constant or UNDEF.
if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
@@ -3936,6 +4136,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
return getBuildVector(VT, SDLoc(), Outputs);
}
+// TODO: Merge with FoldConstantArithmetic
SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
@@ -4027,7 +4228,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
ScalarResults.push_back(ScalarResult);
}
- return getBuildVector(VT, DL, ScalarResults);
+ SDValue V = getBuildVector(VT, DL, ScalarResults);
+ NewSDValueDbgMsg(V, "New node fold constant vector: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -4297,6 +4500,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
}
}
+
+ // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed
+ // when vector types are scalarized and v1iX is legal.
+ // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx)
+ if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getValueType().getVectorNumElements() == 1) {
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),
+ N1.getOperand(1));
+ }
break;
case ISD::EXTRACT_ELEMENT:
assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
@@ -4518,7 +4730,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -4553,8 +4767,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
// Vector constant folding.
SDValue Ops[] = {N1, N2, N3};
- if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {
+ NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
return V;
+ }
break;
}
case ISD::SELECT:
@@ -4626,7 +4842,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -4882,8 +5100,8 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
// On Darwin, -Os means optimize for size without hurting performance, so
// only really optimize for size when -Oz (MinSize) is used.
if (MF.getTarget().getTargetTriple().isOSDarwin())
- return MF.getFunction()->optForMinSize();
- return MF.getFunction()->optForSize();
+ return MF.getFunction().optForMinSize();
+ return MF.getFunction().optForSize();
}
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
@@ -5558,21 +5776,15 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
- EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol,
- bool ReadMem, bool WriteMem, unsigned Size) {
+ EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
+ MachineMemOperand::Flags Flags, unsigned Size) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
- MachineFunction &MF = getMachineFunction();
- auto Flags = MachineMemOperand::MONone;
- if (WriteMem)
- Flags |= MachineMemOperand::MOStore;
- if (ReadMem)
- Flags |= MachineMemOperand::MOLoad;
- if (Vol)
- Flags |= MachineMemOperand::MOVolatile;
if (!Size)
Size = MemVT.getStoreSize();
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
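// A hedged sketch of the updated call shape: callers now compose the memory
// flags directly instead of passing Vol/ReadMem/WriteMem booleans:
//   auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
//   SDValue Res = DAG.getMemIntrinsicNode(Opc, dl, VTList, Ops, MemVT,
//                                         PtrInfo, /*Align=*/0, Flags, Size);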
@@ -5597,6 +5809,8 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
+ ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>(
+ Opcode, dl.getIROrder(), VTList, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5622,7 +5836,8 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
+ SelectionDAG &DAG, SDValue Ptr,
int64_t Offset = 0) {
// If this is FI+Offset, we can model it.
if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
@@ -5633,7 +5848,7 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
if (Ptr.getOpcode() != ISD::ADD ||
!isa<ConstantSDNode>(Ptr.getOperand(1)) ||
!isa<FrameIndexSDNode>(Ptr.getOperand(0)))
- return MachinePointerInfo();
+ return Info;
int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
return MachinePointerInfo::getFixedStack(
@@ -5645,14 +5860,15 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
+ SelectionDAG &DAG, SDValue Ptr,
SDValue OffsetOp) {
// If the 'Offset' value isn't a constant, we can't handle this.
if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
- return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue());
+ return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue());
if (OffsetOp.isUndef())
- return InferPointerInfo(DAG, Ptr);
- return MachinePointerInfo();
+ return InferPointerInfo(Info, DAG, Ptr);
+ return Info;
}
SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
@@ -5672,7 +5888,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(*this, Ptr, Offset);
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -5791,7 +6007,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(*this, Ptr);
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -5841,7 +6057,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(*this, Ptr);
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -6118,7 +6334,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
@@ -6171,7 +6389,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
createOperands(N, Ops);
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
@@ -6580,14 +6800,16 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
unsigned OrigOpc = Node->getOpcode();
unsigned NewOpc;
bool IsUnary = false;
+ bool IsTernary = false;
switch (OrigOpc) {
- default:
+ default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+ case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
@@ -6614,10 +6836,14 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
SDNode *Res = nullptr;
if (IsUnary)
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
+ else if (IsTernary)
+ Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
+ Node->getOperand(2),
+ Node->getOperand(3)});
else
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
Node->getOperand(2) });
-
+
// MorphNodeTo can operate in two ways: if an existing node with the
// specified operands exists, it can just return it. Otherwise, it
// updates the node in place to have the requested operands.
@@ -6630,7 +6856,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
RemoveDeadNode(Node);
}
- return Res;
+ return Res;
}
/// getMachineNode - These are used for target selectors to create a new node
@@ -6794,32 +7020,125 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
/// getDbgValue - Creates a SDDbgValue node.
///
/// SDNode
-SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
- unsigned R, bool IsIndirect, uint64_t Off,
+SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr,
+ SDNode *N, unsigned R, bool IsIndirect,
const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc())
- SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
+ SDDbgValue(Var, Expr, N, R, IsIndirect, DL, O);
}
/// Constant
-SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
- const Value *C, uint64_t Off,
+SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,
+ DIExpression *Expr,
+ const Value *C,
const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O);
+ return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, DL, O);
}
/// FrameIndex
-SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
- unsigned FI, uint64_t Off,
+SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
+ DIExpression *Expr, unsigned FI,
const DebugLoc &DL,
unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O);
+ return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O);
+}
+
+void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
+ unsigned OffsetInBits, unsigned SizeInBits,
+ bool InvalidateDbg) {
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ assert(FromNode && ToNode && "Can't modify dbg values");
+
+ // PR35338
+ // TODO: assert(From != To && "Redundant dbg value transfer");
+ // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer");
+ if (From == To || FromNode == ToNode)
+ return;
+
+ if (!FromNode->getHasDebugValue())
+ return;
+
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (SDDbgValue *Dbg : GetDbgValues(FromNode)) {
+ if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated())
+ continue;
+
+ // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value");
+
+ // Just transfer the dbg value attached to From.
+ if (Dbg->getResNo() != From.getResNo())
+ continue;
+
+ DIVariable *Var = Dbg->getVariable();
+ auto *Expr = Dbg->getExpression();
+ // If a fragment is requested, update the expression.
+ if (SizeInBits) {
+ // When splitting a larger (e.g., sign-extended) value whose
+ // lower bits are described with an SDDbgValue, do not attempt
+ // to transfer the SDDbgValue to the upper bits.
+ if (auto FI = Expr->getFragmentInfo())
+ if (OffsetInBits + SizeInBits > FI->SizeInBits)
+ continue;
+ auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits,
+ SizeInBits);
+ if (!Fragment)
+ continue;
+ Expr = *Fragment;
+ }
+ // Clone the SDDbgValue and move it to To.
+ SDDbgValue *Clone =
+ getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(),
+ Dbg->getDebugLoc(), Dbg->getOrder());
+ ClonedDVs.push_back(Clone);
+
+ if (InvalidateDbg)
+ Dbg->setIsInvalidated();
+ }
+
+ for (SDDbgValue *Dbg : ClonedDVs)
+ AddDbgValue(Dbg, ToNode, false);
+}
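// A minimal usage sketch (hypothetical values, not from the patch): when a
// 64-bit node Whole is split and Lo describes its low half, the call
//   DAG.transferDbgValues(Whole, Lo, /*OffsetInBits=*/0, /*SizeInBits=*/32);
// clones each matching SDNODE dbg value onto Lo with a
// DW_OP_LLVM_fragment(0, 32) appended via createFragmentExpression.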
+
+void SelectionDAG::salvageDebugInfo(SDNode &N) {
+ if (!N.getHasDebugValue())
+ return;
+ for (auto DV : GetDbgValues(&N)) {
+ if (DV->isInvalidated())
+ continue;
+ switch (N.getOpcode()) {
+ default:
+ break;
+ case ISD::ADD:
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ if (!isConstantIntBuildVectorOrConstantInt(N0) &&
+ isConstantIntBuildVectorOrConstantInt(N1)) {
+ uint64_t Offset = N.getConstantOperandVal(1);
+ // Rewrite an ADD constant node into a DIExpression. Since we are
+ // performing arithmetic to compute the variable's *value* in the
+ // DIExpression, we need to mark the expression with a
+ // DW_OP_stack_value.
+ auto *DIExpr = DV->getExpression();
+ DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset,
+ DIExpression::NoDeref,
+ DIExpression::WithStackValue);
+ SDDbgValue *Clone =
+ getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
+ DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
+ DV->setIsInvalidated();
+ AddDbgValue(Clone, N0.getNode(), false);
+ DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
+ dbgs() << " into " << *DIExpr << '\n');
+ }
+ }
+ }
}
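// Worked example (illustrative, not from the patch): given
//   t5: i64 = add t3, Constant:i64<8>
// with a dbg value for a variable attached to t5, the rewrite above
// invalidates it and attaches a clone to t3 whose expression is
//   !DIExpression(DW_OP_plus_uconst, 8, DW_OP_stack_value)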
namespace {
@@ -6859,7 +7178,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
assert(From != To.getNode() && "Cannot replace uses of with self");
// Preserve Debug Values
- TransferDbgValues(FromN, To);
+ transferDbgValues(FromN, To);
// Iterate over all the existing uses of From. New uses will be added
// to the beginning of the use list, which we avoid visiting.
@@ -6918,7 +7237,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
if (From->hasAnyUseOfValue(i)) {
assert((i < To->getNumValues()) && "Invalid To location");
- TransferDbgValues(SDValue(From, i), SDValue(To, i));
+ transferDbgValues(SDValue(From, i), SDValue(To, i));
}
// Iterate over just the existing users of From. See the comments in
@@ -6962,7 +7281,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
// Preserve Debug Info.
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
- TransferDbgValues(SDValue(From, i), *To);
+ transferDbgValues(SDValue(From, i), *To);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -7009,7 +7328,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
}
// Preserve Debug Info.
- TransferDbgValues(From, To);
+ transferDbgValues(From, To);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -7087,7 +7406,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
if (Num == 1)
return ReplaceAllUsesOfValueWith(*From, *To);
- TransferDbgValues(*From, *To);
+ transferDbgValues(*From, *To);
// Read up all the uses and make records of them. This helps
// processing new uses that are introduced during the
@@ -7236,35 +7555,6 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
DbgInfo->add(DB, SD, isParameter);
}
-/// TransferDbgValues - Transfer SDDbgValues. Called in replace nodes.
-void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
- if (From == To || !From.getNode()->getHasDebugValue())
- return;
- SDNode *FromNode = From.getNode();
- SDNode *ToNode = To.getNode();
- ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
- SmallVector<SDDbgValue *, 2> ClonedDVs;
- for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
- I != E; ++I) {
- SDDbgValue *Dbg = *I;
- // Only add Dbgvalues attached to same ResNo.
- if (Dbg->getKind() == SDDbgValue::SDNODE &&
- Dbg->getSDNode() == From.getNode() &&
- Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) {
- assert(FromNode != ToNode &&
- "Should not transfer Debug Values intranode");
- SDDbgValue *Clone =
- getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
- To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
- Dbg->getDebugLoc(), Dbg->getOrder());
- ClonedDVs.push_back(Clone);
- Dbg->setIsInvalidated();
- }
- }
- for (SDDbgValue *I : ClonedDVs)
- AddDbgValue(I, ToNode, false);
-}
-
SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
SDValue NewMemOp) {
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 0d69441ebb7f..544da362be69 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -1,5 +1,4 @@
-//===-- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp ------- DAG Address
-//Analysis ---*- C++ -*-===//
+//==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,15 +6,18 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/Casting.h"
+#include <cstdint>
-namespace llvm {
+using namespace llvm;
bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
const SelectionDAG &DAG, int64_t &Off) {
@@ -55,7 +57,7 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
/// Parses tree in Ptr for base, index, offset addresses.
BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
// (((B + I*M) + c)) + c ...
- SDValue Base = Ptr;
+ SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
SDValue Index = SDValue();
int64_t Offset = 0;
bool IsIndexSignExt = false;
@@ -112,4 +114,3 @@ BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
}
return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
}
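// Illustrative decomposition (not from the patch): matching t2 in
//   t1 = add t0, Constant:i64<16>
//   t2 = add t1, Constant:i64<8>
// yields Base = t0, no Index, and Offset = 24; with the change above, a
// target address wrapper around t0 (e.g. an X86 wrapper node) is first
// peeled off by unwrapAddress().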
-} // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 127312076207..71cb8cb78f6d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,63 +12,113 @@
//===----------------------------------------------------------------------===//
#include "SelectionDAGBuilder.h"
-#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <numeric>
+#include <tuple>
#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "isel"
@@ -78,11 +128,18 @@ using namespace llvm;
static unsigned LimitFloatPrecision;
static cl::opt<unsigned, true>
-LimitFPPrecision("limit-float-precision",
- cl::desc("Generate low-precision inline sequences "
- "for some float libcalls"),
- cl::location(LimitFloatPrecision),
- cl::init(0));
+ LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision), cl::Hidden,
+ cl::init(0));
+
+static cl::opt<unsigned> SwitchPeelThreshold(
+ "switch-peel-threshold", cl::Hidden, cl::init(66),
+ cl::desc("Set the case probability threshold for peeling the case from a "
+             "switch statement. A value greater than 100 will disable this "
+ "optimization"));
+
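// Usage sketch (assuming the usual cl::opt plumbing): the threshold can be
// set from the llc command line, e.g.
//   llc -switch-peel-threshold=75 input.ll
// peels a case only when its probability exceeds 75%, and a value above 100
// turns peeling off entirely.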
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
@@ -101,7 +158,7 @@ static const unsigned MaxParallelChains = 64;
// True if the Value passed requires ABI mangling as it is a parameter to a
// function or a return value from a function which is not an intrinsic.
-static bool isABIRegCopy(const Value * V) {
+static bool isABIRegCopy(const Value *V) {
const bool IsRetInst = V && isa<ReturnInst>(V);
const bool IsCallInst = V && isa<CallInst>(V);
const bool IsInLineAsm =
@@ -554,7 +611,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
bool IsABIRegCopy) {
-
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -600,7 +656,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
Val = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
-
} else {
assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
"lossy conversion of vector to scalar type");
@@ -677,8 +732,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
}
}
-RegsForValue::RegsForValue() { IsABIMangled = false; }
-
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
EVT valuevt, bool IsABIMangledValue)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
@@ -888,7 +941,24 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
Ops.push_back(Res);
- unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+ if (Code == InlineAsm::Kind_Clobber) {
+ // Clobbers should always have a 1:1 mapping with registers, and may
+ // reference registers that have illegal (e.g. vector) types. Hence, we
+ // shouldn't try to apply any sort of splitting logic to them.
+ assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
+ "No 1:1 mapping from clobbers to regs?");
+ unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+ (void)SP;
+ for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
+ Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
+ assert(
+ (Regs[I] != SP ||
+ DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
+ "If we clobbered the stack pointer, MFI should know about it.");
+ }
+ return;
+ }
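// Concrete case for the invariant above (editor's note): an asm clobber such
// as "~{xmm0}" contributes exactly one register, possibly of an illegal
// vector type, so it is emitted 1:1 here and never reaches the value
// splitting loop below.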
+
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
MVT RegisterVT = RegVTs[Value];
@@ -896,11 +966,6 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
assert(Reg < Regs.size() && "Mismatch in # registers expected");
unsigned TheReg = Regs[Reg++];
Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
-
- if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
- // If we clobbered the stack pointer, MFI should know about it.
- assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment());
- }
}
}
}
@@ -1025,12 +1090,10 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DIExpression *Expr = DI->getExpression();
assert(Variable->isValidLocationForIntrinsic(dl) &&
"Expected inlined-at fields to agree");
- uint64_t Offset = DI->getOffset();
SDDbgValue *SDV;
if (Val.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false,
- Val)) {
- SDV = getDbgValue(Val, Variable, Expr, Offset, dl, DbgSDNodeOrder);
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
+ SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
@@ -1409,7 +1472,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// Leave Outs empty so that LowerReturn won't try to load return
// registers the usual way.
SmallVector<EVT, 1> PtrValueVTs;
- ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()),
+ ComputeValueVTs(TLI, DL,
+ F->getReturnType()->getPointerTo(
+ DAG.getDataLayout().getAllocaAddrSpace()),
PtrValueVTs);
SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
@@ -1421,22 +1486,15 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
- // An aggregate return value cannot wrap around the address space, so
- // offsets to its parts don't wrap either.
- SDNodeFlags Flags;
- Flags.setNoUnsignedWrap(true);
-
SmallVector<SDValue, 4> Chains(NumValues);
for (unsigned i = 0; i != NumValues; ++i) {
- SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
- RetPtr.getValueType(), RetPtr,
- DAG.getIntPtrConstant(Offsets[i],
- getCurSDLoc()),
- Flags);
- Chains[i] = DAG.getStore(Chain, getCurSDLoc(),
- SDValue(RetOp.getNode(), RetOp.getResNo() + i),
- // FIXME: better loc info would be nice.
- Add, MachinePointerInfo());
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
+ Chains[i] = DAG.getStore(
+ Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ // FIXME: better loc info would be nice.
+ Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
}
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
@@ -1515,9 +1573,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
EVT(TLI.getPointerTy(DL))));
}
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
CallingConv::ID CallConv =
- DAG.getMachineFunction().getFunction()->getCallingConv();
+ DAG.getMachineFunction().getFunction().getCallingConv();
Chain = DAG.getTargetLoweringInfo().LowerReturn(
Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
@@ -1623,7 +1681,6 @@ static bool InBlock(const Value *V, const BasicBlock *BB) {
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
-///
void
SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *TBB,
@@ -1659,7 +1716,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
}
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
- TBB, FBB, CurBB, TProb, FProb);
+ TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SwitchCases.push_back(CB);
return;
}
@@ -1668,7 +1725,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
// Create a CaseBlock record representing this branch.
ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
- nullptr, TBB, FBB, CurBB, TProb, FProb);
+ nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SwitchCases.push_back(CB);
}
@@ -1712,7 +1769,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
// If this node is not part of the or/and tree, emit it as a branch.
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
- BOpc != Opc || !BOp->hasOneUse() ||
+ BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
@@ -1867,7 +1924,6 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// je foo
// cmp D, E
// jle foo
- //
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
Instruction::BinaryOps Opcode = BOp->getOpcode();
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
@@ -1907,7 +1963,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
- nullptr, Succ0MBB, Succ1MBB, BrMBB);
+ nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
// Use visitSwitchCase to actually insert the fast branch sequence for this
// cond branch.
@@ -1920,7 +1976,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB) {
SDValue Cond;
SDValue CondLHS = getValue(CB.CmpLHS);
- SDLoc dl = getCurSDLoc();
+ SDLoc dl = CB.DL;
// Build the setcc now.
if (!CB.CmpMHS) {
@@ -2054,7 +2110,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
- Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent());
+ Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
MachineSDNode *Node =
DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
if (Global) {
@@ -2088,15 +2144,18 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
SDValue Guard;
SDLoc dl = getCurSDLoc();
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
- const Module &M = *ParentBB->getParent()->getFunction()->getParent();
+ const Module &M = *ParentBB->getParent()->getFunction().getParent();
unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
// Generate code to load the content of the guard slot.
- SDValue StackSlot = DAG.getLoad(
+ SDValue GuardVal = DAG.getLoad(
PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
MachineMemOperand::MOVolatile);
+ if (TLI.useStackGuardXorFP())
+ GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
+
// Retrieve guard check function, nullptr if instrumentation is inlined.
if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
// The target provides a guard check function to validate the guard value.
@@ -2108,7 +2167,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Node = StackSlot;
+ Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
@@ -2141,7 +2200,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
// Perform the comparison via a subtract/getsetcc.
EVT VT = Guard.getValueType();
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(),
@@ -2151,7 +2210,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
// If the sub is not 0, then we know the guard/stackslot do not equal, so
// branch to failure MBB.
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
- MVT::Other, StackSlot.getOperand(0),
+ MVT::Other, GuardVal.getOperand(0),
Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
// Otherwise branch to success MBB.
SDValue Br = DAG.getNode(ISD::BR, dl,
@@ -2530,7 +2589,7 @@ static bool isVectorReductionOp(const User *I) {
case Instruction::FAdd:
case Instruction::FMul:
if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
- if (FPOp->getFastMathFlags().unsafeAlgebra())
+ if (FPOp->getFastMathFlags().isFast())
break;
LLVM_FALLTHROUGH;
default:
@@ -2576,7 +2635,7 @@ static bool isVectorReductionOp(const User *I) {
if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
- if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra())
+ if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
return false;
UsersToVisit.push_back(U);
} else if (const ShuffleVectorInst *ShufInst =
@@ -2670,7 +2729,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
Flags.setNoInfs(FMF.noInfs());
Flags.setNoNaNs(FMF.noNaNs());
Flags.setNoSignedZeros(FMF.noSignedZeros());
- Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
+ Flags.setUnsafeAlgebra(FMF.isFast());
SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
Op1, Op2, Flags);
@@ -2779,7 +2838,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
// Check if the condition of the select has one use or two users that are both
// selects with the same condition.
static bool hasOnlySelectUsers(const Value *Cond) {
- return all_of(Cond->users(), [](const Value *V) {
+ return llvm::all_of(Cond->users(), [](const Value *V) {
return isa<SelectInst>(V);
});
}
@@ -3447,7 +3506,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
SDValue AllocSize = getValue(I.getArraySize());
- EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
+ EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
@@ -3468,17 +3527,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// an address inside an alloca.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- AllocSize = DAG.getNode(ISD::ADD, dl,
- AllocSize.getValueType(), AllocSize,
- DAG.getIntPtrConstant(StackAlign - 1, dl), Flags);
+ AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
// Mask out the low bits for alignment purposes.
- AllocSize = DAG.getNode(ISD::AND, dl,
- AllocSize.getValueType(), AllocSize,
- DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1),
- dl));
+ AllocSize =
+ DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
- SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) };
+ SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
setValue(&I, DSA);
@@ -3807,18 +3864,16 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
//
// When the first GEP operand is a single pointer - it is the uniform base we
// are looking for. If first operand of the GEP is a splat vector - we
-// extract the spalt value and use it as a uniform base.
+// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
-//
static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
SelectionDAGBuilder* SDB) {
-
SelectionDAG& DAG = SDB->DAG;
LLVMContext &Context = *DAG.getContext();
  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- if (!GEP || GEP->getNumOperands() > 2)
+ if (!GEP)
return false;
const Value *GEPPtr = GEP->getPointerOperand();
@@ -3827,7 +3882,15 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
else if (!(Ptr = getSplatValue(GEPPtr)))
return false;
- Value *IndexVal = GEP->getOperand(1);
+ unsigned FinalIndex = GEP->getNumOperands() - 1;
+ Value *IndexVal = GEP->getOperand(FinalIndex);
+
+ // Ensure all the other indices are 0.
+ for (unsigned i = 1; i < FinalIndex; ++i) {
+ auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!C || !C->isZero())
+ return false;
+ }
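// Illustrative IR (not from the patch): the relaxed check now accepts
//   getelementptr [16 x float], [16 x float]* %base, i64 0, <8 x i64> %idx
// since every index before the last is a constant zero, while any non-zero
// leading index still returns false.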
// The operands of the GEP may be defined in another basic block.
// In this case we'll not find nodes for the operands.
@@ -3837,13 +3900,6 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
Base = SDB->getValue(Ptr);
Index = SDB->getValue(IndexVal);
- // Suppress sign extension.
- if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
- if (SDB->findValue(Sext->getOperand(0))) {
- IndexVal = Sext->getOperand(0);
- Index = SDB->getValue(IndexVal);
- }
- }
if (!Index.getValueType().isVector()) {
unsigned GEPWidth = GEP->getType()->getVectorNumElements();
EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
@@ -4082,7 +4138,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- if (I.getAlignment() < VT.getSizeInBits() / 8)
+ if (!TLI.supportsUnalignedAtomics() &&
+ I.getAlignment() < VT.getStoreSize())
report_fatal_error("Cannot generate unaligned atomic load");
MachineMemOperand *MMO =
@@ -4118,7 +4175,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
EVT VT =
TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
- if (I.getAlignment() < VT.getSizeInBits() / 8)
+ if (I.getAlignment() < VT.getStoreSize())
report_fatal_error("Cannot generate unaligned atomic store");
SDValue OutChain =
@@ -4157,7 +4214,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
  // Info is set by getTgtMemIntrinsic
TargetLowering::IntrinsicInfo Info;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
+ DAG.getMachineFunction(),
+ Intrinsic);
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
@@ -4183,11 +4242,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
SDValue Result;
if (IsTgtIntrinsic) {
// This is target intrinsic that touches memory
- Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
- VTs, Ops, Info.memVT,
- MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align, Info.vol,
- Info.readMem, Info.writeMem, Info.size);
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
+ Ops, Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
+ Info.flags, Info.size);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -4370,7 +4428,6 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
-
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -4469,7 +4526,6 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
-
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -4567,7 +4623,6 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
-
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -4695,7 +4750,6 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
}
-
/// ExpandPowI - Expand a llvm.powi intrinsic.
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
SelectionDAG &DAG) {
@@ -4712,8 +4766,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
if (Val == 0)
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
- const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->optForSize() ||
+ const Function &F = DAG.getMachineFunction().getFunction();
+ if (!F.optForSize() ||
// If optimizing for size, don't insert too many multiplies.
// This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
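// Worked example (illustrative): for powi(x, 13), Log2_32(13) = 3 squarings
// (x^2, x^4, x^8) plus countPopulation(13) - 1 = 2 combining multiplies give
// x^13 in 5 multiplies, and 3 + 3 = 6 < 7 satisfies the size check above.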
@@ -4766,12 +4820,12 @@ static unsigned getUnderlyingArgReg(const SDValue &N) {
}
}
-/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
-/// argument, create the corresponding DBG_VALUE machine instruction for it now.
-/// At the end of instruction selection, they will be inserted to the entry BB.
+/// If the DbgValueInst is a dbg_value of a function argument, create the
+/// corresponding DBG_VALUE machine instruction for it now. At the end of
+/// instruction selection, they will be inserted to the entry BB.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const Value *V, DILocalVariable *Variable, DIExpression *Expr,
- DILocation *DL, int64_t Offset, bool IsDbgDeclare, const SDValue &N) {
+ DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
@@ -4779,17 +4833,11 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
- // Ignore inlined function arguments here.
- //
- // FIXME: Should we be checking DL->inlinedAt() to determine this?
- if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction()))
- return false;
-
bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
- if (FI != INT_MAX)
+ if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
if (!Op && N.getNode()) {
@@ -4806,22 +4854,48 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
}
+ if (!Op && N.getNode())
+ // Check if frame index is available.
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ Op = MachineOperand::CreateFI(FINode->getIndex());
+
if (!Op) {
// Check if ValueMap has reg number.
DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
+ V->getType(), isABIRegCopy(V));
+ unsigned NumRegs =
+ std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0);
+ if (NumRegs > 1) {
+ unsigned I = 0;
+ unsigned Offset = 0;
+ auto RegisterVT = RFV.RegVTs.begin();
+ for (auto RegCount : RFV.RegCount) {
+ unsigned RegisterSize = (RegisterVT++)->getSizeInBits();
+ for (unsigned E = I + RegCount; I != E; ++I) {
+ // The vregs are guaranteed to be allocated in sequence.
+ Op = MachineOperand::CreateReg(VMI->second + I, false);
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, RegisterSize);
+ if (!FragmentExpr)
+ continue;
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
+ Op->getReg(), Variable, *FragmentExpr));
+ Offset += RegisterSize;
+ }
+ }
+ return true;
+ }
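// Illustrative outcome (hypothetical vregs, not from the patch): an i128
// argument lowered into two consecutive 64-bit vregs %0 and %1 produces two
// DBG_VALUEs carrying fragment expressions:
//   DBG_VALUE %0, !DIExpression(DW_OP_LLVM_fragment, 0, 64)
//   DBG_VALUE %1, !DIExpression(DW_OP_LLVM_fragment, 64, 64)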
Op = MachineOperand::CreateReg(VMI->second, false);
IsIndirect = IsDbgDeclare;
}
}
- if (!Op && N.getNode())
- // Check if frame index is available.
- if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
- if (FrameIndexSDNode *FINode =
- dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
- Op = MachineOperand::CreateFI(FINode->getIndex());
-
if (!Op)
return false;
@@ -4830,12 +4904,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (Op->isReg())
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
- Op->getReg(), Offset, Variable, Expr));
+ Op->getReg(), Variable, Expr));
else
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
.add(*Op)
- .addImm(Offset)
+ .addImm(0)
.addMetadata(Variable)
.addMetadata(Expr));
@@ -4845,18 +4919,18 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
DILocalVariable *Variable,
- DIExpression *Expr, int64_t Offset,
+ DIExpression *Expr,
const DebugLoc &dl,
unsigned DbgSDNodeOrder) {
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
// stack slot locations as such instead of as indirectly addressed
// locations.
- return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl,
+ return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
DbgSDNodeOrder);
}
- return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false,
- Offset, dl, DbgSDNodeOrder);
+ return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
+ DbgSDNodeOrder);
}
// VisualStudio defines setjmp as _setjmp
@@ -4971,8 +5045,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memcpy_element_unordered_atomic: {
- const ElementUnorderedAtomicMemCpyInst &MI =
- cast<ElementUnorderedAtomicMemCpyInst>(I);
+ const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
@@ -5010,7 +5083,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memmove_element_unordered_atomic: {
- auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I);
+ auto &MI = cast<AtomicMemMoveInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
@@ -5048,7 +5121,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memset_element_unordered_atomic: {
- auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I);
+ auto &MI = cast<AtomicMemSetInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Val = getValue(MI.getValue());
SDValue Length = getValue(MI.getLength());
@@ -5086,30 +5159,48 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(CallResult.second);
return nullptr;
}
+ case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
- const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I);
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
- const Value *Address = DI.getAddress();
assert(Variable && "Missing variable");
- if (!Address) {
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
- return nullptr;
- }
// Check if address has undef value.
- if (isa<UndefValue>(Address) ||
+ const Value *Address = DI.getVariableLocation();
+ if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return nullptr;
}
- // Byval arguments with frame indices were already handled after argument
- // lowering and before isel.
- const auto *Arg =
- dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
- if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
+ bool isParameter = Variable->isParameter() || isa<Argument>(Address);
+
+ // Check if this variable can be described by a frame index, typically
+ // either as a static alloca or a byval parameter.
+ int FI = std::numeric_limits<int>::max();
+ if (const auto *AI =
+ dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
+ if (AI->isStaticAlloca()) {
+ auto I = FuncInfo.StaticAllocaMap.find(AI);
+ if (I != FuncInfo.StaticAllocaMap.end())
+ FI = I->second;
+ }
+ } else if (const auto *Arg = dyn_cast<Argument>(
+ Address->stripInBoundsConstantOffsets())) {
+ FI = FuncInfo.getArgumentFrameIndex(Arg);
+ }
+
+ // llvm.dbg.addr is control dependent and always generates indirect
+ // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
+ // the MachineFunction variable table.
+ if (FI != std::numeric_limits<int>::max()) {
+ if (Intrinsic == Intrinsic::dbg_addr)
+ DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl,
+ SDNodeOrder),
+ getRoot().getNode(), isParameter);
return nullptr;
+ }
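// Illustrative IR (not from the patch): for
//   %x = alloca i32
//   call void @llvm.dbg.declare(metadata i32* %x, metadata !var, metadata !ex)
// the static alloca maps to a frame index, so dbg.declare is left entirely to
// the MachineFunction variable table, while llvm.dbg.addr at the same point
// emits a frame-index dbg value anchored to the current root.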
SDValue &N = NodeMap[Address];
if (!N.getNode() && isa<Argument>(Address))
@@ -5120,26 +5211,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
// Parameters are handled specially.
- bool isParameter = Variable->isParameter() || isa<Argument>(Address);
auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (isParameter && FINode) {
// Byval parameter. We have a frame index at this point.
SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
- FINode->getIndex(), 0, dl, SDNodeOrder);
+ FINode->getIndex(), dl, SDNodeOrder);
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N);
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
return nullptr;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
- true, 0, dl, SDNodeOrder);
+ true, dl, SDNodeOrder);
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true,
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
N)) {
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
@@ -5152,15 +5242,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
- uint64_t Offset = DI.getOffset();
const Value *V = DI.getValue();
if (!V)
return nullptr;
SDDbgValue *SDV;
if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
- SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl,
- SDNodeOrder);
+ SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, nullptr, false);
return nullptr;
}
@@ -5171,10 +5259,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
- if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false,
- N))
+ if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N))
return nullptr;
- SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder);
+ SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
return nullptr;
}
@@ -5213,12 +5300,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_unwind_init:
DAG.getMachineFunction().setCallsUnwindInit(true);
return nullptr;
- case Intrinsic::eh_dwarf_cfa: {
+ case Intrinsic::eh_dwarf_cfa:
setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return nullptr;
- }
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
@@ -5247,17 +5333,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Op.getValue(1));
return nullptr;
}
- case Intrinsic::eh_sjlj_longjmp: {
+ case Intrinsic::eh_sjlj_longjmp:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
getRoot(), getValue(I.getArgOperand(0))));
return nullptr;
- }
- case Intrinsic::eh_sjlj_setup_dispatch: {
+ case Intrinsic::eh_sjlj_setup_dispatch:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
getRoot()));
return nullptr;
- }
-
case Intrinsic::masked_gather:
visitMaskedGather(I);
return nullptr;
@@ -5430,6 +5513,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@@ -5534,11 +5618,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Res.getValue(1));
return nullptr;
}
- case Intrinsic::stackrestore: {
+ case Intrinsic::stackrestore:
Res = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
return nullptr;
- }
case Intrinsic::get_dynamic_area_offset: {
SDValue Op = getRoot();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
@@ -5557,7 +5640,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::stackguard: {
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
- const Module &M = *MF.getFunction()->getParent();
+ const Module &M = *MF.getFunction().getParent();
SDValue Chain = getRoot();
if (TLI.useLoadStackGuardNode()) {
Res = getLoadStackGuard(DAG, sdl, Chain);
@@ -5568,6 +5651,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MachinePointerInfo(Global, 0), Align,
MachineMemOperand::MOVolatile);
}
+ if (TLI.useStackGuardXorFP())
+ Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
DAG.setRoot(Chain);
setValue(&I, Res);
return nullptr;
@@ -5624,9 +5709,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
case Intrinsic::assume:
case Intrinsic::var_annotation:
- // Discard annotate attributes and assumptions
+ case Intrinsic::sideeffect:
+ // Discard annotate attributes, assumptions, and artificial side-effects.
return nullptr;
+ case Intrinsic::codeview_annotation: {
+ // Emit a label associated with this metadata.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MCSymbol *Label =
+ MF.getMMI().getContext().createTempSymbol("annotation", true);
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
+ MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
+ Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
+ DAG.setRoot(Res);
+ return nullptr;
+ }
+
case Intrinsic::init_trampoline: {
const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
@@ -5643,17 +5741,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Res);
return nullptr;
}
- case Intrinsic::adjust_trampoline: {
+ case Intrinsic::adjust_trampoline:
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return nullptr;
- }
case Intrinsic::gcroot: {
- MachineFunction &MF = DAG.getMachineFunction();
- const Function *F = MF.getFunction();
- (void)F;
- assert(F->hasGC() &&
+ assert(DAG.getMachineFunction().getFunction().hasGC() &&
"only valid in functions with gc specified, enforced by Verifier");
assert(GFI && "implied by previous");
const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
@@ -5670,11 +5764,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
return nullptr;
- case Intrinsic::expect: {
+ case Intrinsic::expect:
// Just replace __builtin_expect(exp, c) with EXP.
setValue(&I, getValue(I.getArgOperand(0)));
return nullptr;
- }
case Intrinsic::debugtrap:
case Intrinsic::trap: {
@@ -5728,6 +5821,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::prefetch: {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+    auto Flags =
+        rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
@@ -5738,9 +5832,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
EVT::getIntegerVT(*Context, 8),
MachinePointerInfo(I.getArgOperand(0)),
0, /* align */
- false, /* volatile */
- rw==0, /* read */
- rw==1)); /* write */
+ Flags));
return nullptr;
}
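// Mapping sketch (illustrative): in
//   call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 1)
// the second operand is rw, so rw == 0 tags the memory operand MOLoad
// (prefetch for read) and rw == 1 tags it MOStore (prefetch for write).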
case Intrinsic::lifetime_start:
@@ -5792,27 +5884,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::donothing:
// ignore
return nullptr;
- case Intrinsic::experimental_stackmap: {
+ case Intrinsic::experimental_stackmap:
visitStackmap(I);
return nullptr;
- }
case Intrinsic::experimental_patchpoint_void:
- case Intrinsic::experimental_patchpoint_i64: {
+ case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(&I);
return nullptr;
- }
- case Intrinsic::experimental_gc_statepoint: {
+ case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(ImmutableStatepoint(&I));
return nullptr;
- }
- case Intrinsic::experimental_gc_result: {
+ case Intrinsic::experimental_gc_result:
visitGCResult(cast<GCResultInst>(I));
return nullptr;
- }
- case Intrinsic::experimental_gc_relocate: {
+ case Intrinsic::experimental_gc_relocate:
visitGCRelocate(cast<GCRelocateInst>(I));
return nullptr;
- }
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
case Intrinsic::instrprof_value_profile:
@@ -5851,7 +5938,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Get the symbol that defines the frame offset.
auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
- unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
+ unsigned IdxVal =
+ unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
@@ -5932,12 +6020,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin: {
+ case Intrinsic::experimental_vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
return nullptr;
}
-
- }
}
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
@@ -5961,6 +6047,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_frem:
Opcode = ISD::STRICT_FREM;
break;
+ case Intrinsic::experimental_constrained_fma:
+ Opcode = ISD::STRICT_FMA;
+ break;
case Intrinsic::experimental_constrained_sqrt:
Opcode = ISD::STRICT_FSQRT;
break;
@@ -6007,10 +6096,15 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result;
if (FPI.isUnaryOp())
- Result = DAG.getNode(Opcode, sdl, VTs,
+ Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)) });
+ else if (FPI.isTernaryOp())
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ getValue(FPI.getArgOperand(1)),
+ getValue(FPI.getArgOperand(2)) });
else
- Result = DAG.getNode(Opcode, sdl, VTs,
+ Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)),
getValue(FPI.getArgOperand(1)) });
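// Illustrative lowering (not from the patch): a call to
//   llvm.experimental.constrained.fma(x, y, z,
//                                     metadata !"round.dynamic",
//                                     metadata !"fpexcept.strict")
// takes the ternary path above and becomes STRICT_FMA(Chain, x, y, z),
// keeping the chain so FP-environment effects are not reordered.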
@@ -6081,7 +6175,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
if (MF.hasEHFunclets()) {
assert(CLI.CS);
WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
- EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()),
+ EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
BeginLabel, EndLabel);
} else {
MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
@@ -6189,7 +6283,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SelectionDAGBuilder &Builder) {
-
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
@@ -6553,10 +6646,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call. Don't do the check if marked as nobuiltin for
- // some reason.
+ // some reason or the call site requires strict floating point semantics.
LibFunc Func;
- if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
- LibInfo->getLibFunc(*F, Func) &&
+ if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
+ F->hasName() && LibInfo->getLibFunc(*F, Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
@@ -6735,7 +6828,7 @@ public:
RegsForValue AssignedRegs;
explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
- : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) {
+ : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
}
/// Whether or not this operand accesses memory
@@ -6767,7 +6860,7 @@ public:
// If this is an indirect operand, the operand is a pointer to the
// accessed type.
if (isIndirect) {
- llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
if (!PtrTy)
report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
@@ -6799,7 +6892,7 @@ public:
}
};
-typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>;
} // end anonymous namespace
@@ -6879,7 +6972,6 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
/// allocation. This produces generally horrible, but correct, code.
///
/// OpInfo describes the operand.
-///
static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
const SDLoc &DL,
SDISelAsmOperandInfo &OpInfo) {
@@ -7013,6 +7105,8 @@ static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs,
return true;
}
+namespace {
+
class ExtraFlags {
unsigned Flags = 0;
@@ -7028,7 +7122,7 @@ public:
Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
}
- void update(const llvm::TargetLowering::AsmOperandInfo &OpInfo) {
+ void update(const TargetLowering::AsmOperandInfo &OpInfo) {
// Ideally, we would only check against memory constraints. However, the
// meaning of an Other constraint can be target-specific and we can't easily
// reason about it. Therefore, be conservative and set MayLoad/MayStore
@@ -7047,8 +7141,9 @@ public:
unsigned get() const { return Flags; }
};
+} // end anonymous namespace
+
/// visitInlineAsm - Handle a call to an InlineAsm object.
-///
void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
@@ -7207,13 +7302,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
RegsForValue RetValRegs;
// IndirectStoresToEmit - The set of stores to emit after the inline asm node.
- std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+ std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
switch (OpInfo.Type) {
- case InlineAsm::isOutput: {
+ case InlineAsm::isOutput:
if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
OpInfo.ConstraintType != TargetLowering::C_Register) {
// Memory output, or 'other' output (e.g. 'X' constraint).
@@ -7264,7 +7359,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
: InlineAsm::Kind_RegDef,
false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
break;
- }
+
case InlineAsm::isInput: {
SDValue InOperandVal = OpInfo.CallOperand;
@@ -7397,7 +7492,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
dl, DAG, AsmNodeOperands);
break;
}
- case InlineAsm::isClobber: {
+ case InlineAsm::isClobber:
// Add the clobbered value to the operand list, so that the register
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
@@ -7406,7 +7501,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands);
break;
}
- }
}
// Finish up input operands. Set the input chain and add the flag last.
@@ -7453,7 +7547,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
return;
}
- std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
+ std::vector<std::pair<SDValue, const Value *>> StoresToEmit;
// Process indirect outputs, first output all of the flagged copies out of
// physregs.
@@ -7865,13 +7959,13 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
switch (Intrinsic) {
case Intrinsic::experimental_vector_reduce_fadd:
- if (FMF.unsafeAlgebra())
+ if (FMF.isFast())
Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
else
Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
break;
case Intrinsic::experimental_vector_reduce_fmul:
- if (FMF.unsafeAlgebra())
+ if (FMF.isFast())
Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
else
Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
@@ -7903,14 +7997,12 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
case Intrinsic::experimental_vector_reduce_umin:
Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_fmax: {
+ case Intrinsic::experimental_vector_reduce_fmax:
Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
break;
- }
- case Intrinsic::experimental_vector_reduce_fmin: {
+ case Intrinsic::experimental_vector_reduce_fmin:
Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
break;
- }
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
}
@@ -7955,10 +8047,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
uint64_t Offset = OldOffsets[i];
MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
- unsigned RegisterVTSize = RegisterVT.getSizeInBits();
+ unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
RetTys.append(NumRegs, RegisterVT);
for (unsigned j = 0; j != NumRegs; ++j)
- Offsets.push_back(Offset + j * RegisterVTSize);
+ Offsets.push_back(Offset + j * RegisterVTByteSZ);
}
}
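The hunk above switches the per-register stride from bits to bytes: the
Offsets vector feeds byte-addressed accesses, so stepping by getSizeInBits()
overshot by a factor of eight. A minimal sketch, with values assumed for
illustration:

    // An i128 return value split into four i32 registers on a 32-bit target.
    MVT RegisterVT = MVT::i32;                        // getRegisterType(...)
    unsigned NumRegs = 4;                             // getNumRegisters(...)
    unsigned ByteSz = RegisterVT.getSizeInBits() / 8; // 4 bytes, not 32
    for (unsigned j = 0; j != NumRegs; ++j)
      Offsets.push_back(Offset + j * ByteSz);         // 0, 4, 8, 12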
@@ -7996,6 +8088,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.IsSwiftError = false;
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
+ CLI.NumFixedArgs += 1;
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
// sret demotion isn't compatible with tail-calls, since the sret argument
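Bumping NumFixedArgs matters for variadic callees: the hidden sret pointer is
prepended as a fixed argument, so the fixed/variadic boundary has to move with
it. A sketch with an assumed signature:

    // Before demotion:  double f(int n, ...);
    // After demotion:   void f(Ret *sret, int n, ...);  // sret is fixed
    // Without CLI.NumFixedArgs += 1, lowering would treat the last genuinely
    // fixed argument as the first variadic one.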
@@ -8148,8 +8241,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
- CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind,
- true);
+ CLI.CS.getInstruction(), ExtendKind, true);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
@@ -8209,7 +8301,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// The instruction result is the result of loading from the
// hidden sret parameter.
SmallVector<EVT, 1> PVTs;
- Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);
+ Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
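PointerType::getUnqual always yields addrspace(0), which is wrong for targets
whose DataLayout places allocas elsewhere. A sketch, with the target assumed
for illustration:

    // AMDGPU's datalayout marks allocas as addrspace(5) ("A5"), so the
    // hidden sret pointer must live there too:
    unsigned AS = DL.getAllocaAddrSpace();        // 0 on most targets
    Type *PtrRetTy = OrigRetTy->getPointerTo(AS); // not getUnqual(...)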
@@ -8326,9 +8418,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
return true;
}
-typedef DenseMap<const Argument *,
- std::pair<const AllocaInst *, const StoreInst *>>
- ArgCopyElisionMapTy;
+using ArgCopyElisionMapTy =
+ DenseMap<const Argument *,
+ std::pair<const AllocaInst *, const StoreInst *>>;
/// Scan the entry block of the function in FuncInfo for arguments that look
/// like copies into a local alloca. Record any copied arguments in
@@ -8503,7 +8595,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
- PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ F.getReturnType()->getPointerTo(
+ DAG.getDataLayout().getAllocaAddrSpace()),
+ ValueVTs);
// NOTE: Assuming that a pointer will never break down to more than one VT
// or one register.
@@ -8657,7 +8751,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
- PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ F.getReturnType()->getPointerTo(
+ DAG.getDataLayout().getAllocaAddrSpace()),
+ ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
Optional<ISD::NodeType> AssertOp = None;
@@ -8749,11 +8845,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
SDB->setValue(&Arg, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ // We want to associate the argument with the frame index, among the
+ // involved operands, that corresponds to the lowest address. The
+ // getCopyFromParts function, called earlier, swaps the order of
+ // the operands to BUILD_PAIR depending on endianness. The result of
+ // that swapping is that the least significant bits of the argument will
+ // be in the first operand of the BUILD_PAIR node, and the most
+ // significant bits will be in the second operand.
+ unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
if (LoadSDNode *LNode =
- dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+ dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
- FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
// Update the SwiftErrorVRegDefMap.
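A worked illustration of the LowAddressOp choice, with an assumed i64 argument
reassembled from two i32 stack loads:

    // getCopyFromParts builds BUILD_PAIR(Lo, Hi), Lo = least significant.
    //   little-endian frame: Lo sits at the lower address -> operand 0
    //   big-endian frame:    Hi sits at the lower address -> operand 1
    unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;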
@@ -8813,7 +8917,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
/// directly add them, because expansion might result in multiple MBB's for one
/// BB. As such, the start of the BB might correspond to a different MBB than
/// the end.
-///
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const TerminatorInst *TI = LLVMBB->getTerminator();
@@ -9249,10 +9352,12 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
BitTestInfo BTI;
std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
- // Sort by probability first, number of bits second.
+ // Sort by probability first, number of bits second, bit mask third.
if (a.ExtraProb != b.ExtraProb)
return a.ExtraProb > b.ExtraProb;
- return a.Bits > b.Bits;
+ if (a.Bits != b.Bits)
+ return a.Bits > b.Bits;
+ return a.Mask < b.Mask;
});
for (auto &CB : CBV) {
@@ -9441,10 +9546,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
if (TM.getOptLevel() != CodeGenOpt::None) {
- // Order cases by probability so the most likely case will be checked first.
+ // Here, we order cases by probability so the most likely case will be
+ // checked first. However, two clusters can have the same probability, in
+ // which case their relative ordering is non-deterministic. So we use Low
+ // as a tie-breaker, as clusters are guaranteed never to overlap.
std::sort(W.FirstCluster, W.LastCluster + 1,
[](const CaseCluster &a, const CaseCluster &b) {
- return a.Prob > b.Prob;
+ return a.Prob != b.Prob ?
+ a.Prob > b.Prob :
+ a.Low->getValue().slt(b.Low->getValue());
});
// Rearrange the case blocks so that the last one falls through if possible
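Both sort changes in this patch follow the same recipe: std::sort only
requires a strict weak ordering, so equal primary keys leave the result
implementation-defined; adding a unique secondary key makes the order total
and the output reproducible. A sketch of the pattern (fields as used above):

    bool less(const CaseCluster &a, const CaseCluster &b) {
      if (a.Prob != b.Prob)
        return a.Prob > b.Prob;                        // primary key
      return a.Low->getValue().slt(b.Low->getValue()); // unique tie-breaker
    }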
@@ -9570,8 +9680,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
// The false probability is the sum of all unhandled cases.
- CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob,
- UnhandledProbs);
+ CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
+ getCurSDLoc(), I->Prob, UnhandledProbs);
if (CurMBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
@@ -9627,7 +9737,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
I++;
}
- for (;;) {
+ while (true) {
// Our binary search tree differs from a typical BST in that ours can have up
// to three values in each leaf. The pivot selection above doesn't take that
// into account, which means the tree might require more nodes and be less
@@ -9722,7 +9832,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
// Create the CaseBlock record that will be used to lower the branch.
CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
- LeftProb, RightProb);
+ getCurSDLoc(), LeftProb, RightProb);
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
@@ -9730,6 +9840,76 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
SwitchCases.push_back(CB);
}
+// Scale CaseProb after peeling a case with probability PeeledCaseProb
+// from the switch statement.
+static BranchProbability scaleCaseProbability(BranchProbability CaseProb,
+ BranchProbability PeeledCaseProb) {
+ if (PeeledCaseProb == BranchProbability::getOne())
+ return BranchProbability::getZero();
+ BranchProbability SwitchProb = PeeledCaseProb.getCompl();
+
+ uint32_t Numerator = CaseProb.getNumerator();
+ uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
+ return BranchProbability(Numerator, std::max(Numerator, Denominator));
+}
+
+// Try to peel the top probability case if it exceeds the threshold.
+// Return the current MachineBasicBlock for the switch statement if peeling
+// does not occur.
+// If peeling is performed, return the newly created MachineBasicBlock
+// for the peeled switch statement. Also update Clusters to remove the peeled
+// case. PeeledCaseProb is the BranchProbability for the peeled case.
+MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
+ const SwitchInst &SI, CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb) {
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+ // Don't peel if peeling is disabled (threshold above 100%), there is no
+ // profile information, there is only one cluster, we are not optimizing,
+ // or we are optimizing for size.
+ if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
+ TM.getOptLevel() == CodeGenOpt::None ||
+ SwitchMBB->getParent()->getFunction().optForMinSize())
+ return SwitchMBB;
+
+ BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
+ unsigned PeeledCaseIndex = 0;
+ bool SwitchPeeled = false;
+ for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
+ CaseCluster &CC = Clusters[Index];
+ if (CC.Prob < TopCaseProb)
+ continue;
+ TopCaseProb = CC.Prob;
+ PeeledCaseIndex = Index;
+ SwitchPeeled = true;
+ }
+ if (!SwitchPeeled)
+ return SwitchMBB;
+
+ DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb
+ << "\n");
+
+ // Record the MBB for the peeled switch statement.
+ MachineFunction::iterator BBI(SwitchMBB);
+ ++BBI;
+ MachineBasicBlock *PeeledSwitchMBB =
+ FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
+
+ ExportFromCurrentBlock(SI.getCondition());
+ auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
+ SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
+ nullptr, nullptr, TopCaseProb.getCompl()};
+ lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
+
+ Clusters.erase(PeeledCaseIt);
+ for (CaseCluster &CC : Clusters) {
+ DEBUG(dbgs() << "Scale the probablity for one cluster, before scaling: "
+ << CC.Prob << "\n");
+ CC.Prob = scaleCaseProbability(CC.Prob, TopCaseProb);
+ DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+ }
+ PeeledCaseProb = TopCaseProb;
+ return PeeledSwitchMBB;
+}
+
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Extract cases from the switch.
BranchProbabilityInfo *BPI = FuncInfo.BPI;
@@ -9783,9 +9963,15 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
}
}
+ // The branch probability of the peeled case.
+ BranchProbability PeeledCaseProb = BranchProbability::getZero();
+ MachineBasicBlock *PeeledSwitchMBB =
+ peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
+
// If there is only the default destination, jump there directly.
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
if (Clusters.empty()) {
+ assert(PeeledSwitchMBB == SwitchMBB);
SwitchMBB->addSuccessor(DefaultMBB);
if (DefaultMBB != NextBlock(SwitchMBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
@@ -9817,8 +10003,14 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
- auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
- WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+ auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
+ // Scale the branch probability for DefaultMBB if peeling occurred and
+ // DefaultMBB is not replaced.
+ if (PeeledCaseProb != BranchProbability::getZero() &&
+ DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
+ DefaultProb = scaleCaseProbability(DefaultProb, PeeledCaseProb);
+ WorkList.push_back(
+ {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.back();
@@ -9826,7 +10018,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
- !DefaultMBB->getParent()->getFunction()->optForMinSize()) {
+ !DefaultMBB->getParent()->getFunction().optForMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
continue;
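The peeling math above deserves a worked example. After peeling a case with
probability P, the remaining cases are reached only when the peeled test
fails, so each surviving probability C is rescaled to C / (1 - P); the
std::max in the denominator merely clamps rounding overshoot at 100%.
Numbers assumed for illustration:

    BranchProbability P(60, 100);                      // peeled case: 60%
    BranchProbability C(20, 100);                      // surviving case: 20%
    BranchProbability S = scaleCaseProbability(C, P);  // 0.2 / 0.4 = 50%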
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ac1d6aae65a5..9e7c2bc6821b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1,4 +1,4 @@
-//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===//
+//===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,67 +16,75 @@
#include "StatepointLowering.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Statepoint.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
#include <utility>
#include <vector>
namespace llvm {
-class AddrSpaceCastInst;
class AllocaInst;
+class AtomicCmpXchgInst;
+class AtomicRMWInst;
class BasicBlock;
-class BitCastInst;
class BranchInst;
class CallInst;
+class CatchPadInst;
+class CatchReturnInst;
+class CatchSwitchInst;
+class CleanupPadInst;
+class CleanupReturnInst;
+class Constant;
+class ConstantInt;
+class ConstrainedFPIntrinsic;
class DbgValueInst;
-class ExtractElementInst;
-class FCmpInst;
-class FPExtInst;
-class FPToSIInst;
-class FPToUIInst;
-class FPTruncInst;
-class Function;
+class DataLayout;
+class DIExpression;
+class DILocalVariable;
+class DILocation;
+class FenceInst;
class FunctionLoweringInfo;
-class GetElementPtrInst;
class GCFunctionInfo;
-class ICmpInst;
-class IntToPtrInst;
+class GCRelocateInst;
+class GCResultInst;
class IndirectBrInst;
class InvokeInst;
-class InsertElementInst;
-class Instruction;
+class LandingPadInst;
+class LLVMContext;
class LoadInst;
class MachineBasicBlock;
-class MachineInstr;
-class MachineRegisterInfo;
-class MDNode;
-class MVT;
class PHINode;
-class PtrToIntInst;
+class ResumeInst;
class ReturnInst;
class SDDbgValue;
-class SExtInst;
-class SelectInst;
-class ShuffleVectorInst;
-class SIToFPInst;
class StoreInst;
class SwitchInst;
-class DataLayout;
class TargetLibraryInfo;
-class TargetLowering;
-class TruncInst;
-class UIToFPInst;
-class UnreachableInst;
+class TargetMachine;
+class Type;
class VAArgInst;
-class ZExtInst;
+class UnreachableInst;
+class Use;
+class User;
+class Value;
//===----------------------------------------------------------------------===//
/// SelectionDAGBuilder - This is the common target-independent lowering
@@ -84,7 +92,7 @@ class ZExtInst;
///
class SelectionDAGBuilder {
/// CurInst - The current instruction being visited
- const Instruction *CurInst;
+ const Instruction *CurInst = nullptr;
DenseMap<const Value*, SDValue> NodeMap;
@@ -94,13 +102,15 @@ class SelectionDAGBuilder {
/// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
class DanglingDebugInfo {
- const DbgValueInst* DI;
+ const DbgValueInst* DI = nullptr;
DebugLoc dl;
- unsigned SDNodeOrder;
+ unsigned SDNodeOrder = 0;
+
public:
- DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo() = default;
DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO)
: DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {}
+
const DbgValueInst* getDI() { return DI; }
DebugLoc getdl() { return dl; }
unsigned getSDNodeOrder() { return SDNodeOrder; }
@@ -120,8 +130,8 @@ public:
/// State used while lowering a statepoint sequence (gc_statepoint,
/// gc_relocate, and gc_result). See StatepointLowering.hpp/cpp for details.
StatepointLoweringState StatepointLowering;
-private:
+private:
/// PendingExports - CopyToReg nodes that copy values to virtual registers
/// for export to other blocks need to be emitted before any terminator
/// instruction, but they have no other ordering requirements. We bunch them
@@ -189,23 +199,22 @@ private:
}
};
- typedef std::vector<CaseCluster> CaseClusterVector;
- typedef CaseClusterVector::iterator CaseClusterIt;
+ using CaseClusterVector = std::vector<CaseCluster>;
+ using CaseClusterIt = CaseClusterVector::iterator;
struct CaseBits {
- uint64_t Mask;
- MachineBasicBlock* BB;
- unsigned Bits;
+ uint64_t Mask = 0;
+ MachineBasicBlock* BB = nullptr;
+ unsigned Bits = 0;
BranchProbability ExtraProb;
+ CaseBits() = default;
CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
BranchProbability Prob):
- Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { }
-
- CaseBits() : Mask(0), BB(nullptr), Bits(0) {}
+ Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) {}
};
- typedef std::vector<CaseBits> CaseBitsVector;
+ using CaseBitsVector = std::vector<CaseBits>;
/// Sort Clusters and merge adjacent cases.
void sortAndRangeify(CaseClusterVector &Clusters);
@@ -214,15 +223,6 @@ private:
/// SelectionDAGBuilder and SDISel for the code generation of additional basic
/// blocks needed by multi-case switch statements.
struct CaseBlock {
- CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
- const Value *cmpmiddle, MachineBasicBlock *truebb,
- MachineBasicBlock *falsebb, MachineBasicBlock *me,
- BranchProbability trueprob = BranchProbability::getUnknown(),
- BranchProbability falseprob = BranchProbability::getUnknown())
- : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
- TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob),
- FalseProb(falseprob) {}
-
// CC - the condition code to use for the case block's setcc node
ISD::CondCode CC;
@@ -237,14 +237,25 @@ private:
// ThisBB - the block into which to emit the code for the setcc and branches
MachineBasicBlock *ThisBB;
+ /// The debug location of the instruction this CaseBlock was
+ /// produced from.
+ SDLoc DL;
+
// TrueProb/FalseProb - branch weights.
BranchProbability TrueProb, FalseProb;
+
+ CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+ const Value *cmpmiddle, MachineBasicBlock *truebb,
+ MachineBasicBlock *falsebb, MachineBasicBlock *me,
+ SDLoc dl,
+ BranchProbability trueprob = BranchProbability::getUnknown(),
+ BranchProbability falseprob = BranchProbability::getUnknown())
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me), DL(dl),
+ TrueProb(trueprob), FalseProb(falseprob) {}
};
struct JumpTable {
- JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
- MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
-
/// Reg - the virtual register containing the index of the jump table entry
/// to jump to.
unsigned Reg;
@@ -255,39 +266,38 @@ private:
/// Default - the MBB of the default bb, which is a successor of the range
/// check MBB. This is used when updating PHI nodes in successors.
MachineBasicBlock *Default;
+
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+ MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
};
struct JumpTableHeader {
- JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
- bool E = false)
- : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
- Emitted(E) {}
APInt First;
APInt Last;
const Value *SValue;
MachineBasicBlock *HeaderBB;
bool Emitted;
+
+ JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+ bool E = false)
+ : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
+ Emitted(E) {}
};
- typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+ using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
struct BitTestCase {
- BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
- BranchProbability Prob):
- Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { }
uint64_t Mask;
MachineBasicBlock *ThisBB;
MachineBasicBlock *TargetBB;
BranchProbability ExtraProb;
+
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
+ BranchProbability Prob):
+ Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) {}
};
- typedef SmallVector<BitTestCase, 3> BitTestInfo;
+ using BitTestInfo = SmallVector<BitTestCase, 3>;
struct BitTestBlock {
- BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
- bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
- BitTestInfo C, BranchProbability Pr)
- : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
- RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
- Cases(std::move(C)), Prob(Pr) {}
APInt First;
APInt Range;
const Value *SValue;
@@ -300,6 +310,13 @@ private:
BitTestInfo Cases;
BranchProbability Prob;
BranchProbability DefaultProb;
+
+ BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
+ bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
+ BitTestInfo C, BranchProbability Pr)
+ : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
+ RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
+ Cases(std::move(C)), Prob(Pr) {}
};
/// Return the range of values in [First..Last].
@@ -336,7 +353,7 @@ private:
const ConstantInt *LT;
BranchProbability DefaultProb;
};
- typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
+ using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>;
/// Determine the rank by weight of CC in [First,Last]. If CC has more weight
/// than each cluster in the range, its rank is 0.
@@ -352,6 +369,10 @@ private:
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB);
+ /// Peel the top probability case if it exceeds the threshold.
+ MachineBasicBlock *peelDominantCaseCluster(const SwitchInst &SI,
+ CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb);
/// A class which encapsulates all of the information needed to generate a
/// stack protector check and signals to isel via its state being initialized
@@ -466,8 +487,7 @@ private:
/// the same function, use the same failure basic block).
class StackProtectorDescriptor {
public:
- StackProtectorDescriptor()
- : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr) {}
+ StackProtectorDescriptor() = default;
/// Returns true if all fields of the stack protector descriptor are
/// initialized implying that we should/are ready to emit a stack protector.
@@ -533,15 +553,15 @@ private:
/// replace it with a compare/branch to the successor mbbs
/// SuccessMBB/FailureMBB depending on whether or not the stack protector
/// was violated.
- MachineBasicBlock *ParentMBB;
+ MachineBasicBlock *ParentMBB = nullptr;
/// A basic block visited on stack protector check success that contains the
/// terminators of ParentMBB.
- MachineBasicBlock *SuccessMBB;
+ MachineBasicBlock *SuccessMBB = nullptr;
/// This basic block visited on stack protector check failure that will
/// contain a call to __stack_chk_fail().
- MachineBasicBlock *FailureMBB;
+ MachineBasicBlock *FailureMBB = nullptr;
/// Add a successor machine basic block to ParentMBB. If the successor mbb
/// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
@@ -554,25 +574,29 @@ private:
private:
const TargetMachine &TM;
+
public:
/// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling
/// nodes without a corresponding SDNode.
static const unsigned LowestSDNodeOrder = 1;
SelectionDAG &DAG;
- const DataLayout *DL;
- AliasAnalysis *AA;
+ const DataLayout *DL = nullptr;
+ AliasAnalysis *AA = nullptr;
const TargetLibraryInfo *LibInfo;
/// SwitchCases - Vector of CaseBlock structures used to communicate
/// SwitchInst code generation information.
std::vector<CaseBlock> SwitchCases;
+
/// JTCases - Vector of JumpTable structures used to communicate
/// SwitchInst code generation information.
std::vector<JumpTableBlock> JTCases;
+
/// BitTestCases - Vector of BitTestBlock structures used to communicate
/// SwitchInst code generation information.
std::vector<BitTestBlock> BitTestCases;
+
/// A StackProtectorDescriptor structure used to communicate stack protector
/// information in between SelectBasicBlock and FinishBasicBlock.
StackProtectorDescriptor SPDescriptor;
@@ -589,22 +613,19 @@ public:
GCFunctionInfo *GFI;
/// LPadToCallSiteMap - Map a landing pad to the call site indexes.
- DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
+ DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap;
/// HasTailCall - This is set to true if a call in the current
/// block has been translated as a tail call. In this case,
/// no subsequent DAG nodes should be created.
- ///
- bool HasTailCall;
+ bool HasTailCall = false;
LLVMContext *Context;
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
- : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
- DAG(dag), DL(nullptr), AA(nullptr), FuncInfo(funcinfo),
- HasTailCall(false) {
- }
+ : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
+ FuncInfo(funcinfo) {}
void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
const TargetLibraryInfo *li);
@@ -653,6 +674,7 @@ public:
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+
SDValue getValue(const Value *V);
bool findValue(const Value *V) const;
@@ -923,13 +945,12 @@ private:
void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message);
- /// EmitFuncArgumentDbgValue - If V is an function argument then create
- /// corresponding DBG_VALUE machine instruction for it now. At the end of
- /// instruction selection, they will be inserted to the entry BB.
+ /// If V is a function argument then create a corresponding DBG_VALUE machine
+ /// instruction for it now. At the end of instruction selection, they will be
+ /// inserted into the entry BB.
bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
DIExpression *Expr, DILocation *DL,
- int64_t Offset, bool IsDbgDeclare,
- const SDValue &N);
+ bool IsDbgDeclare, const SDValue &N);
/// Return the next block after MBB, or nullptr if there is none.
MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
@@ -940,8 +961,8 @@ private:
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
- DIExpression *Expr, int64_t Offset,
- const DebugLoc &dl, unsigned DbgSDNodeOrder);
+ DIExpression *Expr, const DebugLoc &dl,
+ unsigned DbgSDNodeOrder);
};
/// RegsForValue - This struct represents the registers (physical or virtual)
@@ -978,13 +999,11 @@ struct RegsForValue {
/// Records if this value needs to be treated in an ABI-dependent manner,
/// different from normal type legalization.
- bool IsABIMangled;
-
- RegsForValue();
+ bool IsABIMangled = false;
+ RegsForValue() = default;
RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
bool IsABIMangledValue = false);
-
RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
bool IsABIMangledValue = false);
@@ -1024,4 +1043,4 @@ struct RegsForValue {
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
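The header churn above is mechanical modernization: typedef becomes a using
alias, constructors that only zero members become "= default" plus in-class
initializers, and forward declarations are re-sorted. A before/after sketch
with an invented type:

    #include <vector>

    struct Old {
      typedef std::vector<int> ListTy;
      int Count;
      Old() : Count(0) {}
    };
    struct New {
      using ListTy = std::vector<int>;
      int Count = 0;    // in-class default member initializer
      New() = default;  // hand-written constructor no longer needed
    };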
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 3dd58975b1f1..dd30dc16378c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,24 +11,42 @@
//
//===----------------------------------------------------------------------===//
-#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cstdint>
+#include <iterator>
+
using namespace llvm;
static cl::opt<bool>
@@ -385,6 +403,7 @@ static Printable PrintNodeId(const SDNode &Node) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
+
LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
dbgs() << '\n';
@@ -402,6 +421,36 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
}
void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (getFlags().hasNoUnsignedWrap())
+ OS << " nuw";
+
+ if (getFlags().hasNoSignedWrap())
+ OS << " nsw";
+
+ if (getFlags().hasExact())
+ OS << " exact";
+
+ if (getFlags().hasUnsafeAlgebra())
+ OS << " unsafe";
+
+ if (getFlags().hasNoNaNs())
+ OS << " nnan";
+
+ if (getFlags().hasNoInfs())
+ OS << " ninf";
+
+ if (getFlags().hasNoSignedZeros())
+ OS << " nsz";
+
+ if (getFlags().hasAllowReciprocal())
+ OS << " arcp";
+
+ if (getFlags().hasAllowContract())
+ OS << " contract";
+
+ if (getFlags().hasVectorReduction())
+ OS << " vector-reduction";
+
if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
if (!MN->memoperands_empty()) {
OS << "<";
@@ -429,9 +478,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
} else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
OS << '<' << CSDN->getAPIntValue() << '>';
} else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
- if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle())
+ if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEsingle())
OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
- else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble())
+ else if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEdouble())
OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
else {
OS << "<APFloat(";
@@ -479,7 +528,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << LBB->getName() << " ";
OS << (const void*)BBDN->getBasicBlock() << ">";
} else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- OS << ' ' << PrintReg(R->getReg(),
+ OS << ' ' << printReg(R->getReg(),
G ? G->getSubtarget().getRegisterInfo() : nullptr);
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
@@ -640,7 +689,8 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet;
+using VisitedSDNodeSet = SmallPtrSet<const SDNode *, 32>;
+
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
if (!once.insert(N).second) // If we've been here before, return now.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index bdf57e805842..18f6997ef83c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/APInt.h"
@@ -26,7 +27,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -45,9 +46,12 @@
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -80,13 +84,9 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
@@ -212,7 +212,7 @@ namespace llvm {
IS.OptLevel = NewOptLevel;
IS.TM.setOptLevel(NewOptLevel);
DEBUG(dbgs() << "\nChanging optimization level for Function "
- << IS.MF->getFunction()->getName() << "\n");
+ << IS.MF->getFunction().getName() << "\n");
DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel
<< " ; After: -O" << NewOptLevel << "\n");
SavedFastISel = IS.TM.Options.EnableFastISel;
@@ -228,7 +228,7 @@ namespace llvm {
if (IS.OptLevel == SavedOptLevel)
return;
DEBUG(dbgs() << "\nRestoring optimization level for Function "
- << IS.MF->getFunction()->getName() << "\n");
+ << IS.MF->getFunction().getName() << "\n");
DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel
<< " ; After: -O" << SavedOptLevel << "\n");
IS.OptLevel = SavedOptLevel;
@@ -384,7 +384,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
"-fast-isel-abort > 0 requires -fast-isel");
- const Function &Fn = *mf.getFunction();
+ const Function &Fn = mf.getFunction();
MF = &mf;
// Reset the target options before resetting the optimization
@@ -414,7 +414,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
- CurDAG->init(*MF, *ORE);
+ CurDAG->init(*MF, *ORE, this);
FuncInfo->set(Fn, *MF, CurDAG);
// Now get the optional analyzes if we want to.
@@ -494,10 +494,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DenseMap<unsigned, unsigned> LiveInMap;
if (!FuncInfo->ArgDbgValues.empty())
- for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
- E = RegInfo->livein_end(); LI != E; ++LI)
- if (LI->second)
- LiveInMap.insert(std::make_pair(LI->first, LI->second));
+ for (std::pair<unsigned, unsigned> LI : RegInfo->liveins())
+ if (LI.second)
+ LiveInMap.insert(LI);
// Insert DBG_VALUE instructions for function arguments to the entry block.
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
@@ -529,12 +528,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
const MDNode *Expr = MI->getDebugExpression();
DebugLoc DL = MI->getDebugLoc();
bool IsIndirect = MI->isIndirectDebugValue();
- unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
+ if (IsIndirect)
+ assert(MI->getOperand(1).getImm() == 0 &&
+ "DBG_VALUE with nonzero offset");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
// Def is never a terminator here, so it is ok to increment InsertPos.
BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE),
- IsIndirect, LDI->second, Offset, Variable, Expr);
+ IsIndirect, LDI->second, Variable, Expr);
// If this vreg is directly copied into an exported register then
// that COPY instructions also need DBG_VALUE, if it is the only
@@ -556,7 +557,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// declared, rather than whatever is attached to CopyUseMI.
MachineInstr *NewMI =
BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
- CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr);
+ CopyUseMI->getOperand(0).getReg(), Variable, Expr);
MachineBasicBlock::iterator Pos = CopyUseMI;
EntryMBB->insertAfter(Pos, NewMI);
}
@@ -644,6 +645,9 @@ static void reportFastISelFailure(MachineFunction &MF,
void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall) {
+ // Allow creating illegal types during DAG building for the basic block.
+ CurDAG->NewNodesMustHaveLegalTypes = false;
+
// Lower the instructions. If a call is emitted as a tail call, cease emitting
// nodes for this block.
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
@@ -726,8 +730,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
BlockName =
(MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
}
- DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Initial selection DAG: " << printMBBReference(*FuncInfo->MBB)
+ << " '" << BlockName << "'\n";
+ CurDAG->dump());
if (ViewDAGCombine1 && MatchFilterBB)
CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -739,8 +744,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized lowered selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
@@ -754,8 +761,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
// Only allow creation of legal node types.
CurDAG->NewNodesMustHaveLegalTypes = true;
@@ -771,8 +780,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
}
{
@@ -782,8 +793,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
if (Changed) {
- DEBUG(dbgs() << "Vector-legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
{
NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
@@ -791,8 +804,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Vector/type-legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Vector/type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
@@ -804,8 +819,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
- << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
}
if (ViewLegalizeDAGs && MatchFilterBB)
@@ -817,8 +834,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize();
}
- DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewDAGCombine2 && MatchFilterBB)
CurDAG->viewGraph("dag-combine2 input for " + BlockName);
@@ -830,8 +849,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
@@ -847,8 +868,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DoInstructionSelection();
}
- DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Selected selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewSchedDAGs && MatchFilterBB)
CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -915,9 +938,9 @@ public:
} // end anonymous namespace
void SelectionDAGISel::DoInstructionSelection() {
- DEBUG(dbgs() << "===== Instruction selection begins: BB#"
- << FuncInfo->MBB->getNumber()
- << " '" << FuncInfo->MBB->getName() << "'\n");
+ DEBUG(dbgs() << "===== Instruction selection begins: "
+ << printMBBReference(*FuncInfo->MBB) << " '"
+ << FuncInfo->MBB->getName() << "'\n");
PreprocessISelDAG();
@@ -1138,7 +1161,7 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
// Look through casts and constant offset GEPs. These mostly come from
// inalloca.
- APInt Offset(DL.getPointerSizeInBits(0), 0);
+ APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0);
Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
// Check if the variable is a static alloca or a byval or inalloca
@@ -1177,12 +1200,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
// For each machine basic block in reverse post order.
ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF);
- for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
- It = RPOT.begin(),
- E = RPOT.end();
- It != E; ++It) {
- MachineBasicBlock *MBB = *It;
-
+ for (MachineBasicBlock *MBB : RPOT) {
// For each swifterror value in the function.
for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
auto Key = std::make_pair(MBB, SwiftErrorVal);
@@ -1253,6 +1271,8 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
// If we don't need a phi create a copy to the upward exposed vreg.
if (!needPHI) {
assert(UpwardsUse);
+ assert(!VRegs.empty() &&
+ "No predecessors? Is the Calling Convention correct?");
unsigned DestReg = UUseVReg;
BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
DestReg)
@@ -1282,10 +1302,10 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
}
}
-void preassignSwiftErrorRegs(const TargetLowering *TLI,
- FunctionLoweringInfo *FuncInfo,
- BasicBlock::const_iterator Begin,
- BasicBlock::const_iterator End) {
+static void preassignSwiftErrorRegs(const TargetLowering *TLI,
+ FunctionLoweringInfo *FuncInfo,
+ BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End) {
if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty())
return;
@@ -2774,6 +2794,12 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
Result = !::CheckType(Table, Index, N, SDISel.TLI,
SDISel.CurDAG->getDataLayout());
return Index;
+ case SelectionDAGISel::OPC_CheckTypeRes: {
+ unsigned Res = Table[Index++];
+ Result = !::CheckType(Table, Index, N.getValue(Res), SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
+ return Index;
+ }
case SelectionDAGISel::OPC_CheckChild0Type:
case SelectionDAGISel::OPC_CheckChild1Type:
case SelectionDAGISel::OPC_CheckChild2Type:
@@ -2906,6 +2932,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::CopyFromReg:
case ISD::CopyToReg:
case ISD::EH_LABEL:
+ case ISD::ANNOTATION_LABEL:
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
NodeToMatch->setNodeId(-1); // Mark selected.
@@ -3175,6 +3202,14 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
break;
continue;
+ case OPC_CheckTypeRes: {
+ unsigned Res = MatcherTable[MatcherIndex++];
+ if (!::CheckType(MatcherTable, MatcherIndex, N.getValue(Res), TLI,
+ CurDAG->getDataLayout()))
+ break;
+ continue;
+ }
+
case OPC_SwitchOpcode: {
unsigned CurNodeOpcode = N.getOpcode();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
@@ -3548,6 +3583,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
"NodeToMatch was removed partway through selection");
SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N,
SDNode *E) {
+ CurDAG->salvageDebugInfo(*N);
auto &Chain = ChainNodesMatched;
assert((!E || !is_contained(Chain, N)) &&
"Chain node replaced during MorphNode");
@@ -3725,6 +3761,25 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
}
}
+bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
+ assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
+ auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return false;
+
+ // Detect when "or" is used to add an offset to a stack object.
+ if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) {
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ unsigned A = MFI.getObjectAlignment(FN->getIndex());
+ assert(isPowerOf2_32(A) && "Unexpected alignment");
+ int32_t Off = C->getSExtValue();
+ // If the alleged offset fits in the zero bits guaranteed by
+ // the alignment, then this or is really an add.
+ return (Off >= 0) && (((A - 1) & Off) == unsigned(Off));
+ }
+ return false;
+}
+
void SelectionDAGISel::CannotYetSelect(SDNode *N) {
std::string msg;
raw_string_ostream Msg(msg);
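The new isOrEquivalentToAdd hook is easiest to see with numbers (values
assumed for illustration): a frame object with 16-byte alignment has its low
four address bits clear, so OR-ing in any offset below 16 cannot produce a
carry and is indistinguishable from ADD.

    unsigned A   = 16;  // object alignment, a power of two
    int32_t  Off = 7;   // constant operand of the OR
    bool IsAdd = (Off >= 0) && (((A - 1) & Off) == unsigned(Off)); // true
    // Off = 19 fails: bit 4 of the offset overlaps real address bits.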
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 11561dfa5947..be4ab094bf49 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -16,15 +16,13 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "dag-printer"
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
index 55f70f7d9fd3..3a283bc5fdc0 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGTargetInfo.cpp - SelectionDAG Info --------------------===//
+//===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
using namespace llvm;
-SelectionDAGTargetInfo::~SelectionDAGTargetInfo() {}
+SelectionDAGTargetInfo::~SelectionDAGTargetInfo() = default;
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 5d78bba86d73..3f64b49e3555 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1,4 +1,4 @@
-//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===//
+//===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,21 +14,44 @@
#include "StatepointLowering.h"
#include "SelectionDAGBuilder.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Statepoint.h"
-#include "llvm/Target/TargetLowering.h"
-#include <algorithm>
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "statepoint-lowering"
@@ -73,7 +96,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
NumSlotsAllocatedForStatepoints++;
MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
- unsigned SpillSize = ValueType.getSizeInBits() / 8;
+ unsigned SpillSize = ValueType.getStoreSize();
assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?");
// First look for a previously created stack slot which is not in
@@ -200,7 +223,6 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
/// values on the stack between calls.
static void reservePreviousStackSlotForValue(const Value *IncomingValue,
SelectionDAGBuilder &Builder) {
-
SDValue Incoming = Builder.getValue(IncomingValue);
if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
@@ -292,7 +314,6 @@ removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases,
static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
SelectionDAGBuilder::StatepointLoweringInfo &SI,
SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) {
-
SDValue ReturnValue, CallEndVal;
std::tie(ReturnValue, CallEndVal) =
Builder.lowerInvokable(SI.CLI, SI.EHPadBB);
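The allocateStackSlot hunk above swaps getSizeInBits() / 8 for
getStoreSize(), which rounds up instead of truncating for bit widths that are
not byte multiples; the assert that follows still restricts callers to
byte-sized types, so the change is belt-and-braces. A sketch with an assumed
type:

    EVT VT = MVT::i1;
    unsigned Truncated = VT.getSizeInBits() / 8; // 0 -- a zero-byte slot
    unsigned Rounded   = VT.getStoreSize();      // 1 byte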
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h
index b043184003a0..372c82a359f6 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -1,4 +1,4 @@
-//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===//
+//===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,11 +16,16 @@
#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include <cassert>
namespace llvm {
+
+class CallInst;
class SelectionDAGBuilder;
/// This class tracks both per-statepoint and per-selectiondag information.
@@ -30,7 +35,7 @@ class SelectionDAGBuilder;
/// works in concert with information in FunctionLoweringInfo.
class StatepointLoweringState {
public:
- StatepointLoweringState() : NextSlotToAllocate(0) {}
+ StatepointLoweringState() = default;
/// Reset all state tracking for a newly encountered safepoint. Also
/// performs some consistency checking.
@@ -69,7 +74,7 @@ public:
/// before the next statepoint. If we weren't expecting to see
/// it, we'll report an assertion.
void relocCallVisited(const CallInst &RelocCall) {
- auto I = find(PendingGCRelocateCalls, &RelocCall);
+ auto I = llvm::find(PendingGCRelocateCalls, &RelocCall);
assert(I != PendingGCRelocateCalls.end() &&
"Visited unexpected gcrelocate call");
PendingGCRelocateCalls.erase(I);
@@ -108,11 +113,12 @@ private:
SmallBitVector AllocatedStackSlots;
/// Points just beyond the last slot known to have been allocated
- unsigned NextSlotToAllocate;
+ unsigned NextSlotToAllocate = 0;
/// Keep track of pending gcrelocate calls for consistency check
SmallVector<const CallInst *, 10> PendingGCRelocateCalls;
};
+
} // end namespace llvm
#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
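
The header change above replaces a hand-written constructor with a defaulted one plus an in-class member initializer. A minimal sketch of the pattern, with illustrative names rather than the statepoint class itself:

// Illustrative sketch of the pattern, not the statepoint class itself:
// scalar members get in-class initializers and the constructor is
// defaulted, so the default state lives next to the declaration.
class Tracker {
public:
  Tracker() = default; // replaces Tracker() : NextSlot(0) {}

private:
  unsigned NextSlot = 0; // was initialized in the constructor init list
};

int main() {
  Tracker T;
  (void)T;
}
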
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8652df7bbd70..58276052c10b 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -20,6 +20,9 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
@@ -29,10 +32,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cctype>
using namespace llvm;
@@ -52,11 +52,11 @@ bool TargetLowering::isPositionIndependent() const {
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
SDValue &Chain) const {
- const Function *F = DAG.getMachineFunction().getFunction();
+ const Function &F = DAG.getMachineFunction().getFunction();
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
- AttributeList CallerAttrs = F->getAttributes();
+ AttributeList CallerAttrs = F.getAttributes();
if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
.removeAttribute(Attribute::NoAlias)
.hasAttributes())
@@ -408,7 +408,7 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
// Search for the smallest integer type with free casts to and from
// Op's type. For expedience, just check power-of-2 integer types.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros();
+ unsigned DemandedSize = Demanded.getActiveBits();
unsigned SmallVTBits = DemandedSize;
if (!isPowerOf2_32(SmallVTBits))
SmallVTBits = NextPowerOf2(SmallVTBits);
@@ -421,9 +421,8 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
Op.getOpcode(), dl, SmallVT,
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
- bool NeedZext = DemandedSize > SmallVTBits;
- SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
- dl, Op.getValueType(), X);
+ assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
+ SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
return TLO.CombineTo(Op, Z);
}
}
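
Demanded.getActiveBits() is the bit width minus the leading zero count, so the replacement above is a pure simplification. A standalone model over 64-bit values, assuming C++20 <bit> rather than the APInt API:

#include <bit>
#include <cassert>
#include <cstdint>

// Standalone model, not APInt: getActiveBits() equals
// BitWidth - countLeadingZeros(), the number of bits needed for the value.
unsigned activeBits(uint64_t x) { return 64 - std::countl_zero(x); }

int main() {
  assert(activeBits(0) == 0);
  assert(activeBits(1) == 1);
  assert(activeBits(0xFF) == 8);  // fits in 8 bits
  assert(activeBits(0x100) == 9); // needs a ninth bit
}
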
@@ -459,7 +458,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
// If Old has more than one use then it must be Op, because the
  // AssumeSingleUse flag is not propagated to recursive calls of
  // SimplifyDemandedBits, so the only node with multiple uses that
- // it will attempt to combine will be opt.
+ // it will attempt to combine will be Op.
assert(TLO.Old == Op);
SmallVector <SDValue, 4> NewOps;
@@ -470,7 +469,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
}
NewOps.push_back(User->getOperand(i));
}
- TLO.DAG.UpdateNodeOperands(User, NewOps);
+ User = TLO.DAG.UpdateNodeOperands(User, NewOps);
// Op has less users now, so we may be able to perform additional combines
// with it.
DCI.AddToWorklist(Op.getNode());
@@ -480,7 +479,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
return true;
}
-bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -517,6 +516,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Don't know anything.
Known = KnownBits(BitWidth);
+ if (Op.getOpcode() == ISD::Constant) {
+ // We know all of the bits for a constant!
+ Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
+ Known.Zero = ~Known.One;
+ return false;
+ }
+
// Other users may use these bits.
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
@@ -539,11 +545,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownBits Known2, KnownOut;
switch (Op.getOpcode()) {
- case ISD::Constant:
- // We know all of the bits for a constant!
- Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
- Known.Zero = ~Known.One;
- return false; // Don't fall through, will infinitely loop.
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every constant vector element.
Known.Zero.setAllBits(); Known.One.setAllBits();
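
Hoisting the ISD::Constant case above the one-use bailout lets known bits of constants be reported even for multi-use nodes; for a constant every bit is known, and answering early creates no new nodes. A standalone model, not the KnownBits/APInt API:

#include <cassert>
#include <cstdint>

// Standalone model: a constant C has ones known exactly where C has ones
// and zeros known everywhere else, with no bit left unknown.
struct Known64 {
  uint64_t One = 0, Zero = 0;
};

Known64 knownOfConstant(uint64_t c) { return {c, ~c}; }

int main() {
  Known64 k = knownOfConstant(0xF0);
  assert((k.One & k.Zero) == 0);            // no conflicting bit
  assert((k.One | k.Zero) == ~uint64_t(0)); // all 64 bits known
}
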
@@ -780,33 +781,38 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::SHL:
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- unsigned ShAmt = SA->getZExtValue();
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
SDValue InOp = Op.getOperand(0);
// If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
+ if (SA->getAPIntValue().uge(BitWidth))
break;
+ unsigned ShAmt = SA->getZExtValue();
+
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
- if (InOp.getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(InOp.getOperand(1))) {
- if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
- unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
- unsigned Opc = ISD::SHL;
- int Diff = ShAmt-C1;
- if (Diff < 0) {
- Diff = -Diff;
- Opc = ISD::SRL;
- }
+ if (InOp.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (SA2->getAPIntValue().ult(BitWidth)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0), NewSA));
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0),
+ NewSA));
+ }
+ }
}
}
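
The restructured hunk still performs the same fold: ((X >>u C1) << C2) collapses to a single shift whenever the low C2 bits are not demanded, with the new guards additionally tolerating splat-vector and out-of-range shift amounts. A standalone check of the underlying identity (plain C++, illustration only):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t vals[] = {0x0123456789ABCDEFull, ~0ull, 1ull};
  for (uint64_t x : vals)
    for (unsigned c1 = 0; c1 < 8; ++c1)
      for (unsigned c2 = 1; c2 < 8; ++c2) {
        uint64_t mask = ~0ull << c2; // demanded bits: above the low c2
        uint64_t orig = (x >> c1) << c2;
        uint64_t fold = c2 >= c1 ? x << (c2 - c1) : x >> (c1 - c2);
        assert((orig & mask) == (fold & mask)); // identical where demanded
      }
}
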
@@ -818,7 +824,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = InOp.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
- unsigned InnerBits = InnerVT.getSizeInBits();
+ unsigned InnerBits = InnerVT.getScalarSizeInBits();
if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
@@ -837,45 +843,42 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
- if (InOp.hasOneUse() &&
- InnerOp.getOpcode() == ISD::SRL &&
- InnerOp.hasOneUse() &&
- isa<ConstantSDNode>(InnerOp.getOperand(1))) {
- unsigned InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
- ->getZExtValue();
- if (InnerShAmt < ShAmt &&
- InnerShAmt < InnerBits &&
- NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
- NewMask.countTrailingZeros() >= ShAmt) {
- SDValue NewSA =
- TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
- Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
- SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
- InnerOp.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
- NewExt, NewSA));
+ if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
+ InnerOp.hasOneUse()) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) {
+ unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
+ if (InnerShAmt < ShAmt &&
+ InnerShAmt < InnerBits &&
+ NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
+ NewMask.countTrailingZeros() >= ShAmt) {
+ SDValue NewSA =
+ TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
+ Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+ InnerOp.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
+ NewExt, NewSA));
+ }
}
}
}
- Known.Zero <<= SA->getZExtValue();
- Known.One <<= SA->getZExtValue();
+ Known.Zero <<= ShAmt;
+ Known.One <<= ShAmt;
// low bits known zero.
- Known.Zero.setLowBits(SA->getZExtValue());
+ Known.Zero.setLowBits(ShAmt);
}
break;
case ISD::SRL:
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- EVT VT = Op.getValueType();
- unsigned ShAmt = SA->getZExtValue();
- unsigned VTSize = VT.getSizeInBits();
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
SDValue InOp = Op.getOperand(0);
// If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
+ if (SA->getAPIntValue().uge(BitWidth))
break;
+ unsigned ShAmt = SA->getZExtValue();
APInt InDemandedMask = (NewMask << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
@@ -886,21 +889,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
- if (InOp.getOpcode() == ISD::SHL &&
- isa<ConstantSDNode>(InOp.getOperand(1))) {
- if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
- unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
- unsigned Opc = ISD::SRL;
- int Diff = ShAmt-C1;
- if (Diff < 0) {
- Diff = -Diff;
- Opc = ISD::SHL;
- }
+ if (InOp.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
+ if (ShAmt &&
+ (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ if (SA2->getAPIntValue().ult(BitWidth)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0), NewSA));
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0),
+ NewSA));
+ }
+ }
}
}
@@ -924,14 +933,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
EVT VT = Op.getValueType();
- unsigned ShAmt = SA->getZExtValue();
// If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
+ if (SA->getAPIntValue().uge(BitWidth))
break;
+ unsigned ShAmt = SA->getZExtValue();
APInt InDemandedMask = (NewMask << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
@@ -979,15 +988,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
case ISD::SIGN_EXTEND_INREG: {
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned ExVTBits = ExVT.getScalarSizeInBits();
- APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
// If we only care about the highest bit, don't bother shifting right.
- if (MsbMask == NewMask) {
- unsigned ShAmt = ExVT.getScalarSizeInBits();
+ if (NewMask.isSignMask()) {
SDValue InOp = Op.getOperand(0);
- unsigned VTBits = Op->getValueType(0).getScalarSizeInBits();
bool AlreadySignExtended =
- TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
+ TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
@@ -997,7 +1004,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
- SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl,
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
ShiftAmtTy);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
Op.getValueType(), InOp,
@@ -1005,26 +1012,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
}
- // Sign extension. Compute the demanded bits in the result that are not
- // present in the input.
- APInt NewBits =
- APInt::getHighBitsSet(BitWidth,
- BitWidth - ExVT.getScalarSizeInBits());
-
// If none of the extended bits are demanded, eliminate the sextinreg.
- if ((NewBits & NewMask) == 0)
+ if (NewMask.getActiveBits() <= ExVTBits)
return TLO.CombineTo(Op, Op.getOperand(0));
- APInt InSignBit =
- APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth);
- APInt InputDemandedBits =
- APInt::getLowBitsSet(BitWidth,
- ExVT.getScalarSizeInBits()) &
- NewMask;
+ APInt InputDemandedBits = NewMask.getLoBits(ExVTBits);
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
- InputDemandedBits |= InSignBit;
+ InputDemandedBits.setBit(ExVTBits - 1);
if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
Known, TLO, Depth+1))
@@ -1035,16 +1031,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// top bits of the result.
// If the input sign bit is known zero, convert this into a zero extension.
- if (Known.Zero.intersects(InSignBit))
+ if (Known.Zero[ExVTBits - 1])
return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
Op.getOperand(0), dl, ExVT.getScalarType()));
- if (Known.One.intersects(InSignBit)) { // Input sign bit known set
- Known.One |= NewBits;
- Known.Zero &= ~NewBits;
+ APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
+ if (Known.One[ExVTBits - 1]) { // Input sign bit known set
+ Known.One.setBitsFrom(ExVTBits);
+ Known.Zero &= Mask;
} else { // Input sign bit unknown
- Known.Zero &= ~NewBits;
- Known.One &= ~NewBits;
+ Known.Zero &= Mask;
+ Known.One &= Mask;
}
break;
}
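
The rewritten SIGN_EXTEND_INREG handling keys everything off ExVTBits: in particular, when every demanded bit lies below the inner width the node is a no-op and can be dropped. A standalone model of that property (plain C++, not SelectionDAG):

#include <cassert>
#include <cstdint>

// Standalone model: sign_extend_inreg keeps the low b bits of x intact and
// replicates bit b-1 upward, so if every demanded bit is below b
// (NewMask.getActiveBits() <= ExVTBits) the node changes nothing demanded.
uint64_t signExtendInReg(uint64_t x, unsigned b) {
  uint64_t low = x & ((1ull << b) - 1); // inner value (assume 0 < b < 64)
  uint64_t sign = 1ull << (b - 1);      // sign bit of the inner type
  return (low ^ sign) - sign;           // classic sign-extension trick
}

int main() {
  uint64_t x = 0xB5;                           // arbitrary input, b = 8
  assert((signExtendInReg(x, 8) & 0xFF) == (x & 0xFF)); // low bits intact
  assert(signExtendInReg(x, 8) == (~0ull << 8 | 0xB5)); // sign replicated
}
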
@@ -1072,61 +1069,47 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
case ISD::ZERO_EXTEND: {
unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
- APInt InMask = NewMask.trunc(OperandBitWidth);
// If none of the top bits are demanded, convert this into an any_extend.
- APInt NewBits =
- APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
- if (!NewBits.intersects(NewMask))
+ if (NewMask.getActiveBits() <= OperandBitWidth)
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
Op.getValueType(),
Op.getOperand(0)));
+ APInt InMask = NewMask.trunc(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known = Known.zext(BitWidth);
- Known.Zero |= NewBits;
+ Known.Zero.setBitsFrom(OperandBitWidth);
break;
}
case ISD::SIGN_EXTEND: {
- EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
- APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
- APInt InSignBit = APInt::getOneBitSet(BitWidth, InBits - 1);
- APInt NewBits = ~InMask & NewMask;
+ unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits();
// If none of the top bits are demanded, convert this into an any_extend.
- if (NewBits == 0)
+ if (NewMask.getActiveBits() <= InBits)
return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
Op.getValueType(),
Op.getOperand(0)));
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
- APInt InDemandedBits = InMask & NewMask;
- InDemandedBits |= InSignBit;
- InDemandedBits = InDemandedBits.trunc(InBits);
+ APInt InDemandedBits = NewMask.trunc(InBits);
+ InDemandedBits.setBit(InBits - 1);
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO,
Depth+1))
return true;
- Known = Known.zext(BitWidth);
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ // If the sign bit is known one, the top bits match.
+ Known = Known.sext(BitWidth);
// If the sign bit is known zero, convert this to a zero extend.
- if (Known.Zero.intersects(InSignBit))
+ if (Known.isNonNegative())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
Op.getValueType(),
Op.getOperand(0)));
-
- // If the sign bit is known one, the top bits match.
- if (Known.One.intersects(InSignBit)) {
- Known.One |= NewBits;
- assert((Known.Zero & NewBits) == 0);
- } else { // Otherwise, top bits aren't known.
- assert((Known.One & NewBits) == 0);
- assert((Known.Zero & NewBits) == 0);
- }
break;
}
case ISD::ANY_EXTEND: {
@@ -1305,6 +1288,19 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.resetAll();
}
+void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
+
+ if (unsigned Align = DAG.InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ Known.Zero.setLowBits(Log2_32(Align));
+ }
+}
+
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
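
The new computeKnownBitsForFrameIndex hook records a single fact: a pointer with a known power-of-two alignment has log2(Align) low bits equal to zero. A standalone sketch assuming C++20 <bit>, not the LLVM KnownBits API:

#include <bit>
#include <cassert>
#include <cstdint>

// Standalone sketch: for a power-of-two alignment, the number of low bits
// known zero is the base-2 log of the alignment.
unsigned knownZeroLowBits(uint64_t align) {
  return std::countr_zero(align); // == Log2_32(align) for powers of two
}

int main() {
  assert(knownZeroLowBits(16) == 4);
  uint64_t p = 0x1230; // hypothetical 16-byte-aligned address
  assert((p & ((1ull << knownZeroLowBits(16)) - 1)) == 0);
}
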
@@ -2967,7 +2963,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
std::vector<SDNode *> *Created) const {
- AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
@@ -3436,8 +3432,6 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
// The type of data as saved in memory.
EVT MemSclVT = StVT.getScalarType();
- EVT PtrVT = BasePtr.getValueType();
-
// Store Stride in bytes
unsigned Stride = MemSclVT.getSizeInBits() / 8;
EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
@@ -3450,8 +3444,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
DAG.getConstant(Idx, SL, IdxVT));
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(Idx * Stride, SL, PtrVT));
+ SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(
@@ -3474,6 +3467,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
EVT VT = LD->getValueType(0);
EVT LoadedVT = LD->getMemoryVT();
SDLoc dl(LD);
+ auto &MF = DAG.getMachineFunction();
+
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
@@ -3498,13 +3493,13 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
// Copy the value to a (aligned) stack slot using (unaligned) integer
// loads and stores, then do a (aligned) load from the stack slot.
MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
- unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned LoadedBytes = LoadedVT.getStoreSize();
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
// Make sure the stack slot is also aligned for the register type.
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
-
+ auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
SmallVector<SDValue, 8> Stores;
SDValue StackPtr = StackBase;
unsigned Offset = 0;
@@ -3523,13 +3518,14 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
- MachinePointerInfo()));
+ Stores.push_back(DAG.getStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
// Increment the pointers.
Offset += RegBytes;
- Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr,
- StackPtrIncrement);
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
+ StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
}
// The last copy may be partial. Do an extending load.
@@ -3543,15 +3539,17 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
- MachinePointerInfo(), MemVT));
+ Stores.push_back(DAG.getTruncStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
// Finally, perform the original load only redirected to the stack slot.
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
- MachinePointerInfo(), LoadedVT);
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
+ LoadedVT);
// Callers expect a MERGE_VALUES node.
return std::make_pair(Load, TF);
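
The expandUnalignedLoad changes tag the stack-slot accesses with MachinePointerInfo::getFixedStack instead of an empty MachinePointerInfo, giving alias analysis precise frame-object information. The expansion itself bounces the wide value through an aligned stack temporary in register-sized chunks; a plain-C++ model of that shape, with illustrative types and sizes:

#include <cstdint>
#include <cstring>

// Plain-C++ model of the expansion shape: copy register-width chunks of
// the unaligned source into an aligned stack temporary, then perform one
// aligned load from that slot.
double loadUnalignedDouble(const unsigned char *p) {
  alignas(8) unsigned char slot[8];            // the aligned stack slot
  for (int off = 0; off < 8; off += 4) {
    uint32_t reg;                              // one register-width chunk
    std::memcpy(&reg, p + off, sizeof reg);    // possibly unaligned load
    std::memcpy(slot + off, &reg, sizeof reg); // store into the slot
  }
  double d;
  std::memcpy(&d, slot, sizeof d);             // final aligned load
  return d;
}

int main() {
  unsigned char buf[9] = {};
  double v = 1.5;
  std::memcpy(buf + 1, &v, sizeof v);          // misaligned source
  return loadUnalignedDouble(buf + 1) == 1.5 ? 0 : 1;
}
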
@@ -3581,8 +3579,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, MinAlign(Alignment, IncrementSize),
@@ -3591,8 +3589,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, MinAlign(Alignment, IncrementSize),
@@ -3621,6 +3619,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
+ auto &MF = DAG.getMachineFunction();
SDLoc dl(ST);
if (ST->getMemoryVT().isFloatingPoint() ||
@@ -3649,16 +3648,18 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
EVT::getIntegerVT(*DAG.getContext(),
StoredVT.getSizeInBits()));
EVT PtrVT = Ptr.getValueType();
- unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned StoredBytes = StoredVT.getStoreSize();
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
// Make sure the stack slot is also aligned for the register type.
SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// Perform the original store, only redirected to the stack slot.
- SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr,
- MachinePointerInfo(), StoredVT);
+ SDValue Store = DAG.getTruncStore(
+ Chain, dl, Val, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT);
EVT StackPtrVT = StackPtr.getValueType();
@@ -3670,8 +3671,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
// Do all but one copies using the full register width.
for (unsigned i = 1; i < NumRegs; i++) {
// Load one integer register's worth from the stack slot.
- SDValue Load =
- DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo());
+ SDValue Load = DAG.getLoad(
+ RegVT, dl, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset),
@@ -3679,9 +3681,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
ST->getMemOperand()->getFlags()));
// Increment the pointers.
Offset += RegBytes;
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT,
- StackPtr, StackPtrIncrement);
- Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
+ StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
}
// The last store may be partial. Do a truncating store. On big-endian
@@ -3691,8 +3692,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
8 * (StoredBytes - Offset));
// Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
- MachinePointerInfo(), MemVT);
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT);
Stores.push_back(
DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
@@ -3726,9 +3728,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
ST->getMemOperand()->getFlags());
- EVT PtrVT = Ptr.getValueType();
- Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
- DAG.getConstant(IncrementSize, dl, PtrVT));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Alignment = MinAlign(Alignment, IncrementSize);
Store2 = DAG.getTruncStore(
Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
@@ -3767,7 +3767,7 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
AddrVT);
Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
} else
- Increment = DAG.getConstant(DataVT.getSizeInBits() / 8, DL, AddrVT);
+ Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}
@@ -3797,7 +3797,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
SDValue Index) const {
SDLoc dl(Index);
// Make sure the index type is big enough to compute in.
- Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout()));
+ Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
EVT EltVT = VecVT.getVectorElementType();
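
getVectorElementPointer now widens the index to the vector pointer's own value type rather than the data layout's generic pointer type, so targets with mixed pointer widths do not truncate large indices. A standalone illustration of why the width matters:

#include <cassert>
#include <cstdint>

// Standalone illustration: the element index must be widened to the
// pointer's own width before the multiply, or large indices wrap.
uint64_t elementAddress(uint64_t base, uint32_t index, uint64_t eltSize) {
  return base + uint64_t(index) * eltSize; // widen first, then scale
}

int main() {
  // In 32-bit arithmetic 0x40000000 * 8 wraps to 0; widened it does not.
  assert(elementAddress(0x1000, 0x40000000u, 8) == 0x1000 + 0x200000000ull);
}
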
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index 7b60d22c7ace..25d405bf63de 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -1,4 +1,4 @@
-//===-- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ---===//
+//===- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,14 +16,31 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include <cassert>
+#include <cstddef>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -34,12 +51,11 @@ namespace {
class ShadowStackGCLowering : public FunctionPass {
/// RootChain - This is the global linked-list that contains the chain of GC
/// roots.
- GlobalVariable *Head;
+ GlobalVariable *Head = nullptr;
/// StackEntryTy - Abstract type of a link in the shadow stack.
- ///
- StructType *StackEntryTy;
- StructType *FrameMapTy;
+ StructType *StackEntryTy = nullptr;
+ StructType *FrameMapTy = nullptr;
/// Roots - GC roots in the current function. Each is a pair of the
/// intrinsic call and its corresponding alloca.
@@ -47,6 +63,7 @@ class ShadowStackGCLowering : public FunctionPass {
public:
static char ID;
+
ShadowStackGCLowering();
bool doInitialization(Module &M) override;
@@ -57,6 +74,7 @@ private:
Constant *GetFrameMap(Function &F);
Type *GetConcreteStackEntryType(Function &F);
void CollectRoots(Function &F);
+
static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
Type *Ty, Value *BasePtr, int Idx1,
const char *Name);
@@ -64,7 +82,10 @@ private:
Type *Ty, Value *BasePtr, int Idx1, int Idx2,
const char *Name);
};
-}
+
+} // end anonymous namespace
+
+char ShadowStackGCLowering::ID = 0;
INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE,
"Shadow Stack GC Lowering", false, false)
@@ -74,11 +95,7 @@ INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE,
FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); }
-char ShadowStackGCLowering::ID = 0;
-
-ShadowStackGCLowering::ShadowStackGCLowering()
- : FunctionPass(ID), Head(nullptr), StackEntryTy(nullptr),
- FrameMapTy(nullptr) {
+ShadowStackGCLowering::ShadowStackGCLowering() : FunctionPass(ID) {
initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry());
}
diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp
index aa75f5e2caa2..b35bf6ba3a7b 100644
--- a/lib/CodeGen/ShrinkWrap.cpp
+++ b/lib/CodeGen/ShrinkWrap.cpp
@@ -1,4 +1,4 @@
-//===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===//
+//===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -45,50 +45,58 @@
//
// If this pass found points matching all these properties, then
// MachineFrameInfo is updated with this information.
+//
//===----------------------------------------------------------------------===//
+
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-// To check for profitability.
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-// For property #1 for Save.
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-// To record the result of the analysis.
#include "llvm/CodeGen/MachineFrameInfo.h"
-// For property #2.
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-// For property #1 for Restore.
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/Passes.h"
-// To know about callee-saved.
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-// To query the target about frame lowering.
-#include "llvm/Target/TargetFrameLowering.h"
-// To know about frame setup operation.
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-// To access TargetInstrInfo.
-#include "llvm/Target/TargetSubtargetInfo.h"
-
-#define DEBUG_TYPE "shrink-wrap"
+#include <cassert>
+#include <cstdint>
+#include <memory>
using namespace llvm;
+#define DEBUG_TYPE "shrink-wrap"
+
STATISTIC(NumFunc, "Number of functions");
STATISTIC(NumCandidates, "Number of shrink-wrapping candidates");
STATISTIC(NumCandidatesDropped,
"Number of shrink-wrapping candidates dropped because of frequency");
static cl::opt<cl::boolOrDefault>
- EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
- cl::desc("enable the shrink-wrapping pass"));
+EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
+ cl::desc("enable the shrink-wrapping pass"));
namespace {
+
/// \brief Class to determine where the safe point to insert the
/// prologue and epilogue are.
/// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the
@@ -101,31 +109,42 @@ class ShrinkWrap : public MachineFunctionPass {
RegisterClassInfo RCI;
MachineDominatorTree *MDT;
MachinePostDominatorTree *MPDT;
+
/// Current safe point found for the prologue.
/// The prologue will be inserted before the first instruction
/// in this basic block.
MachineBasicBlock *Save;
+
/// Current safe point found for the epilogue.
/// The epilogue will be inserted before the first terminator instruction
/// in this basic block.
MachineBasicBlock *Restore;
+
  /// Holds the basic block frequency information.
  /// Used to check the profitability of the new points.
MachineBlockFrequencyInfo *MBFI;
+
  /// Holds the loop information. Used to determine if Save and Restore
/// are in the same loop.
MachineLoopInfo *MLI;
+
/// Frequency of the Entry block.
uint64_t EntryFreq;
+
/// Current opcode for frame setup.
unsigned FrameSetupOpcode;
+
/// Current opcode for frame destroy.
unsigned FrameDestroyOpcode;
+
/// Entry block.
const MachineBasicBlock *Entry;
- typedef SmallSetVector<unsigned, 16> SetOfRegs;
+
+ using SetOfRegs = SmallSetVector<unsigned, 16>;
+
/// Registers that need to be saved for the current function.
mutable SetOfRegs CurrentCSRs;
+
/// Current MachineFunction.
MachineFunction *MachineFunc;
@@ -205,9 +224,11 @@ public:
/// the MachineFrameInfo attached to \p MF with the results.
bool runOnMachineFunction(MachineFunction &MF) override;
};
-} // End anonymous namespace.
+
+} // end anonymous namespace
char ShrinkWrap::ID = 0;
+
char &llvm::ShrinkWrapID = ShrinkWrap::ID;
INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
@@ -219,6 +240,10 @@ INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
RegScavenger *RS) const {
+ // Ignore DBG_VALUE and other meta instructions that must not affect codegen.
+ if (MI.isMetaInstruction())
+ return false;
+
if (MI.getOpcode() == FrameSetupOpcode ||
MI.getOpcode() == FrameDestroyOpcode) {
DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
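
The new early-out in useOrDefCSROrFI keeps DBG_VALUE and similar meta instructions from influencing where the prologue and epilogue land, so a -g build shrink-wraps the same way as a release build. A sketch of the filtering pattern, with illustrative names rather than the MachineInstr API:

#include <vector>

// Illustrative names, not the MachineInstr API: anything that decides
// codegen placement must skip meta instructions such as DBG_VALUE.
struct Instr {
  bool IsMeta;         // e.g. a DBG_VALUE
  bool TouchesCSROrFI; // uses/defs a callee-saved register or frame index
};

bool blockNeedsFrame(const std::vector<Instr> &block) {
  for (const Instr &instr : block) {
    if (instr.IsMeta)
      continue; // must not influence the save/restore points
    if (instr.TouchesCSROrFI)
      return true;
  }
  return false;
}

int main() {
  std::vector<Instr> b = {{true, true}, {false, false}};
  return blockNeedsFrame(b) ? 1 : 0; // meta instruction ignored: returns 0
}
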
@@ -424,7 +449,7 @@ static bool isIrreducibleCFG(const MachineFunction &MF,
}
bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
+ if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
return false;
DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
@@ -537,16 +562,17 @@ bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
switch (EnableShrinkWrapOpt) {
case cl::BOU_UNSET:
return TFI->enableShrinkWrapping(MF) &&
- // Windows with CFI has some limitations that make it impossible
- // to use shrink-wrapping.
- !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
- // Sanitizers look at the value of the stack at the location
- // of the crash. Since a crash can happen anywhere, the
- // frame must be lowered before anything else happen for the
- // sanitizers to be able to get a correct stack frame.
- !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
- MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
- MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory));
+ // Windows with CFI has some limitations that make it impossible
+ // to use shrink-wrapping.
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ // Sanitizers look at the value of the stack at the location
+ // of the crash. Since a crash can happen anywhere, the
+             // frame must be lowered before anything else happens for the
+ // sanitizers to be able to get a correct stack frame.
+ !(MF.getFunction().hasFnAttribute(Attribute::SanitizeAddress) ||
+ MF.getFunction().hasFnAttribute(Attribute::SanitizeThread) ||
+ MF.getFunction().hasFnAttribute(Attribute::SanitizeMemory) ||
+ MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress));
  // If EnableShrinkWrap is set, it takes precedence over whatever the
  // target sets. The rationale is that we assume we want to test
  // something related to shrink-wrapping.
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 3656832a7f1a..ea74c777e1e2 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -12,7 +12,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -264,7 +263,7 @@ LLVM_DUMP_METHOD void SlotIndexes::dump() const {
}
for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i)
- dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
+ dbgs() << "%bb." << i << "\t[" << MBBRanges[i].first << ';'
<< MBBRanges[i].second << ")\n";
}
#endif
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 0abe1c47da55..b989b54d4190 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -1,4 +1,4 @@
-//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//===- SpillPlacement.cpp - Optimal Spill Code Placement ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -28,21 +28,31 @@
//===----------------------------------------------------------------------===//
#include "SpillPlacement.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "spill-code-placement"
char SpillPlacement::ID = 0;
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
@@ -50,8 +60,6 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
-char &llvm::SpillPlacementID = SpillPlacement::ID;
-
void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineBlockFrequencyInfo>();
@@ -68,10 +76,10 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
/// The node Value is positive when the variable should be in a register. The
/// value can change when linked nodes change, but convergence is very fast
/// because all weights are positive.
-///
struct SpillPlacement::Node {
/// BiasN - Sum of blocks that prefer a spill.
BlockFrequency BiasN;
+
/// BiasP - Sum of blocks that prefer a register.
BlockFrequency BiasP;
@@ -80,7 +88,7 @@ struct SpillPlacement::Node {
/// variable should go in a register through this bundle.
int Value;
- typedef SmallVector<std::pair<BlockFrequency, unsigned>, 4> LinkVector;
+ using LinkVector = SmallVector<std::pair<BlockFrequency, unsigned>, 4>;
/// Links - (Weight, BundleNo) for all transparent blocks connecting to other
/// bundles. The weights are all positive block frequencies.
@@ -104,7 +112,7 @@ struct SpillPlacement::Node {
}
/// clear - Reset per-query data, but preserve frequencies that only depend on
- // the CFG.
+ /// the CFG.
void clear(const BlockFrequency &Threshold) {
BiasN = BiasP = Value = 0;
SumLinkWeights = Threshold;
@@ -260,14 +268,14 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
// Live-in to block?
if (I->Entry != DontCare) {
- unsigned ib = bundles->getBundle(I->Number, 0);
+ unsigned ib = bundles->getBundle(I->Number, false);
activate(ib);
nodes[ib].addBias(Freq, I->Entry);
}
// Live-out from block?
if (I->Exit != DontCare) {
- unsigned ob = bundles->getBundle(I->Number, 1);
+ unsigned ob = bundles->getBundle(I->Number, true);
activate(ob);
nodes[ob].addBias(Freq, I->Exit);
}
@@ -281,8 +289,8 @@ void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) {
BlockFrequency Freq = BlockFrequencies[*I];
if (Strong)
Freq += Freq;
- unsigned ib = bundles->getBundle(*I, 0);
- unsigned ob = bundles->getBundle(*I, 1);
+ unsigned ib = bundles->getBundle(*I, false);
+ unsigned ob = bundles->getBundle(*I, true);
activate(ib);
activate(ob);
nodes[ib].addBias(Freq, PrefSpill);
@@ -294,8 +302,8 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E;
++I) {
unsigned Number = *I;
- unsigned ib = bundles->getBundle(Number, 0);
- unsigned ob = bundles->getBundle(Number, 1);
+ unsigned ib = bundles->getBundle(Number, false);
+ unsigned ob = bundles->getBundle(Number, true);
// Ignore self-loops.
if (ib == ob)
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index 9b9ecccf9049..aa3ac444e0da 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -1,4 +1,4 @@
-//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//===- SpillPlacement.h - Optimal Spill Code Placement ---------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -37,9 +37,9 @@ namespace llvm {
class BitVector;
class EdgeBundles;
-class MachineBasicBlock;
-class MachineLoopInfo;
class MachineBlockFrequencyInfo;
+class MachineFunction;
+class MachineLoopInfo;
class SpillPlacement : public MachineFunctionPass {
struct Node;
@@ -47,7 +47,7 @@ class SpillPlacement : public MachineFunctionPass {
const EdgeBundles *bundles;
const MachineLoopInfo *loops;
const MachineBlockFrequencyInfo *MBFI;
- Node *nodes;
+ Node *nodes = nullptr;
// Nodes that are active in the current computation. Owned by the prepare()
// caller.
@@ -73,7 +73,7 @@ class SpillPlacement : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
- SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {}
+ SpillPlacement() : MachineFunctionPass(ID) {}
~SpillPlacement() override { releaseMemory(); }
/// BorderConstraint - A basic block has separate constraints for entry and
@@ -155,16 +155,16 @@ public:
}
private:
- bool runOnMachineFunction(MachineFunction&) override;
- void getAnalysisUsage(AnalysisUsage&) const override;
+ bool runOnMachineFunction(MachineFunction &mf) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
void releaseMemory() override;
- void activate(unsigned);
+ void activate(unsigned n);
void setThreshold(const BlockFrequency &Entry);
- bool update(unsigned);
+ bool update(unsigned n);
};
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 61ee508c8394..330ee81342b6 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//===- llvm/CodeGen/Spiller.h - Spiller -------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,11 +12,10 @@
namespace llvm {
- class LiveRangeEdit;
- class MachineFunction;
- class MachineFunctionPass;
- class VirtRegMap;
- class LiveIntervals;
+class LiveRangeEdit;
+class MachineFunction;
+class MachineFunctionPass;
+class VirtRegMap;
/// Spiller interface.
///
@@ -24,12 +23,14 @@ namespace llvm {
/// demand.
class Spiller {
virtual void anchor();
+
public:
virtual ~Spiller() = 0;
/// spill - Spill the LRE.getParent() live interval.
virtual void spill(LiveRangeEdit &LRE) = 0;
- virtual void postOptimization(){};
+
+ virtual void postOptimization() {}
};
/// Create and return a spiller that will insert spill code directly instead
@@ -37,6 +38,7 @@ namespace llvm {
Spiller *createInlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm);
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SPILLER_H
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 323045fd2aaa..c99c3b09d88a 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -1,4 +1,4 @@
-//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//===- SplitKit.cpp - Toolkit for splitting live ranges -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,20 +13,46 @@
//===----------------------------------------------------------------------===//
#include "SplitKit.h"
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <utility>
using namespace llvm;
@@ -125,8 +151,7 @@ InsertPointAnalysis::getLastInsertPointIter(const LiveInterval &CurLI,
SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
const MachineLoopInfo &mli)
: MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli),
- TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr),
- IPA(lis, MF.getNumBlockIDs()) {}
+ TII(*MF.getSubtarget().getInstrInfo()), IPA(lis, MF.getNumBlockIDs()) {}
void SplitAnalysis::clear() {
UseSlots.clear();
@@ -200,7 +225,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {
// Loop over basic blocks where CurLI is live.
MachineFunction::iterator MFI =
LIS.getMBBFromIndex(LVI->start)->getIterator();
- for (;;) {
+ while (true) {
BlockInfo BI;
BI.MBB = &*MFI;
SlotIndex Start, Stop;
@@ -301,7 +326,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
MachineFunction::const_iterator MFI =
LIS.getMBBFromIndex(LVI->start)->getIterator();
SlotIndex Stop = LIS.getMBBEndIdx(&*MFI);
- for (;;) {
+ while (true) {
++Count;
LVI = li->advanceTo(LVI, Stop);
if (LVI == LVE)
@@ -333,7 +358,6 @@ void SplitAnalysis::analyze(const LiveInterval *li) {
analyzeUses();
}
-
//===----------------------------------------------------------------------===//
// Split Editor
//===----------------------------------------------------------------------===//
@@ -347,8 +371,7 @@ SplitEditor::SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa,
MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt),
TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()),
TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()),
- MBFI(mbfi), Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition),
- RegAssign(Allocator) {}
+ MBFI(mbfi), RegAssign(Allocator) {}
void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
Edit = &LRE;
@@ -552,7 +575,7 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
if ((SubRegMask & ~LaneMask).any())
continue;
- unsigned PopCount = countPopulation(SubRegMask.getAsInteger());
+ unsigned PopCount = SubRegMask.getNumLanes();
PossibleIndexes.push_back(Idx);
if (PopCount > BestCover) {
BestCover = PopCount;
@@ -572,7 +595,7 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx));
while (LanesLeft.any()) {
unsigned BestIdx = 0;
- int BestCover = INT_MIN;
+ int BestCover = std::numeric_limits<int>::min();
for (unsigned Idx : PossibleIndexes) {
LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
// Early exit if we found a perfect match.
@@ -583,8 +606,8 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
// Try to cover as much of the remaining lanes as possible but
// as few of the already covered lanes as possible.
- int Cover = countPopulation((SubRegMask & LanesLeft).getAsInteger())
- - countPopulation((SubRegMask & ~LanesLeft).getAsInteger());
+ int Cover = (SubRegMask & LanesLeft).getNumLanes()
+ - (SubRegMask & ~LanesLeft).getNumLanes();
if (Cover > BestCover) {
BestCover = Cover;
BestIdx = Idx;
@@ -706,7 +729,8 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
SlotIndex End = LIS.getMBBEndIdx(&MBB);
SlotIndex Last = End.getPrevSlot();
- DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
+ DEBUG(dbgs() << " enterIntvAtEnd " << printMBBReference(MBB) << ", "
+ << Last);
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);
if (!ParentVNI) {
DEBUG(dbgs() << ": not live\n");
@@ -785,7 +809,8 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
SlotIndex Start = LIS.getMBBStartIdx(&MBB);
- DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
+ DEBUG(dbgs() << " leaveIntvAtTop " << printMBBReference(MBB) << ", "
+ << Start);
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
if (!ParentVNI) {
@@ -875,23 +900,23 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
// Best candidate so far.
MachineBasicBlock *BestMBB = MBB;
- unsigned BestDepth = UINT_MAX;
+ unsigned BestDepth = std::numeric_limits<unsigned>::max();
- for (;;) {
+ while (true) {
const MachineLoop *Loop = Loops.getLoopFor(MBB);
    // MBB isn't in a loop, so it doesn't get any better. All dominators have a
// higher frequency by definition.
if (!Loop) {
- DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
- << MBB->getNumber() << " at depth 0\n");
+ DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates "
+ << printMBBReference(*MBB) << " at depth 0\n");
return MBB;
}
// We'll never be able to exit the DefLoop.
if (Loop == DefLoop) {
- DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
- << MBB->getNumber() << " in the same loop\n");
+ DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates "
+ << printMBBReference(*MBB) << " in the same loop\n");
return MBB;
}
@@ -900,8 +925,8 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
if (Depth < BestDepth) {
BestMBB = MBB;
BestDepth = Depth;
- DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
- << MBB->getNumber() << " at depth " << Depth << '\n');
+ DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates "
+ << printMBBReference(*MBB) << " at depth " << Depth << '\n');
}
// Leave loop by going to the immediate dominator of the loop header.
@@ -978,7 +1003,7 @@ void SplitEditor::hoistCopies() {
// Track the nearest common dominator for all back-copies for each ParentVNI,
// indexed by ParentVNI->id.
- typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
+ using DomPair = std::pair<MachineBasicBlock *, SlotIndex>;
SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
// The total cost of all the back-copies for each ParentVNI.
SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums());
@@ -1040,7 +1065,7 @@ void SplitEditor::hoistCopies() {
DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
<< " for parent " << ParentVNI->id << '@' << ParentVNI->def
- << " hoist to BB#" << Dom.first->getNumber() << ' '
+ << " hoist to " << printMBBReference(*Dom.first) << ' '
<< Dom.second << '\n');
}
@@ -1088,7 +1113,6 @@ void SplitEditor::hoistCopies() {
removeBackCopies(BackCopies);
}
-
/// transferValues - Transfer all possible values to the new live ranges.
/// Values that were rematerialized are left alone, they need LRCalc.extend().
bool SplitEditor::transferValues() {
@@ -1118,7 +1142,7 @@ bool SplitEditor::transferValues() {
// The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx
- << '(' << PrintReg(Edit->get(RegIdx)) << ')');
+ << '(' << printReg(Edit->get(RegIdx)) << ')');
LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
// Check for a simply defined value that can be blitted directly.
@@ -1151,7 +1175,7 @@ bool SplitEditor::transferValues() {
if (Start != BlockStart) {
VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped value");
- DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
+ DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB));
// MBB has its own def. Is it also live-out?
if (BlockEnd <= End)
LRC.setLiveOutValue(&*MBB, VNI);
@@ -1164,7 +1188,7 @@ bool SplitEditor::transferValues() {
// Handle the live-in blocks covered by [Start;End).
assert(Start <= BlockStart && "Expected live-in block");
while (BlockStart < End) {
- DEBUG(dbgs() << ">BB#" << MBB->getNumber());
+ DEBUG(dbgs() << ">" << printMBBReference(*MBB));
BlockEnd = LIS.getMBBEndIdx(&*MBB);
if (BlockStart == ParentVNI->def) {
// This block has the def of a parent PHI, so it isn't live-in.
@@ -1276,6 +1300,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
struct ExtPoint {
ExtPoint(const MachineOperand &O, unsigned R, SlotIndex N)
: MO(O), RegIdx(R), Next(N) {}
+
MachineOperand MO;
unsigned RegIdx;
SlotIndex Next;
@@ -1306,7 +1331,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
unsigned RegIdx = RegAssign.lookup(Idx);
LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
MO.setReg(LI.reg);
- DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
+ DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent()) << '\t'
<< Idx << ':' << RegIdx << '\t' << *MI);
// Extend liveness to Idx if the instruction reads reg.
@@ -1352,9 +1377,9 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
continue;
// The problem here is that the new register may have been created
// for a partially defined original register. For example:
- // %vreg827:subreg_hireg<def,read-undef> = ...
+ // %0:subreg_hireg<def,read-undef> = ...
// ...
- // %vreg828<def> = COPY %vreg827
+ // %1 = COPY %0
if (S.empty())
continue;
SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
@@ -1486,7 +1511,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
assert(!LRMap || LRMap->size() == Edit->size());
}
-
//===----------------------------------------------------------------------===//
// Single Block Splitting
//===----------------------------------------------------------------------===//
@@ -1524,7 +1548,6 @@ void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
}
}
-
//===----------------------------------------------------------------------===//
// Global Live Range Splitting Support
//===----------------------------------------------------------------------===//
@@ -1542,9 +1565,9 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
SlotIndex Start, Stop;
std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
- DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop
- << ") intf " << LeaveBefore << '-' << EnterAfter
- << ", live-through " << IntvIn << " -> " << IntvOut);
+ DEBUG(dbgs() << "%bb." << MBBNum << " [" << Start << ';' << Stop << ") intf "
+ << LeaveBefore << '-' << EnterAfter << ", live-through "
+ << IntvIn << " -> " << IntvOut);
assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
@@ -1639,13 +1662,12 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
}
-
void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
unsigned IntvIn, SlotIndex LeaveBefore) {
SlotIndex Start, Stop;
std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
- DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop
<< "), uses " << BI.FirstInstr << '-' << BI.LastInstr
<< ", reg-in " << IntvIn << ", leave before " << LeaveBefore
<< (BI.LiveOut ? ", stack-out" : ", killed in block"));
@@ -1737,7 +1759,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
SlotIndex Start, Stop;
std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
- DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop
<< "), uses " << BI.FirstInstr << '-' << BI.LastInstr
<< ", reg-out " << IntvOut << ", enter after " << EnterAfter
<< (BI.LiveIn ? ", stack-in" : ", defined in block"));
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 9d409e924a3d..c0608893d4e5 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -1,4 +1,4 @@
-//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
+//===- SplitKit.h - Toolkit for splitting live ranges -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,26 +17,32 @@
#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/Compiler.h"
+#include <utility>
namespace llvm {
-class ConnectedVNInfoEqClasses;
-class LiveInterval;
class LiveIntervals;
class LiveRangeEdit;
class MachineBlockFrequencyInfo;
-class MachineInstr;
+class MachineDominatorTree;
class MachineLoopInfo;
class MachineRegisterInfo;
class TargetInstrInfo;
class TargetRegisterInfo;
class VirtRegMap;
-class VNInfo;
-class raw_ostream;
/// Determines the latest safe point in a block in which we can insert a split,
/// spill, or other instruction related to CurLI.
@@ -116,7 +122,7 @@ public:
private:
// Current live interval.
- const LiveInterval *CurLI;
+ const LiveInterval *CurLI = nullptr;
/// Insert Point Analysis.
InsertPointAnalysis IPA;
@@ -200,7 +206,7 @@ public:
/// analyze(li).
unsigned countLiveBlocks(const LiveInterval *li) const;
- typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+ using BlockPtrSet = SmallPtrSet<const MachineBasicBlock *, 16>;
/// shouldSplitSingleBlock - Returns true if it would help to create a local
/// live range for the instructions in BI. There is normally no benefit to
@@ -221,7 +227,6 @@ public:
}
};
-
/// SplitEditor - Edit machine code and LiveIntervals for live range
/// splitting.
///
@@ -245,7 +250,6 @@ class LLVM_LIBRARY_VISIBILITY SplitEditor {
const MachineBlockFrequencyInfo &MBFI;
public:
-
/// ComplementSpillMode - Select how the complement live range should be
/// created. SplitEditor automatically creates interval 0 to contain
/// anything that isn't added to another interval. This complement interval
@@ -273,19 +277,18 @@ public:
};
private:
-
/// Edit - The current parent register and new intervals created.
- LiveRangeEdit *Edit;
+ LiveRangeEdit *Edit = nullptr;
/// Index into Edit of the currently open interval.
/// The index 0 is used for the complement, so the first interval started by
/// openIntv will be 1.
- unsigned OpenIdx;
+ unsigned OpenIdx = 0;
/// The current spill mode, selected by reset().
- ComplementSpillMode SpillMode;
+ ComplementSpillMode SpillMode = SM_Partition;
- typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
+ using RegAssignMap = IntervalMap<SlotIndex, unsigned>;
/// Allocator for the interval map. This will eventually be shared with
/// SlotIndexes and LiveIntervals.
@@ -296,8 +299,8 @@ private:
/// Idx.
RegAssignMap RegAssign;
- typedef PointerIntPair<VNInfo*, 1> ValueForcePair;
- typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap;
+ using ValueForcePair = PointerIntPair<VNInfo *, 1>;
+ using ValueMap = DenseMap<std::pair<unsigned, unsigned>, ValueForcePair>;
/// Values - keep track of the mapping from parent values to values in the new
/// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
@@ -419,9 +422,9 @@ private:
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
/// Newly created intervals will be appended to newIntervals.
- SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA, LiveIntervals&,
- VirtRegMap&, MachineDominatorTree&,
- MachineBlockFrequencyInfo &);
+ SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa, LiveIntervals &lis,
+ VirtRegMap &vrm, MachineDominatorTree &mdt,
+ MachineBlockFrequencyInfo &mbfi);
/// reset - Prepare for a new split.
void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
@@ -536,6 +539,6 @@ public:
unsigned IntvOut, SlotIndex EnterAfter);
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_SPLITKIT_H
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index e5fc5402cb41..608845498b48 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -1,4 +1,4 @@
-//===-- StackColoring.cpp -------------------------------------------------===//
+//===- StackColoring.cpp --------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -22,35 +22,44 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <memory>
+#include <utility>
using namespace llvm;
@@ -366,6 +375,7 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
//
namespace {
+
/// StackColoring - A machine pass for merging disjoint stack allocations,
/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
class StackColoring : public MachineFunctionPass {
@@ -378,32 +388,40 @@ class StackColoring : public MachineFunctionPass {
struct BlockLifetimeInfo {
/// Which slots BEGIN in each basic block.
BitVector Begin;
+
/// Which slots ENDs in each basic block.
BitVector End;
+
/// Which slots are marked as LIVE_IN, coming into each basic block.
BitVector LiveIn;
+
/// Which slots are marked as LIVE_OUT, coming out of each basic block.
BitVector LiveOut;
};
/// Maps active slots (per bit) for each basic block.
- typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap;
+ using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>;
LivenessMap BlockLiveness;
/// Maps basic blocks to serial numbers.
- DenseMap<const MachineBasicBlock*, int> BasicBlocks;
+ DenseMap<const MachineBasicBlock *, int> BasicBlocks;
+
/// Maps serial numbers to basic blocks.
- SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
+ SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
/// Maps slots to their use interval. Outside of this interval, a slot's
/// value is either dead or `undef` and it will not be written to.
SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
+
/// Maps slots to the points where they can become in-use.
SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
+
/// VNInfo is used for the construction of LiveIntervals.
VNInfo::Allocator VNInfoAllocator;
+
/// SlotIndex analysis object.
SlotIndexes *Indexes;
+
/// The stack protector object.
StackProtector *SP;
@@ -424,13 +442,18 @@ class StackColoring : public MachineFunctionPass {
public:
static char ID;
+
StackColoring() : MachineFunctionPass(ID) {
initializeStackColoringPass(*PassRegistry::getPassRegistry());
}
+
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnMachineFunction(MachineFunction &MF) override;
private:
+ /// Used in collectMarkers.
+ using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
+
/// Debug.
void dump() const;
void dumpIntervals() const;
@@ -489,13 +512,12 @@ private:
/// Map entries which point to other entries to their destination.
/// A->B->C becomes A->C.
void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
-
- /// Used in collectMarkers
- typedef DenseMap<const MachineBasicBlock*, BitVector> BlockBitVecMap;
};
+
} // end anonymous namespace
char StackColoring::ID = 0;
+
char &llvm::StackColoringID = StackColoring::ID;
INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
@@ -559,16 +581,13 @@ static inline int getStartOrEndSlot(const MachineInstr &MI)
return -1;
}
-//
// At the moment the only way to end a variable lifetime is with
// a LIFETIME_END op (which can't also contain a start). If things
// change and the IR allows for a single inst that both begins
// and ends lifetime(s), this interface will need to be reworked.
-//
bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
SmallVector<int, 4> &slots,
- bool &isStart)
-{
+ bool &isStart) {
if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
MI.getOpcode() == TargetOpcode::LIFETIME_END) {
int Slot = getStartOrEndSlot(MI);
@@ -608,8 +627,7 @@ bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
return false;
}
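For reference, the markers this predicate recognizes are the LIFETIME_START and LIFETIME_END pseudo instructions, whose sole operand is the frame index of the marked slot. A condensed sketch of that core check (the helper name is illustrative; the real function additionally treats direct uses of interesting slots as implicit markers):

static bool classifyLifetimeMarker(const MachineInstr &MI, int &Slot,
                                   bool &IsStart) {
  if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
      MI.getOpcode() == TargetOpcode::LIFETIME_END) {
    Slot = MI.getOperand(0).getIndex(); // frame index of the marked slot
    IsStart = MI.getOpcode() == TargetOpcode::LIFETIME_START;
    return true;
  }
  return false;
}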
-unsigned StackColoring::collectMarkers(unsigned NumSlot)
-{
+unsigned StackColoring::collectMarkers(unsigned NumSlot) {
unsigned MarkersFound = 0;
BlockBitVecMap SeenStartMap;
InterestingSlots.clear();
@@ -624,7 +642,6 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
// Step 1: collect markers and populate the "InterestingSlots"
// and "ConservativeSlots" sets.
for (MachineBasicBlock *MBB : depth_first(MF)) {
-
// Compute the set of slots for which we've seen a START marker but have
// not yet seen an END marker at this point in the walk (e.g. on entry
// to this bb).
@@ -697,7 +714,6 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
// NOTE: We use a depth-first iteration to ensure that we obtain a
// deterministic numbering.
for (MachineBasicBlock *MBB : depth_first(MF)) {
-
// Assign a serial number to this basic block.
BasicBlocks[MBB] = BasicBlockNumbering.size();
BasicBlockNumbering.push_back(MBB);
@@ -723,7 +739,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
} else {
for (auto Slot : slots) {
DEBUG(dbgs() << "Found a use of slot #" << Slot);
- DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index ");
+ DEBUG(dbgs() << " at " << printMBBReference(*MBB) << " index ");
DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
if (Allocation) {
@@ -745,8 +761,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
return MarkersFound;
}
-void StackColoring::calculateLocalLiveness()
-{
+void StackColoring::calculateLocalLiveness() {
unsigned NumIters = 0;
bool changed = true;
while (changed) {
@@ -754,7 +769,6 @@ void StackColoring::calculateLocalLiveness()
++NumIters;
for (const MachineBasicBlock *BB : BasicBlockNumbering) {
-
// Use an iterator to avoid repeated lookups.
LivenessMap::iterator BI = BlockLiveness.find(BB);
assert(BI != BlockLiveness.end() && "Block not found");
@@ -792,7 +806,7 @@ void StackColoring::calculateLocalLiveness()
BlockInfo.LiveOut |= LocalLiveOut;
}
}
- }// while changed.
+ } // while changed.
NumIterations = NumIters;
}
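The loop above is a classic forward dataflow fixed point over the Begin/End/LiveIn/LiveOut bit vectors declared earlier: LiveIn is the union of the predecessors' LiveOut, and LiveOut is LiveIn minus the slots whose lifetimes END in the block, plus the slots that BEGIN in it. A self-contained sketch of one update step, using plain std::vector<bool> sets rather than the pass's BitVectors:

#include <vector>

struct Liveness {
  std::vector<bool> Begin, End, LiveIn, LiveOut; // one bit per stack slot
};

// One relaxation step; callers sweep all blocks until nothing changes.
static bool updateBlock(Liveness &B,
                        const std::vector<const Liveness *> &Preds) {
  bool Changed = false;
  for (const Liveness *P : Preds)               // LiveIn |= pred LiveOut
    for (size_t S = 0; S < B.LiveIn.size(); ++S)
      if (P->LiveOut[S] && !B.LiveIn[S]) {
        B.LiveIn[S] = true;
        Changed = true;
      }
  for (size_t S = 0; S < B.LiveOut.size(); ++S) {
    // Kill slots that END here, then add slots that BEGIN here.
    bool Out = (B.LiveIn[S] && !B.End[S]) || B.Begin[S];
    if (Out && !B.LiveOut[S]) {
      B.LiveOut[S] = true;
      Changed = true;
    }
  }
  return Changed;
}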
@@ -818,7 +832,6 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
// Create the interval for the basic blocks containing lifetime begin/end.
for (const MachineInstr &MI : MBB) {
-
SmallVector<int, 4> slots;
bool IsStart = false;
if (!isLifetimeStartOrEnd(MI, slots, IsStart))
@@ -1047,7 +1060,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
for (WinEHHandlerType &H : TBME.HandlerArray)
- if (H.CatchObj.FrameIndex != INT_MAX &&
+ if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() &&
SlotRemap.count(H.CatchObj.FrameIndex))
H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
@@ -1116,8 +1129,7 @@ void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
DEBUG(dbgs() << "********** Stack Coloring **********\n"
- << "********** Function: "
- << ((const Value*)Func.getFunction())->getName() << '\n');
+ << "********** Function: " << Func.getName() << '\n');
MF = &Func;
MFI = &MF->getFrameInfo();
Indexes = &getAnalysis<SlotIndexes>();
@@ -1157,7 +1169,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Don't continue because there are not enough lifetime markers, or the
// stack is too small, or we are told not to optimize the slots.
if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
- skipFunction(*Func.getFunction())) {
+ skipFunction(Func.getFunction())) {
DEBUG(dbgs() << "Will not try to merge slots.\n");
return removeAllMarkers();
}
@@ -1231,7 +1243,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
LiveInterval *Second = &*Intervals[SecondSlot];
auto &FirstS = LiveStarts[FirstSlot];
auto &SecondS = LiveStarts[SecondSlot];
- assert (!First->empty() && !Second->empty() && "Found an empty range");
+ assert(!First->empty() && !Second->empty() && "Found an empty range");
// Merge disjoint slots. This is a little bit tricky - see the
// Implementation Notes section for an explanation.
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index a5ef7c8229f5..cc9af92c395f 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -19,10 +19,10 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index b4fa29d9a86b..e66a25bec911 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -16,6 +16,9 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -27,9 +30,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOpcodes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -41,7 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "stackmaps"
static cl::opt<int> StackMapVersion(
- "stackmap-version", cl::init(3),
+ "stackmap-version", cl::init(3), cl::Hidden,
cl::desc("Specify the stackmap encoding version (default = 3)"));
const char *StackMaps::WSMP = "Stack Maps: ";
@@ -193,14 +193,14 @@ void StackMaps::print(raw_ostream &OS) {
case Location::Register:
OS << "Register ";
if (TRI)
- OS << TRI->getName(Loc.Reg);
+ OS << printReg(Loc.Reg, TRI);
else
OS << Loc.Reg;
break;
case Location::Direct:
OS << "Direct ";
if (TRI)
- OS << TRI->getName(Loc.Reg);
+ OS << printReg(Loc.Reg, TRI);
else
OS << Loc.Reg;
if (Loc.Offset)
@@ -209,7 +209,7 @@ void StackMaps::print(raw_ostream &OS) {
case Location::Indirect:
OS << "Indirect ";
if (TRI)
- OS << TRI->getName(Loc.Reg);
+ OS << printReg(Loc.Reg, TRI);
else
OS << Loc.Reg;
OS << "+" << Loc.Offset;
@@ -233,7 +233,7 @@ void StackMaps::print(raw_ostream &OS) {
for (const auto &LO : LiveOuts) {
OS << WSMP << "\t\tLO " << Idx << ": ";
if (TRI)
- OS << TRI->getName(LO.Reg);
+ OS << printReg(LO.Reg, TRI);
else
OS << LO.Reg;
OS << "\t[encoding: .short " << LO.DwarfRegNum << ", .byte 0, .byte "
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index d8e7840a2576..62cef95a4af2 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -14,14 +14,16 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -42,10 +44,8 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <utility>
using namespace llvm;
@@ -247,10 +247,12 @@ bool StackProtector::RequiresStackProtector() {
OptimizationRemarkEmitter ORE(F);
if (F->hasFnAttribute(Attribute::StackProtectReq)) {
- ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F)
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F)
<< "Stack protection applied to function "
<< ore::NV("Function", F)
- << " due to a function attribute or command-line switch");
+ << " due to a function attribute or command-line switch";
+ });
NeedsProtector = true;
Strong = true; // Use the same heuristic as strong to determine SSPLayout
} else if (F->hasFnAttribute(Attribute::StackProtectStrong))
@@ -264,29 +266,31 @@ bool StackProtector::RequiresStackProtector() {
for (const Instruction &I : BB) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
if (AI->isArrayAllocation()) {
- OptimizationRemark Remark(DEBUG_TYPE, "StackProtectorAllocaOrArray",
- &I);
- Remark
- << "Stack protection applied to function "
- << ore::NV("Function", F)
- << " due to a call to alloca or use of a variable length array";
+ auto RemarkBuilder = [&]() {
+ return OptimizationRemark(DEBUG_TYPE, "StackProtectorAllocaOrArray",
+ &I)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a call to alloca or use of a variable length "
+ "array";
+ };
if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
// A call to alloca with size >= SSPBufferSize requires
// stack protectors.
Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
- ORE.emit(Remark);
+ ORE.emit(RemarkBuilder);
NeedsProtector = true;
} else if (Strong) {
// Require protectors for all alloca calls in strong mode.
Layout.insert(std::make_pair(AI, SSPLK_SmallArray));
- ORE.emit(Remark);
+ ORE.emit(RemarkBuilder);
NeedsProtector = true;
}
} else {
// A call to alloca with a variable size requires protectors.
Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
- ORE.emit(Remark);
+ ORE.emit(RemarkBuilder);
NeedsProtector = true;
}
continue;
@@ -296,11 +300,13 @@ bool StackProtector::RequiresStackProtector() {
if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray
: SSPLK_SmallArray));
- ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
<< "Stack protection applied to function "
<< ore::NV("Function", F)
<< " due to a stack allocated buffer or struct containing a "
- "buffer");
+ "buffer";
+ });
NeedsProtector = true;
continue;
}
@@ -308,11 +314,13 @@ bool StackProtector::RequiresStackProtector() {
if (Strong && HasAddressTaken(AI)) {
++NumAddrTaken;
Layout.insert(std::make_pair(AI, SSPLK_AddrOf));
- ORE.emit(
- OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken", &I)
- << "Stack protection applied to function "
- << ore::NV("Function", F)
- << " due to the address of a local variable being taken");
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken",
+ &I)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to the address of a local variable being taken";
+ });
NeedsProtector = true;
}
}
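The hunks above all make the same change: each remark is now constructed inside a callable that OptimizationRemarkEmitter::emit invokes only when remark emission is actually enabled, so the relatively expensive stream formatting is skipped otherwise. The general shape of the idiom, with the remark name as a placeholder:

ORE.emit([&]() {
  return OptimizationRemark(DEBUG_TYPE, "SomeRemarkName", &I) // name illustrative
         << "Stack protection applied to function " << ore::NV("Function", F);
});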
@@ -377,8 +385,12 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
/// - The epilogue checks the value stored in the prologue against the original
/// value. It calls __stack_chk_fail if they differ.
bool StackProtector::InsertStackProtectors() {
+ // If the target wants to XOR the frame pointer into the guard value, it's
+ // impossible to emit the check in IR, so the target *must* support stack
+ // protection in SDAG.
bool SupportsSelectionDAGSP =
- EnableSelectionDAGSP && !TM->Options.EnableFastISel;
+ TLI->useStackGuardXorFP() ||
+ (EnableSelectionDAGSP && !TM->Options.EnableFastISel);
AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 856bca19dee8..62f662d1ade4 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -1,4 +1,4 @@
-//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//===- StackSlotColoring.cpp - Stack slot coloring pass. ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,22 +14,34 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/IR/Module.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "stack-slot-coloring"
@@ -45,6 +57,7 @@ STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
namespace {
+
class StackSlotColoring : public MachineFunctionPass {
LiveStacks* LS;
MachineFrameInfo *MFI;
@@ -73,7 +86,7 @@ namespace {
BitVector AllColors;
// NextColor - Next "color" that's not yet used.
- int NextColor;
+ int NextColor = -1;
// UsedColors - "Colors" that have been assigned.
BitVector UsedColors;
@@ -83,10 +96,10 @@ namespace {
public:
static char ID; // Pass identification
- StackSlotColoring() :
- MachineFunctionPass(ID), NextColor(-1) {
- initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
- }
+
+ StackSlotColoring() : MachineFunctionPass(ID) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -111,9 +124,11 @@ namespace {
MachineFunction &MF);
bool RemoveDeadStores(MachineBasicBlock* MBB);
};
+
} // end anonymous namespace
char StackSlotColoring::ID = 0;
+
char &llvm::StackSlotColoringID = StackSlotColoring::ID;
INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE,
@@ -125,14 +140,16 @@ INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE,
"Stack Slot Coloring", false, false)
namespace {
- // IntervalSorter - Comparison predicate that sort live intervals by
- // their weight.
- struct IntervalSorter {
- bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
- return LHS->weight > RHS->weight;
- }
- };
-}
+
+// IntervalSorter - Comparison predicate that sorts live intervals by
+// their weight.
+struct IntervalSorter {
+ bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+ return LHS->weight > RHS->weight;
+ }
+};
+
+} // end anonymous namespace
/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
/// references and update spill slot weights.
@@ -185,8 +202,10 @@ void StackSlotColoring::InitializeSlots() {
UsedColors.resize(LastFI);
Assignments.resize(LastFI);
- typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair;
+ using Pair = std::iterator_traits<LiveStacks::iterator>::value_type;
+
SmallVector<Pair *, 16> Intervals;
+
Intervals.reserve(LS->getNumIntervals());
for (auto &I : *LS)
Intervals.push_back(&I);
@@ -229,10 +248,11 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
}
/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
-///
int StackSlotColoring::ColorSlot(LiveInterval *li) {
int Color = -1;
bool Share = false;
+ int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+
if (!DisableSharing) {
// Check if it's possible to reuse any of the used colors.
Color = UsedColors.find_first();
@@ -246,6 +266,11 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
}
}
+ if (Color != -1 && MFI->getStackID(Color) != MFI->getStackID(FI)) {
+ DEBUG(dbgs() << "cannot share FIs with different stack IDs\n");
+ Share = false;
+ }
+
// Assign it to the first available color (assumed to be the best) if it's
// not possible to share a used color with other objects.
if (!Share) {
@@ -257,7 +282,6 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
// Record the assignment.
Assignments[Color].push_back(li);
- int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
// Change size and alignment of the allocated slot. If there are multiple
@@ -364,7 +388,6 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI,
// The MachineMemOperands have already been updated.
}
-
/// RemoveDeadStores - Scan through a basic block and look for loads followed
/// by stores. If they're both using the same stack slot, then the store is
/// definitely dead. This could obviously be much more aggressive (consider
@@ -426,7 +449,6 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
return changed;
}
-
bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
DEBUG({
dbgs() << "********** Stack Slot Coloring **********\n"
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 489a607eb176..df1eebf43b2b 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -15,6 +15,8 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/Pass.h"
@@ -47,12 +49,15 @@ char &llvm::TailDuplicateID = TailDuplicatePass::ID;
INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false)
bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
+ if (skipFunction(MF.getFunction()))
return false;
auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- Duplicator.initMF(MF, MBPI, /* LayoutMode */ false);
+ // TODO: Querying isSSA() to determine pre-/post-regalloc is fragile; better
+ // to split this into two passes instead.
+ bool PreRegAlloc = MF.getRegInfo().isSSA();
+ Duplicator.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ false);
bool MadeChange = false;
while (Duplicator.tailDuplicateBlocks())
diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp
index dc7265dcf6c2..f51c884839b3 100644
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -12,13 +12,14 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -27,16 +28,15 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
-#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -75,7 +75,7 @@ static cl::opt<bool>
static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
cl::Hidden);
-void TailDuplicator::initMF(MachineFunction &MFin,
+void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPIin,
bool LayoutModeIn, unsigned TailDupSizeIn) {
MF = &MFin;
@@ -89,7 +89,7 @@ void TailDuplicator::initMF(MachineFunction &MFin,
assert(MBPI != nullptr && "Machine Branch Probability Info required");
LayoutMode = LayoutModeIn;
- PreRegAlloc = MRI->isSSA();
+ this->PreRegAlloc = PreRegAlloc;
}
static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
@@ -111,9 +111,10 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
}
}
if (!Found) {
- dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
- dbgs() << " missing input from predecessor BB#"
- << PredBB->getNumber() << '\n';
+ dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
+ << *MI;
+ dbgs() << " missing input from predecessor "
+ << printMBBReference(*PredBB) << '\n';
llvm_unreachable(nullptr);
}
}
@@ -121,15 +122,16 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
if (CheckExtra && !Preds.count(PHIBB)) {
- dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": "
- << *MI;
- dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber()
- << '\n';
+ dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB)
+ << ": " << *MI;
+ dbgs() << " extra input from predecessor "
+ << printMBBReference(*PHIBB) << '\n';
llvm_unreachable(nullptr);
}
if (PHIBB->getNumber() < 0) {
- dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
- dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
+ dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
+ << *MI;
+ dbgs() << " non-existing " << printMBBReference(*PHIBB) << '\n';
llvm_unreachable(nullptr);
}
}
@@ -369,10 +371,10 @@ void TailDuplicator::duplicateInstruction(
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
const DenseSet<unsigned> &UsedByPhi) {
- MachineInstr *NewMI = TII->duplicate(*MI, *MF);
+ MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI);
if (PreRegAlloc) {
- for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = NewMI->getOperand(i);
+ for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI.getOperand(i);
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -443,7 +445,6 @@ void TailDuplicator::duplicateInstruction(
}
}
}
- PredBB->insert(PredBB->instr_end(), NewMI);
}
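The updated duplicate() contract clones and inserts in a single step (including whole instruction bundles), which is why the trailing PredBB->insert() call is deleted here. A sketch of the new call shape, assuming TII, PredBB, and MI as in the surrounding code:

// Clone MI (and any bundled instructions) and insert at the end of PredBB.
MachineInstr &Clone = TII->duplicate(*PredBB, PredBB->end(), *MI);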
/// After FromBB is tail duplicated into its predecessor blocks, the successors
@@ -549,7 +550,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
unsigned MaxDuplicateCount;
if (TailDupSize == 0 &&
TailDuplicateSize.getNumOccurrences() == 0 &&
- MF->getFunction()->optForSize())
+ MF->getFunction().optForSize())
MaxDuplicateCount = 1;
else if (TailDupSize == 0)
MaxDuplicateCount = TailDuplicateSize;
@@ -784,7 +785,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
MachineBasicBlock *ForcedLayoutPred,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallVectorImpl<MachineInstr *> &Copies) {
- DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+ DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB)
+ << '\n');
DenseSet<unsigned> UsedByPhi;
getRegsUsedByPHIs(*TailBB, &UsedByPhi);
@@ -825,10 +827,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
// Clone the contents of TailBB into PredBB.
DenseMap<unsigned, RegSubRegPair> LocalVRMap;
SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
- // Use instr_iterator here to properly handle bundles, e.g.
- // ARM Thumb2 IT block.
- MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
- while (I != TailBB->instr_end()) {
+ for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
+ I != E; /* empty */) {
MachineInstr *MI = &*I;
++I;
if (MI->isPHI()) {
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 9dd98b4020d2..b2151eb49655 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -15,16 +15,16 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -32,7 +32,7 @@ TargetFrameLowering::~TargetFrameLowering() = default;
/// The default implementation just looks at attribute "no-frame-pointer-elim".
bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
- auto Attr = MF.getFunction()->getFnAttribute("no-frame-pointer-elim");
+ auto Attr = MF.getFunction().getFnAttribute("no-frame-pointer-elim");
return Attr.getValueAsString() == "true";
}
@@ -82,7 +82,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
return;
// In Naked functions we aren't going to save any registers.
- if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
+ if (MF.getFunction().hasFnAttribute(Attribute::Naked))
return;
// Functions which call __builtin_unwind_init get all their registers saved.
@@ -99,7 +99,7 @@ unsigned TargetFrameLowering::getStackAlignmentSkew(
const MachineFunction &MF) const {
// When HHVM function is called, the stack is skewed as the return address
// is removed from the stack before we enter the function.
- if (LLVM_UNLIKELY(MF.getFunction()->getCallingConv() == CallingConv::HHVM))
+ if (LLVM_UNLIKELY(MF.getFunction().getCallingConv() == CallingConv::HHVM))
return MF.getTarget().getPointerSize();
return 0;
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 14c5adc0d898..db925f803db6 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -19,6 +19,9 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -26,10 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <cctype>
using namespace llvm;
@@ -67,6 +67,11 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
llvm_unreachable("Target didn't implement insertNoop!");
}
+static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
+ return strncmp(Str, MAI.getCommentString().data(),
+ MAI.getCommentString().size()) == 0;
+}
+
/// Measure the specified inline asm to determine an approximation of its
/// length.
/// Comments (which run till the next SeparatorString or newline) do not
@@ -75,29 +80,46 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
/// multiple instructions separated by SeparatorString or newlines.
/// Variable-length instructions are not handled here; this function
/// may be overloaded in the target code to do that.
+/// We implement a special case of the .space directive which takes only a
+/// single integer argument in base 10 that is the size in bytes. This is a
+/// restricted form of the GAS directive in that we only interpret
+/// simple size values (i.e. not a logical or arithmetic expression) without
+/// the optional fill value. This is primarily used for creating arbitrary
+/// sized inline asm blocks for testing purposes.
unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
const MCAsmInfo &MAI) const {
// Count the number of instructions in the asm.
- bool atInsnStart = true;
- unsigned InstCount = 0;
+ bool AtInsnStart = true;
+ unsigned Length = 0;
for (; *Str; ++Str) {
if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
strlen(MAI.getSeparatorString())) == 0) {
- atInsnStart = true;
- } else if (strncmp(Str, MAI.getCommentString().data(),
- MAI.getCommentString().size()) == 0) {
+ AtInsnStart = true;
+ } else if (isAsmComment(Str, MAI)) {
// Stop counting as an instruction after a comment until the next
// separator.
- atInsnStart = false;
+ AtInsnStart = false;
}
- if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
- ++InstCount;
- atInsnStart = false;
+ if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ unsigned AddLength = MAI.getMaxInstLength();
+ if (strncmp(Str, ".space", 6) == 0) {
+ char *EStr;
+ int SpaceSize;
+ SpaceSize = strtol(Str + 6, &EStr, 10);
+ SpaceSize = SpaceSize < 0 ? 0 : SpaceSize;
+ while (*EStr != '\n' && std::isspace(static_cast<unsigned char>(*EStr)))
+ ++EStr;
+ if (*EStr == '\0' || *EStr == '\n' ||
+ isAsmComment(EStr, MAI)) // Successfully parsed .space argument
+ AddLength = SpaceSize;
+ }
+ Length += AddLength;
+ AtInsnStart = false;
}
}
- return InstCount * MAI.getMaxInstLength();
+ return Length;
}
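A worked example of the new measurement rule, with a hypothetical asm string and a target whose MAI.getMaxInstLength() is 4 (TII and MAI assumed in scope):

// Ordinary instructions contribute MaxInstLength each; the .space
// directive contributes its literal byte count; comments contribute 0.
const char *Asm = "nop\n.space 80\nnop";
unsigned Bytes = TII->getInlineAsmLength(Asm, MAI); // 4 + 80 + 4 = 88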
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
@@ -169,7 +191,7 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
MachineInstr *CommutedMI = nullptr;
if (NewMI) {
// Create a new instruction.
- MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFunction &MF = *MI.getMF();
CommutedMI = MF.CloneMachineInstr(&MI);
} else {
CommutedMI = &MI;
@@ -388,10 +410,11 @@ bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0,
return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
-MachineInstr *TargetInstrInfo::duplicate(MachineInstr &Orig,
- MachineFunction &MF) const {
+MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const {
assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated");
- return MF.CloneMachineInstr(&Orig);
+ MachineFunction &MF = *MBB.getParent();
+ return MF.CloneMachineInstrBundle(MBB, InsertBefore, Orig);
}
// If the COPY instruction in MI can be folded to a stack operation, return
@@ -415,7 +438,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
"Cannot fold physregs");
- const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
@@ -495,21 +518,13 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
return NewMI;
}
-/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
-/// slot into the specified machine instruction for the specified operand(s).
-/// If this is possible, a new instruction is returned with the specified
-/// operand folded, otherwise NULL is returned. The client is responsible for
-/// removing the old instruction and adding the new one in the instruction
-/// stream.
MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
ArrayRef<unsigned> Ops, int FI,
LiveIntervals *LIS) const {
auto Flags = MachineMemOperand::MONone;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (MI.getOperand(Ops[i]).isDef())
- Flags |= MachineMemOperand::MOStore;
- else
- Flags |= MachineMemOperand::MOLoad;
+ for (unsigned OpIdx : Ops)
+ Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore
+ : MachineMemOperand::MOLoad;
MachineBasicBlock *MBB = MI.getParent();
assert(MBB && "foldMemoryOperand needs an inserted instruction");
@@ -525,10 +540,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
if (Flags & MachineMemOperand::MOStore) {
MemSize = MFI.getObjectSize(FI);
} else {
- for (unsigned Idx : Ops) {
+ for (unsigned OpIdx : Ops) {
int64_t OpSize = MFI.getObjectSize(FI);
- if (auto SubReg = MI.getOperand(Idx).getSubReg()) {
+ if (auto SubReg = MI.getOperand(OpIdx).getSubReg()) {
unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg);
if (SubRegSize > 0 && !(SubRegSize % 8))
OpSize = SubRegSize / 8;
@@ -590,6 +605,54 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
return &*--Pos;
}
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr &LoadMI,
+ LiveIntervals *LIS) const {
+ assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned OpIdx : Ops)
+ assert(MI.getOperand(OpIdx).isUse() && "Folding load into def!");
+#endif
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = nullptr;
+ int FrameIndex = 0;
+
+ if ((MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT) &&
+ isLoadFromStackSlot(LoadMI, FrameIndex)) {
+ // Fold stackmap/patchpoint.
+ NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
+ if (NewMI)
+ NewMI = &*MBB.insert(MI, NewMI);
+ } else {
+ // Ask the target to do the actual folding.
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);
+ }
+
+ if (!NewMI)
+ return nullptr;
+
+ // Copy the memoperands from the load to the folded instruction.
+ if (MI.memoperands_empty()) {
+ NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end());
+ } else {
+ // Handle the rare case of folding multiple loads.
+ NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(),
+ E = LoadMI.memoperands_end();
+ I != E; ++I) {
+ NewMI->addMemOperand(MF, *I);
+ }
+ }
+ return NewMI;
+}
+
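This overload is behaviorally unchanged; it has only been moved next to its stack-slot sibling and cleaned up. A hypothetical call site, mirroring how a spiller might fold a defining load into its single use (the operand index and the cleanup policy are the caller's responsibility):

if (MachineInstr *FoldedMI = TII->foldMemoryOperand(UseMI, {1}, LoadMI, LIS)) {
  // The folded instruction is already inserted; the originals are now dead.
  UseMI.eraseFromParent();
  LoadMI.eraseFromParent();
}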
bool TargetInstrInfo::hasReassociableOperands(
const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
const MachineOperand &Op1 = Inst.getOperand(1);
@@ -685,11 +748,13 @@ bool TargetInstrInfo::getMachineCombinerPatterns(
return false;
}
+
/// Return true when a code sequence can improve loop throughput.
bool
TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
return false;
}
+
/// Attempt the reassociation transformation to reduce critical path length.
/// See the above comments before getMachineCombinerPatterns().
void TargetInstrInfo::reassociateOps(
@@ -698,7 +763,7 @@ void TargetInstrInfo::reassociateOps(
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
- MachineFunction *MF = Root.getParent()->getParent();
+ MachineFunction *MF = Root.getMF();
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -781,7 +846,7 @@ void TargetInstrInfo::genAlternativeCodeSequence(
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
- MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
+ MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
// Select the previous instruction in the sequence based on the input pattern.
MachineInstr *Prev = nullptr;
@@ -803,59 +868,9 @@ void TargetInstrInfo::genAlternativeCodeSequence(
reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
}
-/// foldMemoryOperand - Same as the previous version except it allows folding
-/// of any load and store from / to any address, not just from a specific
-/// stack slot.
-MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
- ArrayRef<unsigned> Ops,
- MachineInstr &LoadMI,
- LiveIntervals *LIS) const {
- assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!");
-#ifndef NDEBUG
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- assert(MI.getOperand(Ops[i]).isUse() && "Folding load into def!");
-#endif
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
-
- // Ask the target to do the actual folding.
- MachineInstr *NewMI = nullptr;
- int FrameIndex = 0;
-
- if ((MI.getOpcode() == TargetOpcode::STACKMAP ||
- MI.getOpcode() == TargetOpcode::PATCHPOINT ||
- MI.getOpcode() == TargetOpcode::STATEPOINT) &&
- isLoadFromStackSlot(LoadMI, FrameIndex)) {
- // Fold stackmap/patchpoint.
- NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
- if (NewMI)
- NewMI = &*MBB.insert(MI, NewMI);
- } else {
- // Ask the target to do the actual folding.
- NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);
- }
-
- if (!NewMI) return nullptr;
-
- // Copy the memoperands from the load to the folded instruction.
- if (MI.memoperands_empty()) {
- NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end());
- }
- else {
- // Handle the rare case of folding multiple loads.
- NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(),
- E = LoadMI.memoperands_end();
- I != E; ++I) {
- NewMI->addMemOperand(MF, *I);
- }
- }
- return NewMI;
-}
-
bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
const MachineInstr &MI, AliasAnalysis *AA) const {
- const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineFunction &MF = *MI.getMF();
const MachineRegisterInfo &MRI = MF.getRegInfo();
// Remat clients assume operand 0 is the defined register.
@@ -933,7 +948,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
}
int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const {
- const MachineFunction *MF = MI.getParent()->getParent();
+ const MachineFunction *MF = MI.getMF();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
bool StackGrowsDown =
TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 3914ee514712..543c12eebb45 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1,4 +1,4 @@
-//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===//
+//===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,32 +13,55 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <cctype>
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
static cl::opt<bool> JumpIsExpensiveOverride(
@@ -78,186 +101,13 @@ static cl::opt<int> MinPercentageForPredictableBranch(
cl::Hidden);
/// InitLibcallNames - Set default libcall names.
-///
static void InitLibcallNames(const char **Names, const Triple &TT) {
- Names[RTLIB::SHL_I16] = "__ashlhi3";
- Names[RTLIB::SHL_I32] = "__ashlsi3";
- Names[RTLIB::SHL_I64] = "__ashldi3";
- Names[RTLIB::SHL_I128] = "__ashlti3";
- Names[RTLIB::SRL_I16] = "__lshrhi3";
- Names[RTLIB::SRL_I32] = "__lshrsi3";
- Names[RTLIB::SRL_I64] = "__lshrdi3";
- Names[RTLIB::SRL_I128] = "__lshrti3";
- Names[RTLIB::SRA_I16] = "__ashrhi3";
- Names[RTLIB::SRA_I32] = "__ashrsi3";
- Names[RTLIB::SRA_I64] = "__ashrdi3";
- Names[RTLIB::SRA_I128] = "__ashrti3";
- Names[RTLIB::MUL_I8] = "__mulqi3";
- Names[RTLIB::MUL_I16] = "__mulhi3";
- Names[RTLIB::MUL_I32] = "__mulsi3";
- Names[RTLIB::MUL_I64] = "__muldi3";
- Names[RTLIB::MUL_I128] = "__multi3";
- Names[RTLIB::MULO_I32] = "__mulosi4";
- Names[RTLIB::MULO_I64] = "__mulodi4";
- Names[RTLIB::MULO_I128] = "__muloti4";
- Names[RTLIB::SDIV_I8] = "__divqi3";
- Names[RTLIB::SDIV_I16] = "__divhi3";
- Names[RTLIB::SDIV_I32] = "__divsi3";
- Names[RTLIB::SDIV_I64] = "__divdi3";
- Names[RTLIB::SDIV_I128] = "__divti3";
- Names[RTLIB::UDIV_I8] = "__udivqi3";
- Names[RTLIB::UDIV_I16] = "__udivhi3";
- Names[RTLIB::UDIV_I32] = "__udivsi3";
- Names[RTLIB::UDIV_I64] = "__udivdi3";
- Names[RTLIB::UDIV_I128] = "__udivti3";
- Names[RTLIB::SREM_I8] = "__modqi3";
- Names[RTLIB::SREM_I16] = "__modhi3";
- Names[RTLIB::SREM_I32] = "__modsi3";
- Names[RTLIB::SREM_I64] = "__moddi3";
- Names[RTLIB::SREM_I128] = "__modti3";
- Names[RTLIB::UREM_I8] = "__umodqi3";
- Names[RTLIB::UREM_I16] = "__umodhi3";
- Names[RTLIB::UREM_I32] = "__umodsi3";
- Names[RTLIB::UREM_I64] = "__umoddi3";
- Names[RTLIB::UREM_I128] = "__umodti3";
-
- Names[RTLIB::NEG_I32] = "__negsi2";
- Names[RTLIB::NEG_I64] = "__negdi2";
- Names[RTLIB::ADD_F32] = "__addsf3";
- Names[RTLIB::ADD_F64] = "__adddf3";
- Names[RTLIB::ADD_F80] = "__addxf3";
- Names[RTLIB::ADD_F128] = "__addtf3";
- Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
- Names[RTLIB::SUB_F32] = "__subsf3";
- Names[RTLIB::SUB_F64] = "__subdf3";
- Names[RTLIB::SUB_F80] = "__subxf3";
- Names[RTLIB::SUB_F128] = "__subtf3";
- Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
- Names[RTLIB::MUL_F32] = "__mulsf3";
- Names[RTLIB::MUL_F64] = "__muldf3";
- Names[RTLIB::MUL_F80] = "__mulxf3";
- Names[RTLIB::MUL_F128] = "__multf3";
- Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
- Names[RTLIB::DIV_F32] = "__divsf3";
- Names[RTLIB::DIV_F64] = "__divdf3";
- Names[RTLIB::DIV_F80] = "__divxf3";
- Names[RTLIB::DIV_F128] = "__divtf3";
- Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
- Names[RTLIB::REM_F32] = "fmodf";
- Names[RTLIB::REM_F64] = "fmod";
- Names[RTLIB::REM_F80] = "fmodl";
- Names[RTLIB::REM_F128] = "fmodl";
- Names[RTLIB::REM_PPCF128] = "fmodl";
- Names[RTLIB::FMA_F32] = "fmaf";
- Names[RTLIB::FMA_F64] = "fma";
- Names[RTLIB::FMA_F80] = "fmal";
- Names[RTLIB::FMA_F128] = "fmal";
- Names[RTLIB::FMA_PPCF128] = "fmal";
- Names[RTLIB::POWI_F32] = "__powisf2";
- Names[RTLIB::POWI_F64] = "__powidf2";
- Names[RTLIB::POWI_F80] = "__powixf2";
- Names[RTLIB::POWI_F128] = "__powitf2";
- Names[RTLIB::POWI_PPCF128] = "__powitf2";
- Names[RTLIB::SQRT_F32] = "sqrtf";
- Names[RTLIB::SQRT_F64] = "sqrt";
- Names[RTLIB::SQRT_F80] = "sqrtl";
- Names[RTLIB::SQRT_F128] = "sqrtl";
- Names[RTLIB::SQRT_PPCF128] = "sqrtl";
- Names[RTLIB::LOG_F32] = "logf";
- Names[RTLIB::LOG_F64] = "log";
- Names[RTLIB::LOG_F80] = "logl";
- Names[RTLIB::LOG_F128] = "logl";
- Names[RTLIB::LOG_PPCF128] = "logl";
- Names[RTLIB::LOG2_F32] = "log2f";
- Names[RTLIB::LOG2_F64] = "log2";
- Names[RTLIB::LOG2_F80] = "log2l";
- Names[RTLIB::LOG2_F128] = "log2l";
- Names[RTLIB::LOG2_PPCF128] = "log2l";
- Names[RTLIB::LOG10_F32] = "log10f";
- Names[RTLIB::LOG10_F64] = "log10";
- Names[RTLIB::LOG10_F80] = "log10l";
- Names[RTLIB::LOG10_F128] = "log10l";
- Names[RTLIB::LOG10_PPCF128] = "log10l";
- Names[RTLIB::EXP_F32] = "expf";
- Names[RTLIB::EXP_F64] = "exp";
- Names[RTLIB::EXP_F80] = "expl";
- Names[RTLIB::EXP_F128] = "expl";
- Names[RTLIB::EXP_PPCF128] = "expl";
- Names[RTLIB::EXP2_F32] = "exp2f";
- Names[RTLIB::EXP2_F64] = "exp2";
- Names[RTLIB::EXP2_F80] = "exp2l";
- Names[RTLIB::EXP2_F128] = "exp2l";
- Names[RTLIB::EXP2_PPCF128] = "exp2l";
- Names[RTLIB::SIN_F32] = "sinf";
- Names[RTLIB::SIN_F64] = "sin";
- Names[RTLIB::SIN_F80] = "sinl";
- Names[RTLIB::SIN_F128] = "sinl";
- Names[RTLIB::SIN_PPCF128] = "sinl";
- Names[RTLIB::COS_F32] = "cosf";
- Names[RTLIB::COS_F64] = "cos";
- Names[RTLIB::COS_F80] = "cosl";
- Names[RTLIB::COS_F128] = "cosl";
- Names[RTLIB::COS_PPCF128] = "cosl";
- Names[RTLIB::POW_F32] = "powf";
- Names[RTLIB::POW_F64] = "pow";
- Names[RTLIB::POW_F80] = "powl";
- Names[RTLIB::POW_F128] = "powl";
- Names[RTLIB::POW_PPCF128] = "powl";
- Names[RTLIB::CEIL_F32] = "ceilf";
- Names[RTLIB::CEIL_F64] = "ceil";
- Names[RTLIB::CEIL_F80] = "ceill";
- Names[RTLIB::CEIL_F128] = "ceill";
- Names[RTLIB::CEIL_PPCF128] = "ceill";
- Names[RTLIB::TRUNC_F32] = "truncf";
- Names[RTLIB::TRUNC_F64] = "trunc";
- Names[RTLIB::TRUNC_F80] = "truncl";
- Names[RTLIB::TRUNC_F128] = "truncl";
- Names[RTLIB::TRUNC_PPCF128] = "truncl";
- Names[RTLIB::RINT_F32] = "rintf";
- Names[RTLIB::RINT_F64] = "rint";
- Names[RTLIB::RINT_F80] = "rintl";
- Names[RTLIB::RINT_F128] = "rintl";
- Names[RTLIB::RINT_PPCF128] = "rintl";
- Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
- Names[RTLIB::NEARBYINT_F64] = "nearbyint";
- Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
- Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
- Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
- Names[RTLIB::ROUND_F32] = "roundf";
- Names[RTLIB::ROUND_F64] = "round";
- Names[RTLIB::ROUND_F80] = "roundl";
- Names[RTLIB::ROUND_F128] = "roundl";
- Names[RTLIB::ROUND_PPCF128] = "roundl";
- Names[RTLIB::FLOOR_F32] = "floorf";
- Names[RTLIB::FLOOR_F64] = "floor";
- Names[RTLIB::FLOOR_F80] = "floorl";
- Names[RTLIB::FLOOR_F128] = "floorl";
- Names[RTLIB::FLOOR_PPCF128] = "floorl";
- Names[RTLIB::FMIN_F32] = "fminf";
- Names[RTLIB::FMIN_F64] = "fmin";
- Names[RTLIB::FMIN_F80] = "fminl";
- Names[RTLIB::FMIN_F128] = "fminl";
- Names[RTLIB::FMIN_PPCF128] = "fminl";
- Names[RTLIB::FMAX_F32] = "fmaxf";
- Names[RTLIB::FMAX_F64] = "fmax";
- Names[RTLIB::FMAX_F80] = "fmaxl";
- Names[RTLIB::FMAX_F128] = "fmaxl";
- Names[RTLIB::FMAX_PPCF128] = "fmaxl";
- Names[RTLIB::ROUND_F32] = "roundf";
- Names[RTLIB::ROUND_F64] = "round";
- Names[RTLIB::ROUND_F80] = "roundl";
- Names[RTLIB::ROUND_F128] = "roundl";
- Names[RTLIB::ROUND_PPCF128] = "roundl";
- Names[RTLIB::COPYSIGN_F32] = "copysignf";
- Names[RTLIB::COPYSIGN_F64] = "copysign";
- Names[RTLIB::COPYSIGN_F80] = "copysignl";
- Names[RTLIB::COPYSIGN_F128] = "copysignl";
- Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
- Names[RTLIB::FPEXT_F32_PPCF128] = "__gcc_stoq";
- Names[RTLIB::FPEXT_F64_PPCF128] = "__gcc_dtoq";
- Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2";
- Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2";
- Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+#define HANDLE_LIBCALL(code, name) \
+ Names[RTLIB::code] = name;
+#include "llvm/CodeGen/RuntimeLibcalls.def"
+#undef HANDLE_LIBCALL
+
+ // A few names are different on particular architectures or environments.
if (TT.isOSDarwin()) {
// For f16/f32 conversions, Darwin uses the standard naming scheme, instead
// of the gnueabi-style __gnu_*_ieee.
@@ -268,264 +118,8 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
}
- Names[RTLIB::FPROUND_F64_F16] = "__truncdfhf2";
- Names[RTLIB::FPROUND_F80_F16] = "__truncxfhf2";
- Names[RTLIB::FPROUND_F128_F16] = "__trunctfhf2";
- Names[RTLIB::FPROUND_PPCF128_F16] = "__trunctfhf2";
- Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
- Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
- Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
- Names[RTLIB::FPROUND_PPCF128_F32] = "__gcc_qtos";
- Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
- Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
- Names[RTLIB::FPROUND_PPCF128_F64] = "__gcc_qtod";
- Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
- Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
- Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
- Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
- Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
- Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
- Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
- Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
- Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
- Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi";
- Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi";
- Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti";
- Names[RTLIB::FPTOSINT_PPCF128_I32] = "__gcc_qtou";
- Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
- Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
- Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
- Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
- Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
- Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
- Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
- Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
- Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
- Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
- Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
- Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi";
- Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi";
- Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti";
- Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
- Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
- Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
- Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
- Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
- Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
- Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf";
- Names[RTLIB::SINTTOFP_I32_PPCF128] = "__gcc_itoq";
- Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
- Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
- Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
- Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf";
- Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
- Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
- Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
- Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
- Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf";
- Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
- Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
- Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
- Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
- Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf";
- Names[RTLIB::UINTTOFP_I32_PPCF128] = "__gcc_utoq";
- Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
- Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
- Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
- Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf";
- Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
- Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
- Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
- Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
- Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf";
- Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
- Names[RTLIB::OEQ_F32] = "__eqsf2";
- Names[RTLIB::OEQ_F64] = "__eqdf2";
- Names[RTLIB::OEQ_F128] = "__eqtf2";
- Names[RTLIB::OEQ_PPCF128] = "__gcc_qeq";
- Names[RTLIB::UNE_F32] = "__nesf2";
- Names[RTLIB::UNE_F64] = "__nedf2";
- Names[RTLIB::UNE_F128] = "__netf2";
- Names[RTLIB::UNE_PPCF128] = "__gcc_qne";
- Names[RTLIB::OGE_F32] = "__gesf2";
- Names[RTLIB::OGE_F64] = "__gedf2";
- Names[RTLIB::OGE_F128] = "__getf2";
- Names[RTLIB::OGE_PPCF128] = "__gcc_qge";
- Names[RTLIB::OLT_F32] = "__ltsf2";
- Names[RTLIB::OLT_F64] = "__ltdf2";
- Names[RTLIB::OLT_F128] = "__lttf2";
- Names[RTLIB::OLT_PPCF128] = "__gcc_qlt";
- Names[RTLIB::OLE_F32] = "__lesf2";
- Names[RTLIB::OLE_F64] = "__ledf2";
- Names[RTLIB::OLE_F128] = "__letf2";
- Names[RTLIB::OLE_PPCF128] = "__gcc_qle";
- Names[RTLIB::OGT_F32] = "__gtsf2";
- Names[RTLIB::OGT_F64] = "__gtdf2";
- Names[RTLIB::OGT_F128] = "__gttf2";
- Names[RTLIB::OGT_PPCF128] = "__gcc_qgt";
- Names[RTLIB::UO_F32] = "__unordsf2";
- Names[RTLIB::UO_F64] = "__unorddf2";
- Names[RTLIB::UO_F128] = "__unordtf2";
- Names[RTLIB::UO_PPCF128] = "__gcc_qunord";
- Names[RTLIB::O_F32] = "__unordsf2";
- Names[RTLIB::O_F64] = "__unorddf2";
- Names[RTLIB::O_F128] = "__unordtf2";
- Names[RTLIB::O_PPCF128] = "__gcc_qunord";
- Names[RTLIB::MEMCPY] = "memcpy";
- Names[RTLIB::MEMMOVE] = "memmove";
- Names[RTLIB::MEMSET] = "memset";
- Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] =
- "__llvm_memcpy_element_unordered_atomic_1";
- Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] =
- "__llvm_memcpy_element_unordered_atomic_2";
- Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] =
- "__llvm_memcpy_element_unordered_atomic_4";
- Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] =
- "__llvm_memcpy_element_unordered_atomic_8";
- Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] =
- "__llvm_memcpy_element_unordered_atomic_16";
- Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] =
- "__llvm_memmove_element_unordered_atomic_1";
- Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] =
- "__llvm_memmove_element_unordered_atomic_2";
- Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] =
- "__llvm_memmove_element_unordered_atomic_4";
- Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] =
- "__llvm_memmove_element_unordered_atomic_8";
- Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] =
- "__llvm_memmove_element_unordered_atomic_16";
- Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] =
- "__llvm_memset_element_unordered_atomic_1";
- Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] =
- "__llvm_memset_element_unordered_atomic_2";
- Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] =
- "__llvm_memset_element_unordered_atomic_4";
- Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] =
- "__llvm_memset_element_unordered_atomic_8";
- Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] =
- "__llvm_memset_element_unordered_atomic_16";
- Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = "__sync_val_compare_and_swap_16";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = "__sync_lock_test_and_set_16";
- Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
- Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
- Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
- Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
- Names[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16";
- Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
- Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
- Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
- Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
- Names[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16";
- Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
- Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
- Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
- Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
- Names[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16";
- Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
- Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
- Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
- Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
- Names[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16";
- Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
- Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
- Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
- Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
- Names[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16";
- Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
- Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
- Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
- Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
- Names[RTLIB::SYNC_FETCH_AND_NAND_16] = "__sync_fetch_and_nand_16";
- Names[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1";
- Names[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2";
- Names[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4";
- Names[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8";
- Names[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16";
- Names[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1";
- Names[RTLIB::SYNC_FETCH_AND_UMAX_2] = "__sync_fetch_and_umax_2";
- Names[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4";
- Names[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8";
- Names[RTLIB::SYNC_FETCH_AND_UMAX_16] = "__sync_fetch_and_umax_16";
- Names[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1";
- Names[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2";
- Names[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4";
- Names[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8";
- Names[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16";
- Names[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1";
- Names[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2";
- Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4";
- Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8";
- Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16";
-
- Names[RTLIB::ATOMIC_LOAD] = "__atomic_load";
- Names[RTLIB::ATOMIC_LOAD_1] = "__atomic_load_1";
- Names[RTLIB::ATOMIC_LOAD_2] = "__atomic_load_2";
- Names[RTLIB::ATOMIC_LOAD_4] = "__atomic_load_4";
- Names[RTLIB::ATOMIC_LOAD_8] = "__atomic_load_8";
- Names[RTLIB::ATOMIC_LOAD_16] = "__atomic_load_16";
-
- Names[RTLIB::ATOMIC_STORE] = "__atomic_store";
- Names[RTLIB::ATOMIC_STORE_1] = "__atomic_store_1";
- Names[RTLIB::ATOMIC_STORE_2] = "__atomic_store_2";
- Names[RTLIB::ATOMIC_STORE_4] = "__atomic_store_4";
- Names[RTLIB::ATOMIC_STORE_8] = "__atomic_store_8";
- Names[RTLIB::ATOMIC_STORE_16] = "__atomic_store_16";
-
- Names[RTLIB::ATOMIC_EXCHANGE] = "__atomic_exchange";
- Names[RTLIB::ATOMIC_EXCHANGE_1] = "__atomic_exchange_1";
- Names[RTLIB::ATOMIC_EXCHANGE_2] = "__atomic_exchange_2";
- Names[RTLIB::ATOMIC_EXCHANGE_4] = "__atomic_exchange_4";
- Names[RTLIB::ATOMIC_EXCHANGE_8] = "__atomic_exchange_8";
- Names[RTLIB::ATOMIC_EXCHANGE_16] = "__atomic_exchange_16";
-
- Names[RTLIB::ATOMIC_COMPARE_EXCHANGE] = "__atomic_compare_exchange";
- Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_1] = "__atomic_compare_exchange_1";
- Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_2] = "__atomic_compare_exchange_2";
- Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_4] = "__atomic_compare_exchange_4";
- Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_8] = "__atomic_compare_exchange_8";
- Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_16] = "__atomic_compare_exchange_16";
-
- Names[RTLIB::ATOMIC_FETCH_ADD_1] = "__atomic_fetch_add_1";
- Names[RTLIB::ATOMIC_FETCH_ADD_2] = "__atomic_fetch_add_2";
- Names[RTLIB::ATOMIC_FETCH_ADD_4] = "__atomic_fetch_add_4";
- Names[RTLIB::ATOMIC_FETCH_ADD_8] = "__atomic_fetch_add_8";
- Names[RTLIB::ATOMIC_FETCH_ADD_16] = "__atomic_fetch_add_16";
- Names[RTLIB::ATOMIC_FETCH_SUB_1] = "__atomic_fetch_sub_1";
- Names[RTLIB::ATOMIC_FETCH_SUB_2] = "__atomic_fetch_sub_2";
- Names[RTLIB::ATOMIC_FETCH_SUB_4] = "__atomic_fetch_sub_4";
- Names[RTLIB::ATOMIC_FETCH_SUB_8] = "__atomic_fetch_sub_8";
- Names[RTLIB::ATOMIC_FETCH_SUB_16] = "__atomic_fetch_sub_16";
- Names[RTLIB::ATOMIC_FETCH_AND_1] = "__atomic_fetch_and_1";
- Names[RTLIB::ATOMIC_FETCH_AND_2] = "__atomic_fetch_and_2";
- Names[RTLIB::ATOMIC_FETCH_AND_4] = "__atomic_fetch_and_4";
- Names[RTLIB::ATOMIC_FETCH_AND_8] = "__atomic_fetch_and_8";
- Names[RTLIB::ATOMIC_FETCH_AND_16] = "__atomic_fetch_and_16";
- Names[RTLIB::ATOMIC_FETCH_OR_1] = "__atomic_fetch_or_1";
- Names[RTLIB::ATOMIC_FETCH_OR_2] = "__atomic_fetch_or_2";
- Names[RTLIB::ATOMIC_FETCH_OR_4] = "__atomic_fetch_or_4";
- Names[RTLIB::ATOMIC_FETCH_OR_8] = "__atomic_fetch_or_8";
- Names[RTLIB::ATOMIC_FETCH_OR_16] = "__atomic_fetch_or_16";
- Names[RTLIB::ATOMIC_FETCH_XOR_1] = "__atomic_fetch_xor_1";
- Names[RTLIB::ATOMIC_FETCH_XOR_2] = "__atomic_fetch_xor_2";
- Names[RTLIB::ATOMIC_FETCH_XOR_4] = "__atomic_fetch_xor_4";
- Names[RTLIB::ATOMIC_FETCH_XOR_8] = "__atomic_fetch_xor_8";
- Names[RTLIB::ATOMIC_FETCH_XOR_16] = "__atomic_fetch_xor_16";
- Names[RTLIB::ATOMIC_FETCH_NAND_1] = "__atomic_fetch_nand_1";
- Names[RTLIB::ATOMIC_FETCH_NAND_2] = "__atomic_fetch_nand_2";
- Names[RTLIB::ATOMIC_FETCH_NAND_4] = "__atomic_fetch_nand_4";
- Names[RTLIB::ATOMIC_FETCH_NAND_8] = "__atomic_fetch_nand_8";
- Names[RTLIB::ATOMIC_FETCH_NAND_16] = "__atomic_fetch_nand_16";
-
- if (TT.isGNUEnvironment()) {
+
+ if (TT.isGNUEnvironment() || TT.isOSFuchsia()) {
Names[RTLIB::SINCOS_F32] = "sincosf";
Names[RTLIB::SINCOS_F64] = "sincos";
Names[RTLIB::SINCOS_F80] = "sincosl";
@@ -533,11 +127,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::SINCOS_PPCF128] = "sincosl";
}
- if (!TT.isOSOpenBSD()) {
- Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
+ if (TT.isOSOpenBSD()) {
+ Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr;
}
-
- Names[RTLIB::DEOPTIMIZE] = "__llvm_deoptimize";
}
/// Set default libcall CallingConvs.
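
The HANDLE_LIBCALL change above replaces the hand-maintained name table with an X-macro: RuntimeLibcalls.def lists each (enum, name) pair once, and every client expands the list with its own definition of the macro. A minimal, self-contained sketch of the pattern; the two entries are illustrative, not the actual contents of RuntimeLibcalls.def:

    #include <cstdio>

    enum Libcall { SHL_I32, SQRT_F64, UNKNOWN_LIBCALL };
    static const char *Names[UNKNOWN_LIBCALL];

    static void initNames() {
      // In LLVM the invocation lines below live in RuntimeLibcalls.def and
      // are pulled in with #include; they are inlined here for brevity.
    #define HANDLE_LIBCALL(code, name) Names[code] = name;
      HANDLE_LIBCALL(SHL_I32, "__ashlsi3")
      HANDLE_LIBCALL(SQRT_F64, "sqrt")
    #undef HANDLE_LIBCALL
    }

    int main() {
      initNames();
      std::printf("%s\n", Names[SQRT_F64]); // prints "sqrt"
    }
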
@@ -858,7 +450,6 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
}
/// InitCmpLibcallCCs - Set default comparison libcall CC.
-///
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
@@ -929,6 +520,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
MaxAtomicSizeInBitsSupported = 1024;
MinCmpXchgSizeInBits = 0;
+ SupportsUnalignedAtomics = false;
std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr);
@@ -1044,7 +636,6 @@ void TargetLoweringBase::initActions() {
  // On most systems, DEBUGTRAP and TRAP behave identically. The "Expand"
  // here informs the DAG legalizer to replace DEBUGTRAP with TRAP.
- //
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
}
@@ -1156,7 +747,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// found, fallback to the usual mechanism of widening/splitting the
// vector.
EVT OldEltVT = EltVT;
- while (1) {
+ while (true) {
// Increase the bitwidth of the element to the next pow-of-two
// (which is greater than 8 bits).
EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
@@ -1184,7 +775,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// Try to widen the vector until a legal type is found.
// If there is no wider legal type, split the vector.
- while (1) {
+ while (true) {
// Round up to the next power of 2.
NumElts = (unsigned)NextPowerOf2(NumElts);
@@ -1276,7 +867,7 @@ MachineBasicBlock *
TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
MachineBasicBlock *MBB) const {
MachineInstr *MI = &InitialMI;
- MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFunction &MF = *MI->getMF();
MachineFrameInfo &MFI = MF.getFrameInfo();
// We're handling multiple types of operands here:
@@ -1495,7 +1086,7 @@ void TargetLoweringBase::computeRegisterProperties(
bool IsLegalWiderType = false;
LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
switch (PreferredAction) {
- case TypePromoteInteger: {
+ case TypePromoteInteger:
// Try to promote the elements of integer vectors. If no legal
// promotion was found, fall through to the widen-vector method.
for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) {
@@ -1515,8 +1106,8 @@ void TargetLoweringBase::computeRegisterProperties(
if (IsLegalWiderType)
break;
LLVM_FALLTHROUGH;
- }
- case TypeWidenVector: {
+
+ case TypeWidenVector:
// Try to widen the vector.
for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
@@ -1533,7 +1124,7 @@ void TargetLoweringBase::computeRegisterProperties(
if (IsLegalWiderType)
break;
LLVM_FALLTHROUGH;
- }
+
case TypeSplitVector:
case TypeScalarizeVector: {
MVT IntermediateVT;
@@ -1598,7 +1189,6 @@ MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
/// This method returns the number of registers needed, and the VT for each
/// register. It also returns the VT and quantity of the intermediate values
/// before they are promoted/expanded.
-///
unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT &IntermediateVT,
unsigned &NumIntermediates,
@@ -1911,7 +1501,7 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
/// by AM is legal for this target, for a load/store of the specified type.
bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
// The default implementation of this implements a conservative RISCy, r+r and
// r+i addr mode.
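
The hunk above only threads the instruction through to targets that want it; the conservative check itself is unchanged. A rough sketch of what an "r, r+r, r+i" legality test looks like, with field names modeled on TargetLowering::AddrMode and deliberately simplified logic (an assumption, not the exact LLVM implementation):

    // Hedged sketch of a conservative addressing-mode check.
    struct AddrModeSketch {
      long BaseOffs;    // constant offset: the "i" in r+i
      bool HasBaseReg;  // base register: the first "r"
      int Scale;        // scaled index register: r + Scale*r
      bool HasBaseGV;   // a global used directly as the base
    };

    static bool isConservativelyLegal(const AddrModeSketch &AM) {
      if (AM.HasBaseGV)
        return false;                     // no global bases at all
      if (AM.Scale != 0 && AM.Scale != 1)
        return false;                     // index register only if unscaled
      if (AM.Scale == 1 && AM.BaseOffs != 0)
        return false;                     // r+r and r+i, but not r+r+i
      return true;
    }
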
@@ -2002,8 +1592,8 @@ void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
/// Get the reciprocal estimate attribute string for a function that will
/// override the target defaults.
static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
- const Function *F = MF.getFunction();
- return F->getFnAttribute("reciprocal-estimates").getValueAsString();
+ const Function &F = MF.getFunction();
+ return F.getFnAttribute("reciprocal-estimates").getValueAsString();
}
/// Construct a string for the given reciprocal operation of the given type.
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 6922e33c8d6c..24d4baa31e1f 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -52,6 +52,7 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -134,7 +135,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
ELF::SHT_PROGBITS, Flags, 0);
unsigned Size = DL.getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(DL.getPointerABIAlignment());
+ Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0));
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::create(Size, getContext());
Streamer.emitELFSize(Label, E);
@@ -168,8 +169,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
MMI, Streamer);
}
-static SectionKind
-getELFKindForNamedSection(StringRef Name, SectionKind K) {
+static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
  // N.B.: The defaults used in here are not the same ones used in MC.
// We follow gcc, MC follows gas. For example, given ".section .eh_frame",
// both gas and MC will produce a section with no flags. Given
@@ -531,10 +531,8 @@ static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray,
Name = ".ctors";
else
Name = ".dtors";
- if (Priority != 65535) {
- Name += '.';
- Name += utostr(65535 - Priority);
- }
+ if (Priority != 65535)
+ raw_string_ostream(Name) << format(".%05u", 65535 - Priority);
Type = ELF::SHT_PROGBITS;
}
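
Zero-padding the priority suffix keeps the section names' lexicographic order consistent with numeric priority order, which the plain utostr form did not guarantee. A worked example with hypothetical priorities:

    // Priority 101:   65535 - 101   = 65434 -> ".ctors.65434" (same as before)
    // Priority 65000: 65535 - 65000 = 535   -> ".ctors.00535"
    // Priority 64535: 65535 - 64535 = 1000  -> ".ctors.01000"
    // Unpadded, ".ctors.1000" would sort before ".ctors.535" even though
    // 1000 > 535; the "%05u" padding makes string order match numeric order.
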
@@ -1213,16 +1211,38 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
}
}
+static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
+ const Triple &T, bool IsCtor,
+ unsigned Priority,
+ const MCSymbol *KeySym,
+ MCSectionCOFF *Default) {
+ if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment())
+ return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
+
+ std::string Name = IsCtor ? ".ctors" : ".dtors";
+ if (Priority != 65535)
+ raw_string_ostream(Name) << format(".%05u", 65535 - Priority);
+
+ return Ctx.getAssociativeCOFFSection(
+ Ctx.getCOFFSection(Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getData()),
+ KeySym, 0);
+}
+
MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- return getContext().getAssociativeCOFFSection(
- cast<MCSectionCOFF>(StaticCtorSection), KeySym, 0);
+ return getCOFFStaticStructorSection(getContext(), getTargetTriple(), true,
+ Priority, KeySym,
+ cast<MCSectionCOFF>(StaticCtorSection));
}
MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- return getContext().getAssociativeCOFFSection(
- cast<MCSectionCOFF>(StaticDtorSection), KeySym, 0);
+ return getCOFFStaticStructorSection(getContext(), getTargetTriple(), false,
+ Priority, KeySym,
+ cast<MCSectionCOFF>(StaticDtorSection));
}
void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
@@ -1234,32 +1254,40 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
// Wasm
//===----------------------------------------------------------------------===//
-static const Comdat *getWasmComdat(const GlobalValue *GV) {
+static void checkWasmComdat(const GlobalValue *GV) {
const Comdat *C = GV->getComdat();
if (!C)
- return nullptr;
+ return;
- if (C->getSelectionKind() != Comdat::Any)
- report_fatal_error("Wasm COMDATs only support SelectionKind::Any, '" +
- C->getName() + "' cannot be lowered.");
+  // TODO(sbc): At some point we may need COMDAT support, but COMDATs are
+  // currently unsupported.
+ report_fatal_error("WebAssembly doesn't support COMDATs, '" + C->getName() +
+ "' cannot be lowered.");
+}
- return C;
+static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) {
+ // If we're told we have function data, then use that.
+ if (K.isText())
+ return SectionKind::getText();
+
+ // Otherwise, ignore whatever section type the generic impl detected and use
+ // a plain data section.
+ return SectionKind::getData();
}
MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
- llvm_unreachable("getExplicitSectionGlobal not yet implemented");
- return nullptr;
+ StringRef Name = GO->getSection();
+ checkWasmComdat(GO);
+ Kind = getWasmKindForNamedSection(Name, Kind);
+ return getContext().getWasmSection(Name, Kind);
}
-static MCSectionWasm *
-selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM, bool EmitUniqueSection,
- unsigned Flags, unsigned *NextUniqueID) {
+static MCSectionWasm *selectWasmSectionForGlobal(
+ MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) {
StringRef Group = "";
- if (getWasmComdat(GO))
- llvm_unreachable("comdat not yet supported for wasm");
+ checkWasmComdat(GO);
bool UniqueSectionNames = TM.getUniqueSectionNames();
SmallString<128> Name = getSectionPrefixForGlobal(Kind);
@@ -1279,8 +1307,7 @@ selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
UniqueID = *NextUniqueID;
(*NextUniqueID)++;
}
- return Ctx.getWasmSection(Name, /*Type=*/0, Flags,
- Group, UniqueID);
+ return Ctx.getWasmSection(Name, Kind, Group, UniqueID);
}
MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
@@ -1299,8 +1326,7 @@ MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
EmitUniqueSection |= GO->hasComdat();
return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
- EmitUniqueSection, /*Flags=*/0,
- &NextUniqueID);
+ EmitUniqueSection, &NextUniqueID);
}
bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection(
@@ -1330,7 +1356,21 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
}
-void
-TargetLoweringObjectFileWasm::InitializeWasm() {
- // TODO: Initialize StaticCtorSection and StaticDtorSection.
+void TargetLoweringObjectFileWasm::InitializeWasm() {
+ StaticCtorSection =
+ getContext().getWasmSection(".init_array", SectionKind::getData());
+}
+
+MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return Priority == UINT16_MAX ?
+ StaticCtorSection :
+ getContext().getWasmSection(".init_array." + utostr(Priority),
+ SectionKind::getData());
+}
+
+MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ llvm_unreachable("@llvm.global_dtors should have been lowered already");
+ return nullptr;
}
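
With these hooks, wasm constructors at the default priority land in the plain .init_array section, while any other priority gets an unpadded numeric suffix (note the contrast with the zero-padded COFF/ELF names above). Illustrative values:

    // Priority UINT16_MAX (65535, the default) -> ".init_array"
    // Priority 101                             -> ".init_array.101"
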
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index ed845e1706f8..853e71d0efa5 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -13,11 +13,11 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
/// DisableFramePointerElim - This returns true if frame pointer elimination
@@ -28,7 +28,7 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
return true;
// Check to see if we should eliminate non-leaf frame pointers.
- if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf"))
+ if (MF.getFunction().hasFnAttribute("no-frame-pointer-elim-non-leaf"))
return MF.getFrameInfo().hasCalls();
return false;
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 817e58ce59e1..121bed5a79cb 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -47,6 +47,9 @@
using namespace llvm;
+cl::opt<bool> EnableIPRA("enable-ipra", cl::init(false), cl::Hidden,
+ cl::desc("Enable interprocedural register allocation "
+ "to reduce load/store at procedure calls."));
static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden,
cl::desc("Disable Post Regalloc Scheduler"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -90,7 +93,11 @@ static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inli
static cl::opt<bool> EnableImplicitNullChecks(
"enable-implicit-null-checks",
cl::desc("Fold null checks into faulting memory operations"),
- cl::init(false));
+ cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ EnableMergeICmps("enable-mergeicmps",
+ cl::desc("Merge ICmp chains into a single memcmp"),
+ cl::init(false), cl::Hidden);
static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
cl::desc("Print LLVM IR produced by the loop-reduce pass"));
static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
@@ -104,6 +111,11 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner",
cl::Hidden,
cl::desc("Enable machine outliner"));
+static cl::opt<bool> EnableLinkOnceODROutlining(
+ "enable-linkonceodr-outlining",
+ cl::Hidden,
+ cl::desc("Enable the machine outliner on linkonceodr functions"),
+ cl::init(false));
// Enable or disable FastISel. Both options are needed, because
// FastISel is enabled by default with -fast, and we wish to be
// able to enable or disable fast-isel independently from -O0.
@@ -115,10 +127,9 @@ static cl::opt<cl::boolOrDefault>
EnableGlobalISel("global-isel", cl::Hidden,
cl::desc("Enable the \"global\" instruction selector"));
-static cl::opt<std::string>
-PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
- cl::desc("Print machine instrs"),
- cl::value_desc("pass-name"), cl::init("option-unspecified"));
+static cl::opt<std::string> PrintMachineInstrs(
+ "print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"),
+ cl::value_desc("pass-name"), cl::init("option-unspecified"), cl::Hidden);
static cl::opt<int> EnableGlobalISelAbort(
"global-isel-abort", cl::Hidden,
@@ -153,6 +164,34 @@ static cl::opt<CFLAAType> UseCFLAA(
clEnumValN(CFLAAType::Both, "both",
"Enable both variants of CFL-AA")));
+/// Option names for limiting the codegen pipeline.
+/// These are used in error reporting, and keeping them in one place avoids
+/// duplicating the option names throughout the code.
+const char *StartAfterOptName = "start-after";
+const char *StartBeforeOptName = "start-before";
+const char *StopAfterOptName = "stop-after";
+const char *StopBeforeOptName = "stop-before";
+
+static cl::opt<std::string>
+ StartAfterOpt(StringRef(StartAfterOptName),
+ cl::desc("Resume compilation after a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+ StartBeforeOpt(StringRef(StartBeforeOptName),
+ cl::desc("Resume compilation before a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+ StopAfterOpt(StringRef(StopAfterOptName),
+ cl::desc("Stop compilation after a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+ StopBeforeOpt(StringRef(StopBeforeOptName),
+ cl::desc("Stop compilation before a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
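
The four start/stop options above make it possible to run just a slice of the codegen pipeline, which is mostly useful for testing and bisection. A usage sketch; the pass names are examples and must match names registered with PassRegistry:

    llc -stop-after=machine-scheduler foo.ll -o foo.mir
    llc -start-before=greedy foo.mir -o -
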
@@ -282,6 +321,37 @@ TargetPassConfig::~TargetPassConfig() {
delete Impl;
}
+static const PassInfo *getPassInfo(StringRef PassName) {
+ if (PassName.empty())
+ return nullptr;
+
+ const PassRegistry &PR = *PassRegistry::getPassRegistry();
+ const PassInfo *PI = PR.getPassInfo(PassName);
+ if (!PI)
+ report_fatal_error(Twine('\"') + Twine(PassName) +
+ Twine("\" pass is not registered."));
+ return PI;
+}
+
+static AnalysisID getPassIDFromName(StringRef PassName) {
+ const PassInfo *PI = getPassInfo(PassName);
+ return PI ? PI->getTypeInfo() : nullptr;
+}
+
+void TargetPassConfig::setStartStopPasses() {
+ StartBefore = getPassIDFromName(StartBeforeOpt);
+ StartAfter = getPassIDFromName(StartAfterOpt);
+ StopBefore = getPassIDFromName(StopBeforeOpt);
+ StopAfter = getPassIDFromName(StopAfterOpt);
+ if (StartBefore && StartAfter)
+ report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") +
+ Twine(StartAfterOptName) + Twine(" specified!"));
+ if (StopBefore && StopAfter)
+ report_fatal_error(Twine(StopBeforeOptName) + Twine(" and ") +
+ Twine(StopAfterOptName) + Twine(" specified!"));
+ Started = (StartAfter == nullptr) && (StartBefore == nullptr);
+}
+
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
@@ -303,8 +373,17 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
if (StringRef(PrintMachineInstrs.getValue()).equals(""))
TM.Options.PrintMachineCode = true;
+ if (EnableIPRA.getNumOccurrences())
+ TM.Options.EnableIPRA = EnableIPRA;
+ else {
+ // If not explicitly specified, use target default.
+ TM.Options.EnableIPRA = TM.useIPRA();
+ }
+
if (TM.Options.EnableIPRA)
setRequiresCodeGenSCCOrder();
+
+ setStartStopPasses();
}
CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
@@ -339,6 +418,30 @@ TargetPassConfig::TargetPassConfig()
"triple set?");
}
+bool TargetPassConfig::hasLimitedCodeGenPipeline() const {
+ return StartBefore || StartAfter || StopBefore || StopAfter;
+}
+
+std::string
+TargetPassConfig::getLimitedCodeGenPipelineReason(const char *Separator) const {
+ if (!hasLimitedCodeGenPipeline())
+ return std::string();
+ std::string Res;
+ static cl::opt<std::string> *PassNames[] = {&StartAfterOpt, &StartBeforeOpt,
+ &StopAfterOpt, &StopBeforeOpt};
+ static const char *OptNames[] = {StartAfterOptName, StartBeforeOptName,
+ StopAfterOptName, StopBeforeOptName};
+ bool IsFirst = true;
+ for (int Idx = 0; Idx < 4; ++Idx)
+ if (!PassNames[Idx]->empty()) {
+ if (!IsFirst)
+ Res += Separator;
+ IsFirst = false;
+ Res += OptNames[Idx];
+ }
+ return Res;
+}
+
// Helper to verify the analysis is really immutable.
void TargetPassConfig::setOpt(bool &Opt, bool Val) {
assert(!Initialized && "PassConfig is immutable");
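
getLimitedCodeGenPipelineReason simply joins the names of whichever limiting options were set, for use in diagnostics. For example, if a run sets -start-after and -stop-before (the pass-name values are immaterial here), then:

    // getLimitedCodeGenPipelineReason(" and ") == "start-after and stop-before"
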
@@ -496,6 +599,16 @@ void TargetPassConfig::addIRPasses() {
addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
}
+ if (getOptLevel() != CodeGenOpt::None) {
+ // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
+ // loads and compares. ExpandMemCmpPass then tries to expand those calls
+ // into optimally-sized loads and compares. The transforms are enabled by a
+ // target lowering hook.
+ if (EnableMergeICmps)
+ addPass(createMergeICmpsPass());
+ addPass(createExpandMemCmpPass());
+ }
+
// Run GC lowering passes for builtin collectors
// TODO: add a pass insertion point here
addPass(createGCLoweringPass());
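
To make the division of labor concrete: MergeICmps rewrites a chain of equality compares over adjacent memory into one memcmp call, and ExpandMemCmp then lowers small memcmp calls into optimally sized loads and compares. A source-level analogue of the first rewrite (a hedged illustration, not the pass's actual IR transformation; Pair has no padding, which is what makes the rewrite sound):

    #include <cstring>

    struct Pair { int A; int B; };  // contiguous fields, no padding

    bool eqChained(const Pair &X, const Pair &Y) {
      return X.A == Y.A && X.B == Y.B;  // the compare chain MergeICmps targets
    }

    bool eqMerged(const Pair &X, const Pair &Y) {
      return std::memcmp(&X, &Y, sizeof(Pair)) == 0;  // the memcmp it forms
    }
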
@@ -511,8 +624,8 @@ void TargetPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
addPass(createPartiallyInlineLibCallsPass());
- // Insert calls to mcount-like functions.
- addPass(createCountingFunctionInserterPass());
+ // Instrument function entry and exit, e.g. with calls to mcount().
+ addPass(createPostInlineEntryExitInstrumenterPass());
  // Add scalarization of target's unsupported masked memory intrinsics pass.
  // The unsupported intrinsic will be replaced with a chain of basic blocks,
@@ -653,10 +766,9 @@ bool TargetPassConfig::addISelPasses() {
/// -regalloc=... command line option.
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
- RegisterPassParser<RegisterRegAlloc> >
-RegAlloc("regalloc",
- cl::init(&useDefaultRegisterAllocator),
- cl::desc("Register allocator to use"));
+ RegisterPassParser<RegisterRegAlloc>>
+ RegAlloc("regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use"));
/// Add the complete set of target-independent postISel code generator passes.
///
@@ -694,9 +806,6 @@ void TargetPassConfig::addMachinePasses() {
// Print the instruction selected machine code...
printAndVerify("After Instruction Selection");
- if (TM->Options.EnableIPRA)
- addPass(createRegUsageInfoPropPass());
-
// Expand pseudo-instructions emitted by ISel.
addPass(&ExpandISelPseudosID);
@@ -709,6 +818,9 @@ void TargetPassConfig::addMachinePasses() {
addPass(&LocalStackSlotAllocationID, false);
}
+ if (TM->Options.EnableIPRA)
+ addPass(createRegUsageInfoPropPass());
+
// Run pre-ra passes.
addPreRegAlloc();
@@ -788,7 +900,7 @@ void TargetPassConfig::addMachinePasses() {
addPass(&PatchableFunctionID, false);
if (EnableMachineOutliner)
- PM->add(createMachineOutlinerPass());
+ PM->add(createMachineOutlinerPass(EnableLinkOnceODROutlining));
AddingMachinePasses = false;
}
@@ -824,9 +936,6 @@ void TargetPassConfig::addMachineSSAOptimization() {
addPass(&MachineLICMID, false);
addPass(&MachineCSEID, false);
- // Coalesce basic blocks with the same branch condition
- addPass(&BranchCoalescingID);
-
addPass(&MachineSinkingID);
addPass(&PeepholeOptimizerID);
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index eeb00a784b0d..f03c3b8300f3 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -11,13 +11,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
@@ -27,9 +31,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <utility>
@@ -41,11 +42,14 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
regclass_iterator RCB, regclass_iterator RCE,
const char *const *SRINames,
const LaneBitmask *SRILaneMasks,
- LaneBitmask SRICoveringLanes)
+ LaneBitmask SRICoveringLanes,
+ const RegClassInfo *const RCIs,
+ unsigned Mode)
: InfoDesc(ID), SubRegIndexNames(SRINames),
SubRegIndexLaneMasks(SRILaneMasks),
RegClassBegin(RCB), RegClassEnd(RCE),
- CoveringLanes(SRICoveringLanes) {
+ CoveringLanes(SRICoveringLanes),
+ RCInfos(RCIs), HwMode(Mode) {
}
TargetRegisterInfo::~TargetRegisterInfo() = default;
@@ -65,8 +69,8 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
continue;
for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) {
if (!RegisterSet[*SR] && !is_contained(Exceptions, Reg)) {
- dbgs() << "Error: Super register " << PrintReg(*SR, this)
- << " of reserved register " << PrintReg(Reg, this)
+ dbgs() << "Error: Super register " << printReg(*SR, this)
+ << " of reserved register " << printReg(Reg, this)
<< " is not reserved.\n";
return false;
}
@@ -81,7 +85,7 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
namespace llvm {
-Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
+Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI,
unsigned SubIdx) {
return Printable([Reg, TRI, SubIdx](raw_ostream &OS) {
if (!Reg)
@@ -89,11 +93,15 @@ Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
else if (TargetRegisterInfo::isStackSlot(Reg))
OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
else if (TargetRegisterInfo::isVirtualRegister(Reg))
- OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
- else if (TRI && Reg < TRI->getNumRegs())
- OS << '%' << TRI->getName(Reg);
- else
- OS << "%physreg" << Reg;
+ OS << '%' << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (!TRI)
+ OS << '%' << "physreg" << Reg;
+ else if (Reg < TRI->getNumRegs()) {
+ OS << '%';
+ printLowerCase(TRI->getName(Reg), OS);
+ } else
+ llvm_unreachable("Register kind is unsupported.");
+
if (SubIdx) {
if (TRI)
OS << ':' << TRI->getSubRegIndexName(SubIdx);
@@ -103,7 +111,7 @@ Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
});
}
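
The net effect of the printReg rewrite on debug output, with illustrative registers:

    // virtual register index 5         -> "%5"        (was "%vreg5")
    // physical register, e.g. X86 EAX  -> "%eax"      (was "%EAX")
    // no TargetRegisterInfo available  -> "%physreg42"
    // stack slot 3                     -> "SS#3"      (unchanged)
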
-Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
return Printable([Unit, TRI](raw_ostream &OS) {
// Generic printout when TRI is missing.
if (!TRI) {
@@ -126,12 +134,27 @@ Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
});
}
-Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
return Printable([Unit, TRI](raw_ostream &OS) {
if (TRI && TRI->isVirtualRegister(Unit)) {
- OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit);
+ OS << '%' << TargetRegisterInfo::virtReg2Index(Unit);
} else {
- OS << PrintRegUnit(Unit, TRI);
+ OS << printRegUnit(Unit, TRI);
+ }
+ });
+}
+
+Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo,
+ const TargetRegisterInfo *TRI) {
+ return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) {
+ if (RegInfo.getRegClassOrNull(Reg))
+ OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
+ else if (RegInfo.getRegBankOrNull(Reg))
+ OS << StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower();
+ else {
+ OS << "_";
+ assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) &&
+ "Generic registers must have a valid type");
}
});
}
@@ -357,7 +380,7 @@ bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
}
// Compute target-independent register allocator hints to help eliminate copies.
-void
+bool
TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
@@ -365,49 +388,55 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
- std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
-
- // Hints with HintType != 0 were set by target-dependent code.
- // Such targets must provide their own implementation of
- // TRI::getRegAllocationHints to interpret those hint types.
- assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
-
- // Target-independent hints are either a physical or a virtual register.
- unsigned Phys = Hint.second;
- if (VRM && isVirtualRegister(Phys))
- Phys = VRM->getPhys(Phys);
-
- // Check that Phys is a valid hint in VirtReg's register class.
- if (!isPhysicalRegister(Phys))
- return;
- if (MRI.isReserved(Phys))
- return;
- // Check that Phys is in the allocation order. We shouldn't heed hints
- // from VirtReg's register class if they aren't in the allocation order. The
- // target probably has a reason for removing the register.
- if (!is_contained(Order, Phys))
- return;
-
- // All clear, tell the register allocator to prefer this register.
- Hints.push_back(Phys);
+ const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI =
+ MRI.getRegAllocationHints(VirtReg);
+
+ // First hint may be a target hint.
+ bool Skip = (Hints_MRI.first != 0);
+ for (auto Reg : Hints_MRI.second) {
+ if (Skip) {
+ Skip = false;
+ continue;
+ }
+
+ // Target-independent hints are either a physical or a virtual register.
+ unsigned Phys = Reg;
+ if (VRM && isVirtualRegister(Phys))
+ Phys = VRM->getPhys(Phys);
+
+ // Check that Phys is a valid hint in VirtReg's register class.
+ if (!isPhysicalRegister(Phys))
+ continue;
+ if (MRI.isReserved(Phys))
+ continue;
+ // Check that Phys is in the allocation order. We shouldn't heed hints
+ // from VirtReg's register class if they aren't in the allocation order. The
+ // target probably has a reason for removing the register.
+ if (!is_contained(Order, Phys))
+ continue;
+
+ // All clear, tell the register allocator to prefer this register.
+ Hints.push_back(Phys);
+ }
+ return false;
}
bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- return !MF.getFunction()->hasFnAttribute("no-realign-stack");
+ return !MF.getFunction().hasFnAttribute("no-realign-stack");
}
bool TargetRegisterInfo::needsStackRealignment(
const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- const Function *F = MF.getFunction();
+ const Function &F = MF.getFunction();
unsigned StackAlign = TFI->getStackAlignment();
bool requiresRealignment = ((MFI.getMaxAlignment() > StackAlign) ||
- F->hasFnAttribute(Attribute::StackAlignment));
- if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) {
+ F.hasFnAttribute(Attribute::StackAlignment));
+ if (F.hasFnAttribute("stackrealign") || requiresRealignment) {
if (canRealignStack(MF))
return true;
- DEBUG(dbgs() << "Can't realign function's stack: " << F->getName() << "\n");
+ DEBUG(dbgs() << "Can't realign function's stack: " << F.getName() << "\n");
}
return false;
}
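
With the container change in getRegAllocationHints above, a virtual register can carry several ordered hints instead of one; a nonzero first pair element marks the leading hint as target-specific, and the default implementation skips it. A hedged sketch of recording two hints from target code (addRegAllocationHint is the companion MachineRegisterInfo API for the multi-hint container; treat the exact signature as an assumption):

    // Prefer PhysA, then PhysB, for VirtReg; the allocator tries them in order.
    MRI.addRegAllocationHint(VirtReg, PhysA);
    MRI.addRegAllocationHint(VirtReg, PhysB);
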
@@ -425,6 +454,6 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
LLVM_DUMP_METHOD
void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
const TargetRegisterInfo *TRI) {
- dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n";
+ dbgs() << printReg(Reg, TRI, SubRegIndex) << "\n";
}
#endif
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 9210ea8a83f6..86dbf1b2aeab 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -16,15 +16,15 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -316,7 +316,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
// correctly append imp-use operands, and readsReg() strangely returns false
// for predicated defs.
unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
- const MachineFunction &MF = *DefMI->getParent()->getParent();
+ const MachineFunction &MF = *DefMI->getMF();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
return computeInstrLatency(DefMI);
@@ -339,42 +339,46 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
static Optional<double>
getRThroughputFromItineraries(unsigned schedClass,
const InstrItineraryData *IID){
- double Unknown = std::numeric_limits<double>::infinity();
- double Throughput = Unknown;
+ Optional<double> Throughput;
for (const InstrStage *IS = IID->beginStage(schedClass),
*E = IID->endStage(schedClass);
IS != E; ++IS) {
- unsigned Cycles = IS->getCycles();
- if (!Cycles)
- continue;
- Throughput =
- std::min(Throughput, countPopulation(IS->getUnits()) * 1.0 / Cycles);
+ if (IS->getCycles()) {
+ double Temp = countPopulation(IS->getUnits()) * 1.0 / IS->getCycles();
+ Throughput = Throughput.hasValue()
+ ? std::min(Throughput.getValue(), Temp)
+ : Temp;
+ }
}
- // We need reciprocal throughput that's why we return such value.
- return 1 / Throughput;
+ if (Throughput.hasValue())
+    // We need the reciprocal throughput, so return the inverted value.
+ return 1 / Throughput.getValue();
+ return Throughput;
}
static Optional<double>
getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
const TargetSubtargetInfo *STI,
const MCSchedModel &SchedModel) {
- double Unknown = std::numeric_limits<double>::infinity();
- double Throughput = Unknown;
+ Optional<double> Throughput;
for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc),
*WEnd = STI->getWriteProcResEnd(SCDesc);
WPR != WEnd; ++WPR) {
- unsigned Cycles = WPR->Cycles;
- if (!Cycles)
- return Optional<double>();
-
- unsigned NumUnits =
- SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits;
- Throughput = std::min(Throughput, NumUnits * 1.0 / Cycles);
+ if (WPR->Cycles) {
+ unsigned NumUnits =
+ SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits;
+ double Temp = NumUnits * 1.0 / WPR->Cycles;
+ Throughput = Throughput.hasValue()
+ ? std::min(Throughput.getValue(), Temp)
+ : Temp;
+ }
}
- // We need reciprocal throughput that's why we return such value.
- return 1 / Throughput;
+ if (Throughput.hasValue())
+    // We need the reciprocal throughput, so return the inverted value.
+ return 1 / Throughput.getValue();
+ return Throughput;
}
Optional<double>
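
A worked example of the computation above, with hypothetical resource entries:

    // Entry A: 2 units, 4 cycles -> 2.0 / 4 = 0.50 issues per cycle
    // Entry B: 1 unit,  1 cycle  -> 1.0 / 1 = 1.00 issues per cycle
    // Throughput            = min(0.50, 1.00) = 0.50
    // Reciprocal throughput = 1 / 0.50        = 2.0 cycles per instruction
    // If no entry has a nonzero cycle count, the Optional now stays empty
    // instead of yielding a degenerate 1/infinity result.
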
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp
index f6d5bc80ddff..1a317cd865f0 100644
--- a/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -11,11 +11,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
@@ -50,6 +51,10 @@ bool TargetSubtargetInfo::enableRALocalReassignment(
return true;
}
+bool TargetSubtargetInfo::enableAdvancedRASplitCost() const {
+ return false;
+}
+
bool TargetSubtargetInfo::enablePostRAScheduler() const {
return getSchedModel().PostRAScheduler;
}
@@ -93,9 +98,15 @@ std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
// that could be changed during the compilation
TargetSchedModel TSchedModel;
TSchedModel.init(getSchedModel(), this, getInstrInfo());
- if (!TSchedModel.hasInstrSchedModel())
+ unsigned Latency;
+ if (TSchedModel.hasInstrSchedModel())
+ Latency = TSchedModel.computeInstrLatency(MCI.getOpcode());
+ else if (TSchedModel.hasInstrItineraries()) {
+ auto *ItinData = TSchedModel.getInstrItineraries();
+ Latency = ItinData->getStageLatency(
+ getInstrInfo()->get(MCI.getOpcode()).getSchedClass());
+ } else
return std::string();
- unsigned Latency = TSchedModel.computeInstrLatency(MCI.getOpcode());
Optional<double> RThroughput =
TSchedModel.computeInstrRThroughput(MCI.getOpcode());
return createSchedInfoStr(Latency, RThroughput);
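The getSchedInfoStr hunk above makes a latency figure available even when the target only provides classic itineraries. A hedged sketch of the selection order; SchedModelView and its fields are illustrative stand-ins for the TargetSchedModel queries, not the real API.

#include <iostream>
#include <string>

// Illustrative stand-in for the queries getSchedInfoStr performs above.
struct SchedModelView {
  bool HasInstrSchedModel;   // per-opcode machine model available?
  bool HasItineraries;       // classic itinerary tables available?
  unsigned ModelLatency;     // computeInstrLatency(...) result
  unsigned ItineraryLatency; // getStageLatency(...) result
};

std::string schedInfoStr(const SchedModelView &M) {
  unsigned Latency;
  if (M.HasInstrSchedModel)
    Latency = M.ModelLatency;     // preferred source
  else if (M.HasItineraries)
    Latency = M.ItineraryLatency; // fallback added by this patch
  else
    return std::string();         // no data: emit no annotation
  return "Latency=" + std::to_string(Latency);
}

int main() {
  std::cout << schedInfoStr({false, true, 0, 7}) << "\n"; // prints Latency=7
}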
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 83c00e24d14f..774b76f84b7f 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1,4 +1,4 @@
-//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//===- TwoAddressInstructionPass.cpp - Two-Address instruction pass -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -28,27 +28,40 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
using namespace llvm;
@@ -76,6 +89,7 @@ static cl::opt<unsigned> MaxDataFlowEdge(
"the benefit of commuting operands"));
namespace {
+
class TwoAddressInstructionPass : public MachineFunctionPass {
MachineFunction *MF;
const TargetInstrInfo *TII;
@@ -96,6 +110,10 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
// Set of already processed instructions in the current block.
SmallPtrSet<MachineInstr*, 8> Processed;
+ // Set of instructions converted to three-address by target and then sunk
+ // down current basic block.
+ SmallPtrSet<MachineInstr*, 8> SunkInstrs;
+
// A map from virtual registers to physical registers which are likely targets
// to be coalesced to due to copies from physical registers to virtual
// registers. e.g. v1024 = move r0.
@@ -148,14 +166,16 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
void processCopy(MachineInstr *MI);
- typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
- typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+ using TiedPairList = SmallVector<std::pair<unsigned, unsigned>, 4>;
+ using TiedOperandMap = SmallDenseMap<unsigned, TiedPairList>;
+
bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
void eliminateRegSequence(MachineBasicBlock::iterator&);
public:
static char ID; // Pass identification, replacement for typeid
+
TwoAddressInstructionPass() : MachineFunctionPass(ID) {
initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
}
@@ -175,17 +195,19 @@ public:
/// Pass entry point.
bool runOnMachineFunction(MachineFunction&) override;
};
+
} // end anonymous namespace
char TwoAddressInstructionPass::ID = 0;
+
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+
INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE,
"Two-Address instruction pass", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE,
"Two-Address instruction pass", false, false)
-char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
-
static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS);
/// A two-address instruction has been converted to a three-address instruction
@@ -267,7 +289,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
++KillPos;
unsigned NumVisited = 0;
- for (MachineInstr &OtherMI : llvm::make_range(std::next(OldPos), KillPos)) {
+ for (MachineInstr &OtherMI : make_range(std::next(OldPos), KillPos)) {
// DBG_VALUE cannot be counted against the limit.
if (OtherMI.isDebugValue())
continue;
@@ -436,8 +458,8 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
/// For example, in this code:
///
/// %reg1034 = copy %reg1024
-/// %reg1035 = copy %reg1025<kill>
-/// %reg1036 = add %reg1034<kill>, %reg1035<kill>
+/// %reg1035 = copy killed %reg1025
+/// %reg1036 = add killed %reg1034, killed %reg1035
///
/// %reg1034 is not considered to be killed, since it is copied from a
/// register which is not killed. Treating it as not killed lets the
@@ -452,7 +474,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
LiveIntervals *LIS,
bool allowFalsePositives) {
MachineInstr *DefMI = &MI;
- for (;;) {
+ while (true) {
// All uses of physical registers are likely to be kills.
if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
(allowFalsePositives || MRI->hasOneUse(Reg)))
@@ -569,31 +591,31 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// general, we want no uses between this instruction and the definition of
// the two-address register.
// e.g.
- // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
- // %reg1029<def> = MOV8rr %reg1028
- // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
- // insert => %reg1030<def> = MOV8rr %reg1028
- // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
+ // %reg1029 = MOV8rr %reg1028
+ // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
+ // insert => %reg1030 = MOV8rr %reg1028
+ // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags
// In this case, it might not be possible to coalesce the second MOV8rr
// instruction if the first one is coalesced. So it would be profitable to
// commute it:
- // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
- // %reg1029<def> = MOV8rr %reg1028
- // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
- // insert => %reg1030<def> = MOV8rr %reg1029
- // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+ // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
+ // %reg1029 = MOV8rr %reg1028
+ // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
+ // insert => %reg1030 = MOV8rr %reg1029
+ // %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags
if (!isPlainlyKilled(MI, regC, LIS))
return false;
// Ok, we have something like:
- // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags
// let's see if it's worth commuting it.
// Look for situations like this:
- // %reg1024<def> = MOV r1
- // %reg1025<def> = MOV r0
- // %reg1026<def> = ADD %reg1024, %reg1025
+ // %reg1024 = MOV r1
+ // %reg1025 = MOV r0
+ // %reg1026 = ADD %reg1024, %reg1025
// r0 = MOV %reg1026
// Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
unsigned ToRegA = getMappedReg(regA, DstRegMap);
@@ -691,9 +713,9 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
bool
TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
// Look for situations like this:
- // %reg1024<def> = MOV r1
- // %reg1025<def> = MOV r0
- // %reg1026<def> = ADD %reg1024, %reg1025
+ // %reg1024 = MOV r1
+ // %reg1025 = MOV r0
+ // %reg1026 = ADD %reg1024, %reg1025
// r2 = MOV %reg1026
// Turn ADD into a 3-address instruction to avoid a copy.
unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
@@ -738,6 +760,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
mi = NewMI;
nmi = std::next(mi);
}
+ else
+ SunkInstrs.insert(NewMI);
// Update source and destination register maps.
SrcRegMap.erase(RegA);
@@ -904,7 +928,6 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// Move the copies connected to MI down as well.
MachineBasicBlock::iterator Begin = MI;
MachineBasicBlock::iterator AfterMI = std::next(Begin);
-
MachineBasicBlock::iterator End = AfterMI;
while (End->isCopy() &&
regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) {
@@ -916,7 +939,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
unsigned NumVisited = 0;
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
- for (MachineInstr &OtherMI : llvm::make_range(End, KillPos)) {
+ for (MachineInstr &OtherMI : make_range(End, KillPos)) {
// DBG_VALUE cannot be counted against the limit.
if (OtherMI.isDebugValue())
continue;
@@ -1090,7 +1113,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Check if the reschedule will not break dependencies.
unsigned NumVisited = 0;
for (MachineInstr &OtherMI :
- llvm::make_range(mi, MachineBasicBlock::iterator(KillMI))) {
+ make_range(mi, MachineBasicBlock::iterator(KillMI))) {
// DBG_VALUE cannot be counted against the limit.
if (OtherMI.isDebugValue())
continue;
@@ -1443,7 +1466,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
- // Deal with <undef> uses immediately - simply rewrite the src operand.
+ // Deal with undef uses immediately - simply rewrite the src operand.
if (SrcMO.isUndef() && !DstMO.getSubReg()) {
// Constrain the DstReg register class if required.
if (TargetRegisterInfo::isVirtualRegister(DstReg))
@@ -1609,7 +1632,6 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (I->end == UseIdx)
LI.removeSegment(LastCopyIdx, UseIdx);
}
-
} else if (RemovedKillFlag) {
// Some tied uses of regB matched their destination registers, so
// regB is still used in this instruction, but a kill flag was
@@ -1639,6 +1661,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
else
AA = nullptr;
OptLevel = TM.getOptLevel();
+ // Disable optimizations if requested. We cannot skip the whole pass as some
+ // fixups are necessary for correctness.
+ if (skipFunction(Func.getFunction()))
+ OptLevel = CodeGenOpt::None;
bool MadeChange = false;
@@ -1658,10 +1684,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
SrcRegMap.clear();
DstRegMap.clear();
Processed.clear();
+ SunkInstrs.clear();
for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end();
mi != me; ) {
MachineBasicBlock::iterator nmi = std::next(mi);
- if (mi->isDebugValue()) {
+ // Don't revisit an instruction previously converted by the target. It may
+ // contain undef register operands (%noreg), which are not handled.
+ if (mi->isDebugValue() || SunkInstrs.count(&*mi)) {
mi = nmi;
continue;
}
@@ -1690,7 +1719,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// transformations that may either eliminate the tied operands or
// improve the opportunities for coalescing away the register copy.
if (TiedOperands.size() == 1) {
- SmallVectorImpl<std::pair<unsigned, unsigned> > &TiedPairs
+ SmallVectorImpl<std::pair<unsigned, unsigned>> &TiedPairs
= TiedOperands.begin()->second;
if (TiedPairs.size() == 1) {
unsigned SrcIdx = TiedPairs[0].first;
@@ -1749,9 +1778,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
///
/// Becomes:
///
-/// %dst:ssub0<def,undef> = COPY %v1
-/// %dst:ssub1<def> = COPY %v2
-///
+/// undef %dst:ssub0 = COPY %v1
+/// %dst:ssub1 = COPY %v2
void TwoAddressInstructionPass::
eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
@@ -1775,7 +1803,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MachineOperand &UseMO = MI.getOperand(i);
unsigned SrcReg = UseMO.getReg();
unsigned SubIdx = MI.getOperand(i+1).getImm();
- // Nothing needs to be inserted for <undef> operands.
+ // Nothing needs to be inserted for undef operands.
if (UseMO.isUndef())
continue;
@@ -1797,7 +1825,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
.addReg(DstReg, RegState::Define, SubIdx)
.add(UseMO);
- // The first def needs an <undef> flag because there is no live register
+ // The first def needs an undef flag because there is no live register
// before it.
if (!DefEmitted) {
CopyMI->getOperand(0).setIsUndef(true);
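Two behavioural changes above deserve a gloss: skipFunction now downgrades OptLevel rather than skipping the pass, because tied-operand rewriting is mandatory for correctness, and instructions sunk by convertInstTo3Addr are recorded in SunkInstrs so the main loop does not revisit them. A small sketch of the first pattern, with illustrative names:

// Sketch of "degrade instead of skip": the pass must still run its
// correctness-critical fixups, so a skip request only disables heuristics.
enum class OptLevel { None, Default };

bool runTwoAddressLikePass(bool SkipRequested) {
  OptLevel Level = SkipRequested ? OptLevel::None : OptLevel::Default;
  bool MadeChange = false;
  // ... always rewrite tied operands here ...
  if (Level != OptLevel::None) {
    // ... only here attempt commuting, 3-address conversion, rescheduling ...
  }
  return MadeChange;
}

int main() { return runTwoAddressLikePass(true) ? 1 : 0; }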
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 407fd9b162e9..5288ca672774 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
@@ -37,7 +38,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
static bool eliminateUnreachableBlock(Function &F) {
@@ -207,11 +207,12 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
MachineRegisterInfo &MRI = F.getRegInfo();
unsigned InputSub = Input.getSubReg();
if (InputSub == 0 &&
- MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) {
+ MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg)) &&
+ !Input.isUndef()) {
MRI.replaceRegWith(OutputReg, InputReg);
} else {
// The input register to the PHI has a subregister or it can't be
- // constrained to the proper register class:
+ // constrained to the proper register class or it is undef:
// insert a COPY instead of simply replacing the output
// with the input.
const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
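The new !Input.isUndef() condition above means a single undef PHI input now takes the COPY path instead of a direct register replacement. A simplified decision sketch, with hypothetical types standing in for MachineOperand and MachineRegisterInfo:

#include <iostream>

// Hypothetical flattening of the checks in the hunk above.
struct PhiInput {
  unsigned SubReg;    // 0 means "no subregister"
  bool Constrainable; // constrainRegClass(...) succeeded
  bool Undef;         // operand carries the undef flag
};

enum class Action { ReplaceReg, InsertCopy };

Action lowerSingleEntryPhi(const PhiInput &In) {
  if (In.SubReg == 0 && In.Constrainable && !In.Undef)
    return Action::ReplaceReg; // output can simply alias the input
  return Action::InsertCopy;   // subreg, class mismatch, or undef input
}

int main() {
  std::cout << (lowerSingleEntryPhi({0, true, true}) == Action::InsertCopy)
            << "\n"; // undef input -> COPY
}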
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index f8aacdb8649d..64bb37a280a6 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//===- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,24 +18,32 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "LiveDebugVariables.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "regalloc"
@@ -132,8 +140,8 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
- OS << '[' << PrintReg(Reg, TRI) << " -> "
- << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+ OS << '[' << printReg(Reg, TRI) << " -> "
+ << printReg(Virt2PhysMap[Reg], TRI) << "] "
<< TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
}
}
@@ -141,7 +149,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
- OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ OS << '[' << printReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
<< "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
}
}
@@ -164,9 +172,9 @@ LLVM_DUMP_METHOD void VirtRegMap::dump() const {
// according to LiveIntervals.
//
namespace {
+
class VirtRegRewriter : public MachineFunctionPass {
MachineFunction *MF;
- const TargetMachine *TM;
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
@@ -184,18 +192,23 @@ class VirtRegRewriter : public MachineFunctionPass {
public:
static char ID;
+
VirtRegRewriter() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnMachineFunction(MachineFunction&) override;
+
MachineFunctionProperties getSetProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
};
+
} // end anonymous namespace
+char VirtRegRewriter::ID = 0;
+
char &llvm::VirtRegRewriterID = VirtRegRewriter::ID;
INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
@@ -208,8 +221,6 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
"Virtual Register Rewriter", false, false)
-char VirtRegRewriter::ID = 0;
-
void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<LiveIntervals>();
@@ -224,7 +235,6 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
- TM = &MF->getTarget();
TRI = MF->getSubtarget().getRegisterInfo();
TII = MF->getSubtarget().getInstrInfo();
MRI = &MF->getRegInfo();
@@ -260,8 +270,9 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
assert(!LI.empty());
assert(LI.hasSubRanges());
- typedef std::pair<const LiveInterval::SubRange *,
- LiveInterval::const_iterator> SubRangeIteratorPair;
+ using SubRangeIteratorPair =
+ std::pair<const LiveInterval::SubRange *, LiveInterval::const_iterator>;
+
SmallVector<SubRangeIteratorPair, 4> SubRanges;
SlotIndex First;
SlotIndex Last;
@@ -369,8 +380,8 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
++NumIdCopies;
// Copies like:
- // %R0 = COPY %R0<undef>
- // %AL = COPY %AL, %EAX<imp-def>
+ // %r0 = COPY undef %r0
+ // %al = COPY %al, implicit-def %eax
// give us additional liveness information: The target (super-)register
// must not be valid before this point. Replace the COPY with a KILL
// instruction to maintain this information.
@@ -477,7 +488,7 @@ void VirtRegRewriter::rewrite() {
if (SubReg != 0) {
if (NoSubRegLiveness) {
// A virtual register kill refers to the whole register, so we may
- // have to add <imp-use,kill> operands for the super-register. A
+ // have to add implicit killed operands for the super-register. A
// partial redef always kills and redefines the super-register.
if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
(MO.isDef() && subRegLiveThrough(*MI, PhysReg)))
@@ -502,9 +513,9 @@ void VirtRegRewriter::rewrite() {
}
}
- // The <def,undef> and <def,internal> flags only make sense for
+ // The def undef and def internal flags only make sense for
// sub-register defs, and we are substituting a full physreg. An
- // <imp-use,kill> operand from the SuperKills list will represent the
+ // implicit killed operand from the SuperKills list will represent the
// partial read of the super-register.
if (MO.isDef()) {
MO.setIsUndef(false);
@@ -519,6 +530,7 @@ void VirtRegRewriter::rewrite() {
// Rewrite. Note we could have used MachineOperand::substPhysReg(), but
// we need the inlining here.
MO.setReg(PhysReg);
+ MO.setIsRenamableIfNoExtraRegAllocReq();
}
// Add any missing super-register kills after rewriting the whole
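The rewritten comment above (%r0 = COPY undef %r0, %al = COPY %al, implicit-def %eax) documents why identity copies become KILLs rather than being silently deleted. A rough reconstruction of that decision under illustrative types; the real predicate in handleIdentityCopy inspects MachineInstr operands and may differ in detail:

// Illustrative only: a self-copy that carries extra liveness information
// (an undef source or implicit operands) becomes a KILL; a bare self-copy
// carries nothing and can be erased.
struct IdentityCopy {
  bool SrcUndef;    // e.g. %r0 = COPY undef %r0
  bool HasExtraOps; // e.g. %al = COPY %al, implicit-def %eax
};

enum class Lowering { Erase, TurnIntoKill };

Lowering handleIdentityCopy(const IdentityCopy &C) {
  if (C.SrcUndef || C.HasExtraOps)
    return Lowering::TurnIntoKill; // preserve the liveness the copy implied
  return Lowering::Erase;          // plain self-copy: no information lost
}

int main() {
  return handleIdentityCopy({true, false}) == Lowering::TurnIntoKill ? 0 : 1;
}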
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index c63a0a9e60ea..7ad84734203d 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -1014,6 +1014,7 @@ void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
removeUnreachableBlocks(F);
}
+#ifndef NDEBUG
void WinEHPrepare::verifyPreparedFunclets(Function &F) {
for (BasicBlock &BB : F) {
size_t NumColors = BlockColors[&BB].size();
@@ -1026,6 +1027,7 @@ void WinEHPrepare::verifyPreparedFunclets(Function &F) {
"EH Pad still has a PHI!");
}
}
+#endif
bool WinEHPrepare::prepareExplicitEH(Function &F) {
// Remove unreachable blocks. It is not valuable to assign them a color and
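The #ifndef NDEBUG guard added above compiles verifyPreparedFunclets only into assert-enabled builds. The general pattern, sketched with an illustrative helper (the assert message mirrors the diff context but is not the exact source):

#include <cassert>

#ifndef NDEBUG
// Only exists in assert-enabled builds; release builds never emit it.
static void verifyPrepared(int NumColors) {
  assert(NumColors == 1 && "NumColors is expected to be 1!");
}
#endif

void prepare(int NumColors) {
#ifndef NDEBUG
  verifyPrepared(NumColors); // both call and callee vanish under NDEBUG
#endif
}

int main() { prepare(1); }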
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 0b4c6e551667..3d83afcf1fc5 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -14,8 +14,8 @@
//
//===---------------------------------------------------------------------===//
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -23,17 +23,26 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
namespace {
+struct InstrumentationOptions {
+ // Whether to emit PATCHABLE_TAIL_CALL.
+ bool HandleTailcall;
+
+ // Whether to emit PATCHABLE_RET/PATCHABLE_FUNCTION_EXIT for all forms of
+ // return, e.g. conditional return.
+ bool HandleAllReturns;
+};
+
struct XRayInstrumentation : public MachineFunctionPass {
static char ID;
@@ -59,7 +68,8 @@ private:
// This is the approach to go on CPUs which have a single RET instruction,
// like x86/x86_64.
void replaceRetWithPatchableRet(MachineFunction &MF,
- const TargetInstrInfo *TII);
+ const TargetInstrInfo *TII,
+ InstrumentationOptions);
// Prepend the original return instruction with the exit sled code ("patchable
// function exit" pseudo-instruction), preserving the original return
@@ -70,25 +80,28 @@ private:
// have to call the trampoline and return from it to the original return
// instruction of the function being instrumented.
void prependRetWithPatchableExit(MachineFunction &MF,
- const TargetInstrInfo *TII);
+ const TargetInstrInfo *TII,
+ InstrumentationOptions);
};
} // end anonymous namespace
void XRayInstrumentation::replaceRetWithPatchableRet(
- MachineFunction &MF, const TargetInstrInfo *TII) {
+ MachineFunction &MF, const TargetInstrInfo *TII,
+ InstrumentationOptions op) {
// We look for *all* terminators and returns, then replace those with
// PATCHABLE_RET instructions.
SmallVector<MachineInstr *, 4> Terminators;
for (auto &MBB : MF) {
for (auto &T : MBB.terminators()) {
unsigned Opc = 0;
- if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
+ if (T.isReturn() &&
+ (op.HandleAllReturns || T.getOpcode() == TII->getReturnOpcode())) {
// Replace return instructions with:
// PATCHABLE_RET <Opcode>, <Operand>...
Opc = TargetOpcode::PATCHABLE_RET;
}
- if (TII->isTailCall(T)) {
+ if (TII->isTailCall(T) && op.HandleTailcall) {
// Treat the tail call as a return instruction, which has a
// different-looking sled than the normal return case.
Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
@@ -108,14 +121,16 @@ void XRayInstrumentation::replaceRetWithPatchableRet(
}
void XRayInstrumentation::prependRetWithPatchableExit(
- MachineFunction &MF, const TargetInstrInfo *TII) {
- for (auto &MBB : MF) {
+ MachineFunction &MF, const TargetInstrInfo *TII,
+ InstrumentationOptions op) {
+ for (auto &MBB : MF)
for (auto &T : MBB.terminators()) {
unsigned Opc = 0;
- if (T.isReturn()) {
+ if (T.isReturn() &&
+ (op.HandleAllReturns || T.getOpcode() == TII->getReturnOpcode())) {
Opc = TargetOpcode::PATCHABLE_FUNCTION_EXIT;
}
- if (TII->isTailCall(T)) {
+ if (TII->isTailCall(T) && op.HandleTailcall) {
Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
}
if (Opc != 0) {
@@ -124,11 +139,10 @@ void XRayInstrumentation::prependRetWithPatchableExit(
BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc));
}
}
- }
}
bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
- auto &F = *MF.getFunction();
+ auto &F = MF.getFunction();
auto InstrAttr = F.getFnAttribute("function-instrument");
bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
InstrAttr.isStringAttribute() &&
@@ -143,7 +157,7 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
// Count the number of MachineInstrs in the MachineFunction.
int64_t MICount = 0;
- for (const auto& MBB : MF)
+ for (const auto &MBB : MF)
MICount += MBB.size();
// Check if we have a loop.
@@ -180,20 +194,35 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
case Triple::ArchType::arm:
case Triple::ArchType::thumb:
case Triple::ArchType::aarch64:
- case Triple::ArchType::ppc64le:
case Triple::ArchType::mips:
case Triple::ArchType::mipsel:
case Triple::ArchType::mips64:
- case Triple::ArchType::mips64el:
+ case Triple::ArchType::mips64el: {
// For the architectures which don't have a single return instruction
- prependRetWithPatchableExit(MF, TII);
+ InstrumentationOptions op;
+ op.HandleTailcall = false;
+ op.HandleAllReturns = true;
+ prependRetWithPatchableExit(MF, TII, op);
+ break;
+ }
+ case Triple::ArchType::ppc64le: {
+ // PPC has conditional returns. Turn them into branch and plain returns.
+ InstrumentationOptions op;
+ op.HandleTailcall = false;
+ op.HandleAllReturns = true;
+ replaceRetWithPatchableRet(MF, TII, op);
break;
- default:
+ }
+ default: {
// For the architectures that have a single return instruction (such as
// RETQ on x86_64).
- replaceRetWithPatchableRet(MF, TII);
+ InstrumentationOptions op;
+ op.HandleTailcall = true;
+ op.HandleAllReturns = false;
+ replaceRetWithPatchableRet(MF, TII, op);
break;
}
+ }
return true;
}
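To summarize the switch above: architectures without a single return instruction get exit sleds on every return form and no tail-call sleds, PPC64LE additionally rewrites conditional returns into branch-plus-plain-return via replaceRetWithPatchableRet, and single-RET targets such as x86_64 also instrument tail calls. A standalone sketch of just the option selection; Arch and optionsFor are illustrative helpers, not part of the patch:

#include <iostream>

struct InstrumentationOptions {
  bool HandleTailcall;   // emit PATCHABLE_TAIL_CALL
  bool HandleAllReturns; // patch every return form, incl. conditional returns
};

enum class Arch { X86_64, PPC64LE, ARM, MIPS };

InstrumentationOptions optionsFor(Arch A) {
  switch (A) {
  case Arch::ARM:
  case Arch::MIPS:
    return {false, true}; // no single RET; exit sleds before each return
  case Arch::PPC64LE:
    return {false, true}; // conditional returns become branch + plain return
  default:
    return {true, false}; // single RET (e.g. RETQ); sled tail calls too
  }
}

int main() {
  InstrumentationOptions O = optionsFor(Arch::PPC64LE);
  std::cout << O.HandleTailcall << " " << O.HandleAllReturns << "\n"; // 0 1
}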